├── Project21
│   └── Emojify
│       └── readme.md
├── Project41
│   └── DBSCAN
│       └── readme.md
├── Project26
│   └── FIFA Capacity LARS
│       └── readme.md
├── Project42
│   └── OPTICS_clustering
│       └── readme.md
├── Project20
│   └── Chatbot_using_seq2seq
│       └── readme.md
├── Project34
│   └── Chatbot_with_attention
│       └── readme.md
├── Project35
│   └── Memory Network Chatbots
│       └── readme.md
├── Project38
│   └── Affinity Propagation
│       └── readme.md
├── Project39
│   └── meanshift_clustering
│       └── readme.md
├── Project43
│   └── Spectral Co Clustering
│       ├── readme.md
│       └── Spectral Co Clustering from scratch.ipynb
├── Project44
│   └── NGBoost_implementation
│       ├── readme.md
│       ├── Sample_Submission.xlsx
│       ├── Test.csv
│       └── Train.csv
├── Project9
│   └── YOLO_Object_Detection
│       ├── readme.md
│       ├── yolo-coco
│       │   ├── readme.md
│       │   ├── coco.names
│       │   └── yolov3.cfg
│       ├── sample images
│       │   ├── readme.md
│       │   └── bean and teddy.jpg
│       ├── yolo_six_lines.py
│       └── yolo_wrapper.py
├── Project10
│   └── Voice_Recognition_Adaboost
│       ├── readme.md
│       └── configuration_sheet.xlsx
├── Project13
│   └── Spam_Or_Ham_MultinomialNB
│       ├── readme.md
│       └── multinomial NB.ipynb
├── Project14
│   └── Pulsar_star_prediction_gbm
│       └── readme.md
├── Project24
│   └── Graduate_Admission_Lasso
│       └── readme.md
├── Project25
│   └── Facebook Metrics Elastic Net
│       └── readme.md
├── Project37
│   └── Nifty50_volatility_forecast
│       └── readme.md
├── Project15
│   └── Lower_back_pain_detection_KNN
│       ├── readme.md
│       └── Lower Back pain detection.ipynb
├── Project16
│   └── Parkinsons_classification_SVM
│       └── readme.md
├── Project17
│   └── Stumble Upon Bagging Classifier
│       ├── readme.md
│       └── Bagging Classifier.ipynb
├── Project18
│   └── Quality_detection_Decision_trees
│       ├── readme.md
│       └── Wine_quality_Decision_Trees.ipynb
├── Project22
│   └── House Price Prediction Regression
│       ├── readme.md
│       └── Linear Regression.ipynb
├── Project23
│   └── Insurance_claim_prediction_Lasso
│       └── readme.md
├── Project28
│   └── Air_Quality_Bayesian_Regression
│       ├── readme.md
│       └── Bayesian Regression.ipynb
├── Project30
│   └── world_war2_weather_SGDRegressor
│       ├── readme.md
│       └── SGDRegressor.ipynb
├── Project32
│   └── Logistic_Regression_credit_card
│       └── readme.md
├── Project11
│   └── Forest_Cover_Prediction_Random_Forests
│       └── readme.md
├── Project12
│   └── Fraud_detection_Extra_tree_classifier
│       └── readme.md
├── Project40
│   └── agglomerative_hierarchial_clustering
│       └── readme.md
├── Project33
│   └── Neural machine Translation with Attention
│       └── readme.md
├── Project29
│   └── weather_prediction_passive_aggressive_regression
│       ├── readme.md
│       └── Passive Aggressive Regression.ipynb
├── Project31
│   └── House_Price_Revisted_Gaussian_Process_Regression
│       ├── readme.md
│       └── Gaussian Process Regression.ipynb
├── Project7
│   └── Machine Translation using Seq2Seq architecture
│       └── readme.md
├── Project19
│   └── Instant_gratification_QDA_LDA
│       ├── readme.md
│       └── QDA_LDA.ipynb
├── Project1
│   └── Transfer_Learning_VGG16
│       ├── param1.jpg
│       ├── config1.png
│       ├── config2.png
│       ├── architecture_vgg.jpg
│       ├── utilities_to_run_code
│       │   ├── configuration.xlsx
│       │   ├── readme.md
│       │   └── monkey_labels.txt
│       └── readme.md
├── Project2
│   └── Neural Style Transfer
│       ├── styles
│       │   ├── scream.jpg
│       │   ├── facepaint.jpg
│       │   ├── flamenco.jpg
│       │   ├── lacquer.jpg
│       │   ├── pablopicaso.jpg
│       │   ├── starrynight.jpg
│       │   └── oil_painting_style.jpg
│       ├── contents
│       │   ├── beach.jpg
│       │   ├── scenic view.jpg
│       │   └── taylorswift.jpg
│       ├── configuration_sheet.xlsx
│       └── readme.md
├── Project3
│   └── Intel Image Classification
│       ├── configuration_sheet.xlsx
│       └── readme.md
├── Project36
│   └── Spectral_Clustering
│       └── readme.md
├── Project5
│   └── Sentiment Analysis using Bidirectional LSTM
│       └── readme.md
├── Project8
│   └── Attention mechanism to classify News
│       └── readme.md
├── Project4
│   └── Stock Price Prediction using LSTM
│       └── readme.md
├── README.md
├── Project27
│   └── House Price Revisited OMP vs Other regression methods
│       └── readme.md
└── Project6
    └── Text Generation from Taylor Swift's songs
        └── readme.md
/Project21/Emojify/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project41/DBSCAN/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project26/FIFA Capacity LARS/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project42/OPTICS_clustering/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project20/Chatbot_using_seq2seq/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project34/Chatbot_with_attention/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project35/Memory Network Chatbots/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project38/Affinity Propagation/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project39/meanshift_clustering/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project43/Spectral Co Clustering/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project44/NGBoost_implementation/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project10/Voice_Recognition_Adaboost/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project13/Spam_Or_Ham_MultinomialNB/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project14/Pulsar_star_prediction_gbm/readme.md: -------------------------------------------------------------------------------- 1 | 2 |
-------------------------------------------------------------------------------- /Project24/Graduate_Admission_Lasso/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project25/Facebook Metrics Elastic Net/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project37/Nifty50_volatility_forecast/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project15/Lower_back_pain_detection_KNN/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project16/Parkinsons_classification_SVM/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project17/Stumble Upon Bagging Classifier/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project18/Quality_detection_Decision_trees/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project22/House Price Prediction Regression/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project23/Insurance_claim_prediction_Lasso/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project28/Air_Quality_Bayesian_Regression/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project30/world_war2_weather_SGDRegressor/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project32/Logistic_Regression_credit_card/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/yolo-coco/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project11/Forest_Cover_Prediction_Random_Forests/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project12/Fraud_detection_Extra_tree_classifier/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project40/agglomerative_hierarchial_clustering/readme.md: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/sample images/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project33/Neural machine Translation with Attention/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project29/weather_prediction_passive_aggressive_regression/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project31/House_Price_Revisted_Gaussian_Process_Regression/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project7/Machine Translation using Seq2Seq architecture/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Project19/Instant_gratification_QDA_LDA/readme.md: -------------------------------------------------------------------------------- 1 | Still a work in progress, not final. 2 | -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/param1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project1/Transfer_Learning_VGG16/param1.jpg -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/config1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project1/Transfer_Learning_VGG16/config1.png -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/config2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project1/Transfer_Learning_VGG16/config2.png -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/scream.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/contents/beach.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/contents/beach.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/facepaint.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/facepaint.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/flamenco.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/flamenco.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/lacquer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/lacquer.jpg -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/architecture_vgg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project1/Transfer_Learning_VGG16/architecture_vgg.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/pablopicaso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/pablopicaso.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/starrynight.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/starrynight.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/configuration_sheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/configuration_sheet.xlsx -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/contents/scenic view.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/contents/scenic view.jpg -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/contents/taylorswift.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/contents/taylorswift.jpg -------------------------------------------------------------------------------- /Project44/NGBoost_implementation/Sample_Submission.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project44/NGBoost_implementation/Sample_Submission.xlsx -------------------------------------------------------------------------------- 
/Project10/Voice_Recognition_Adaboost/configuration_sheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project10/Voice_Recognition_Adaboost/configuration_sheet.xlsx -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/styles/oil_painting_style.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project2/Neural Style Transfer/styles/oil_painting_style.jpg -------------------------------------------------------------------------------- /Project3/Intel Image Classification/configuration_sheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project3/Intel Image Classification/configuration_sheet.xlsx -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/sample images/bean and teddy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project9/YOLO_Object_Detection/sample images/bean and teddy.jpg -------------------------------------------------------------------------------- /Project36/Spectral_Clustering/readme.md: -------------------------------------------------------------------------------- 1 | The necessary theory and explanation of spectral clustering can be found in my Medium blog :- 2 | 3 | https://medium.com/@darkprogrammerpb/spectral-clustering-cdc224001433 4 | -------------------------------------------------------------------------------- /Project5/Sentiment Analysis using Bidirectional LSTM/readme.md: -------------------------------------------------------------------------------- 1 | The data source is :- https://www.kaggle.com/crowdflower/first-gop-debate-twitter-sentiment 2 | A bidirectional LSTM is used for analysing positive and negative tweets. 3 | -------------------------------------------------------------------------------- /Project8/Attention mechanism to classify News/readme.md: -------------------------------------------------------------------------------- 1 | The data is obtained from the source https://www.kaggle.com/yufengdev/bbc-fulltext-and-category 2 | Attention models are used to classify the categories of news articles. 3 | -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/utilities_to_run_code/configuration.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Darkprogrammerpb/DeepLearningProjects_when_I_was_a_noob/HEAD/Project1/Transfer_Learning_VGG16/utilities_to_run_code/configuration.xlsx -------------------------------------------------------------------------------- /Project4/Stock Price Prediction using LSTM/readme.md: -------------------------------------------------------------------------------- 1 | The data source is :- https://www.kaggle.com/szrlee/stock-time-series-20050101-to-20171231 2 | We are considering the stock prices of Amazon.
I have also uploaded the data used in the code in the repository 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLearningProjects 2 | 3 | These projects are pretty old and I have not updated them yet, because I am too busy to maintain a GitHub repository of my projects and competition code. 4 | 5 | I did these projects when I started learning data science. A lot has changed over the years, so refer to these notebooks with caution. 6 | -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/utilities_to_run_code/readme.md: -------------------------------------------------------------------------------- 1 | # Data for the code :- 2 | The data is obtained from the following Kaggle link:- https://www.kaggle.com/slothkong/10-monkey-species 3 | There will be two folders. Both folders are to be saved in the same location as the Jupyter notebook and its configuration file. 4 | -------------------------------------------------------------------------------- /Project3/Intel Image Classification/readme.md: -------------------------------------------------------------------------------- 1 | We are working with a simple Convolutional Neural Network to predict the stated labels in the dataset :- 2 | https://www.kaggle.com/puneet6060/intel-image-classification 3 | 4 | An attempt is made to unbox the convolutional layers by visualizing what happens at each layer and how the final prediction is made. 5 | -------------------------------------------------------------------------------- /Project27/House Price Revisited OMP vs Other regression methods/readme.md: -------------------------------------------------------------------------------- 1 | We will look at a brief overview of what Orthogonal Matching Pursuit (OMP) means:- 2 | ![omp_1](https://user-images.githubusercontent.com/51089715/66738172-7d34fc00-ee8b-11e9-82c9-dd5f7765d68a.jpg) 3 | ![omp_2](https://user-images.githubusercontent.com/51089715/66738181-81611980-ee8b-11e9-9338-f8ead42d74c7.jpg) 4 | 5 | # Algorithmic implementation and some essential sklearn module parameters of OMP: 6 | ![omp_3](https://user-images.githubusercontent.com/51089715/66738189-83c37380-ee8b-11e9-97df-673cc257e8d9.jpg) 7 | -------------------------------------------------------------------------------- /Project6/Text Generation from Taylor Swift's songs/readme.md: -------------------------------------------------------------------------------- 1 | 2 | ![textgen_1](https://user-images.githubusercontent.com/51089715/63297065-2b8a4e00-c2ee-11e9-9200-72b732cd942c.jpg) 3 | ![textgen_2](https://user-images.githubusercontent.com/51089715/63297071-2fb66b80-c2ee-11e9-861f-a7bebb902e9c.jpg) 4 | ![textgen_3](https://user-images.githubusercontent.com/51089715/63297083-35ac4c80-c2ee-11e9-8ab9-9bd1cd98cb41.jpg) 5 | ![textgen_4](https://user-images.githubusercontent.com/51089715/63297089-393fd380-c2ee-11e9-831b-800859bef070.jpg) 6 | ![textgen_5](https://user-images.githubusercontent.com/51089715/63297094-3cd35a80-c2ee-11e9-9b36-323b6bc882b1.jpg) 7 | ![textgen_6](https://user-images.githubusercontent.com/51089715/63297098-3f35b480-c2ee-11e9-9895-ef5dcd0014ea.jpg) 8 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/yolo_six_lines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import
time 3 | import cv2 4 | import os 5 | import matplotlib 6 | matplotlib.rcParams['figure.figsize']= (5.0,5.0) 7 | import matplotlib.pyplot as plt 8 | from yolo_wrapper import * 9 | labels_path = os.getcwd()+'\\yolo-coco\\coco.names' ### load label path 10 | weights_path = os.getcwd()+'\\yolo-coco\\yolov3.weights' ### load weights path 11 | configs_path = os.getcwd()+'\\yolo-coco\\yolov3.cfg' ### Load configuration path 12 | test_image = os.getcwd()+'\\sample images\\bean and teddy.jpg' ### Load test image path 13 | yolo_class = Yolo_Implementation(labels_path,weights_path,configs_path,test_image) ### call wrapper library created 14 | yolo_class.yolo_non_max_suppress() ### Image created 15 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/yolo-coco/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/readme.md: -------------------------------------------------------------------------------- 1 | # VGG16 Architecture 2 | A typical VGG16 architecture looks like this. 3 | Each box in the Convolution and Pooling Block from Blocks 1 to 5, contains the Convolution Number, Image tensor(representing the height, width and features), kernal size and stride. 4 | 5 | ![architecture_vgg](https://user-images.githubusercontent.com/51089715/61504390-c98dae80-a9f8-11e9-8596-f38e73b4cb67.jpg) 6 | 7 | # A brief overview of Training Data, Batch, Epochs and Batch size 8 | ![param1](https://user-images.githubusercontent.com/51089715/61504563-749e6800-a9f9-11e9-816d-3c88bbf63130.jpg) 9 | 10 | In the code, I have used a configuration sheet which contains all the parameters needed as an input for both the model as well as for data augmentation. 
A snippet of the configuration file (saved as configuration.xlsx) is shown below:- 11 | ![config2](https://user-images.githubusercontent.com/51089715/61504631-b9c29a00-a9f9-11e9-9a01-d243f812e36f.png) 12 | ![config1](https://user-images.githubusercontent.com/51089715/61504632-b9c29a00-a9f9-11e9-93c9-b63e7c3d4492.png) 13 | -------------------------------------------------------------------------------- /Project1/Transfer_Learning_VGG16/utilities_to_run_code/monkey_labels.txt: -------------------------------------------------------------------------------- 1 | Label, Latin Name , Common Name , Train Images , Validation Images 2 | n0 , alouatta_palliata , mantled_howler , 131 , 26 3 | n1 , erythrocebus_patas , patas_monkey , 139 , 28 4 | n2 , cacajao_calvus , bald_uakari , 137 , 27 5 | n3 , macaca_fuscata , japanese_macaque , 152 , 30 6 | n4 , cebuella_pygmea , pygmy_marmoset , 131 , 26 7 | n5 , cebus_capucinus , white_headed_capuchin , 141 , 28 8 | n6 , mico_argentatus , silvery_marmoset , 132 , 26 9 | n7 , saimiri_sciureus , common_squirrel_monkey , 142 , 28 10 | n8 , aotus_nigriceps , black_headed_night_monkey , 133 , 27 11 | n9 , trachypithecus_johnii , nilgiri_langur , 132 , 26 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Project2/Neural Style Transfer/readme.md: -------------------------------------------------------------------------------- 1 | ### Sample demonstration of Neural Style Transfer (Using VGG19) 2 | ![examplestyletransfer](https://user-images.githubusercontent.com/51089715/62014529-ca280100-b1bf-11e9-973e-85ea5c2c9f2b.PNG) 3 | 4 | # Neural Style Transfer explained 5 | 6 | ![style transfer_1](https://user-images.githubusercontent.com/51089715/62105102-14dc7280-b2bf-11e9-85c2-c65d9f871db2.jpg) 7 | 8 | ![style transfer_2](https://user-images.githubusercontent.com/51089715/62105105-15750900-b2bf-11e9-95c8-2ed2d57b9c6c.jpg) 9 | 10 | ![style transfer_3](https://user-images.githubusercontent.com/51089715/62105104-14dc7280-b2bf-11e9-8108-c0575870762f.jpg) 11 | 12 | ![style transfer_4](https://user-images.githubusercontent.com/51089715/62105103-14dc7280-b2bf-11e9-83f3-2fb8943406c9.jpg) 13 | 14 | 15 | # Citation 16 | 1. A Neural Algorithm of Artistic Style (Leon A. Gatys, Alexander S. Ecker, Matthias Bethge) 17 | Link :- https://arxiv.org/pdf/1508.06576.pdf 18 | 2. 
Understanding Deep Image Representations by Inverting them (Aravindh Mahendran, Andrea Vedaldi) 19 | Link :- https://arxiv.org/pdf/1412.0035.pdf 20 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/yolo_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import cv2 4 | import os 5 | import matplotlib 6 | matplotlib.rcParams['figure.figsize']= (5.0,5.0) 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | class Yolo_Implementation(object): 11 | def __init__(self,labels_path,weights_path,config_path,test_image,score_threshold=0.1,nms_threshold=0.2): 12 | self.score_threshold = score_threshold 13 | self.nms_threshold = nms_threshold 14 | self.weights_path = weights_path 15 | self.config_path = config_path 16 | self.image = cv2.imread(test_image) 17 | (self.H, self.W) = self.image.shape[:2] 18 | self.labels = open(labels_path).read().strip().split("\n") 19 | self.colors = np.random.randint(0, 255, size=(len(self.labels), 3),dtype="uint8") 20 | 21 | def build_model(self): 22 | model = cv2.dnn.readNetFromDarknet(self.config_path,self.weights_path) 23 | blob = cv2.dnn.blobFromImage(self.image, 1 / 255.0, (480, 480),swapRB=True, crop=False) 24 | layers_yolo = model.getLayerNames() 25 | yolo_layers_needed = [layers_yolo[i[0]-1] for i in model.getUnconnectedOutLayers()] 26 | retval = model.setInput(blob) 27 | layer_outputs = model.forward(yolo_layers_needed) 28 | return layer_outputs 29 | 30 | def yolo_filter_boxes(self): 31 | boxes = [] 32 | probabilities = [] 33 | classIDs = [] 34 | layer_outputs = self.build_model() 35 | for output in layer_outputs: 36 | for detection in output: 37 | if detection[4]>0.0: ### Detecting the presence of object 38 | scores = detection[5:] ### Capturing the probabilities of corresponding class ID 39 | classid = np.argmax(scores) ### Finding the class ID with maximum probability 40 | prob = np.max(scores) ### Finding maximum probability 41 | if prob > self.score_threshold: ### Thresholding to filter yolo boxes (score threshold) 42 | box = detection[0:4]*np.array([self.W,self.H,self.W,self.H]) 43 | (centerX, centerY, width, height) = box.astype("int") 44 | x = int(centerX - (width / 2)) 45 | y = int(centerY - (height / 2)) 46 | boxes.append([x, y, int(width), int(height)]) 47 | probabilities.append(float(prob)) 48 | classIDs.append(classid) 49 | return boxes,probabilities,classIDs 50 | 51 | def yolo_non_max_suppress(self): 52 | boxes,probabilities,classIDs = self.yolo_filter_boxes() 53 | idxs = cv2.dnn.NMSBoxes(boxes, probabilities,self.score_threshold,self.nms_threshold) 54 | if len(idxs)>0: 55 | for i in idxs.flatten(): 56 | (x,y) = (boxes[i][0],boxes[i][1]) 57 | (w,h) = (boxes[i][2],boxes[i][3]) 58 | color = [int(c) for c in self.colors[classIDs[i]]] 59 | cv2.rectangle(self.image, (x, y), (x + w, y + h), color, 2) 60 | text = "{}: {:.4f}".format(self.labels[classIDs[i]], 100*round(probabilities[i],4)) 61 | cv2.putText(self.image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,0.5, color, 1) 62 | plt.figure(figsize=(20,20)); 63 | plt.imshow(self.image[:,:,::-1]) 64 | plt.axis('off'); 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /Project44/NGBoost_implementation/Test.csv: -------------------------------------------------------------------------------- 1 | City,Location_Score,Internal_Audit_Score,External_Audit_Score,Fin_Score,Loss_score,Past_Results 2 | 
41,18.272,13,12,9,7,0 3 | 17,64.799,6,10,7,4,1 4 | 31,68.89,3,3,7,8,0 5 | 3,16.492,15,10,7,4,1 6 | 9,17.178,7,3,8,3,1 7 | 30,64.151,5,8,5,8,0 8 | 23,35.149,6,7,4,3,1 9 | 31,63.258,5,6,3,5,0 10 | 29,6.163,7,14,3,4,0 11 | 5,62.562,10,14,5,3,0 12 | 35,14.521,11,6,3,5,0 13 | 4,64.968,4,7,3,4,1 14 | 23,13.614,8,4,7,7,1 15 | 37,7.754,6,7,4,5,0 16 | 12,16.445,6,15,15,5,1 17 | 37,69.472,5,3,3,6,1 18 | 9,18.84,6,5,8,4,0 19 | 37,31.485,8,3,7,8,1 20 | 20,11.418,7,9,7,8,0 21 | 10,20.22,13,14,12,6,1 22 | 38,14.292,5,8,5,5,1 23 | 22,8.949,6,4,5,8,1 24 | 40,60.878,3,11,7,4,0 25 | 37,71.927,5,7,8,4,1 26 | 5,74.542,8,7,5,6,1 27 | 9,21.54,8,15,10,4,1 28 | 6,14.902,7,4,8,3,1 29 | 41,12.679,10,15,11,3,1 30 | 40,71.995,10,14,4,3,0 31 | 41,70.479,3,6,3,3,1 32 | 4,75.694,6,8,4,5,1 33 | 26,15.29,15,8,8,4,1 34 | 23,10.552,11,5,4,5,1 35 | 23,9.689,12,10,13,3,2 36 | 37,59.47,6,6,4,7,0 37 | 23,67.137,5,7,7,6,0 38 | 28,9.16,15,11,15,3,1 39 | 37,10.364,5,5,7,3,1 40 | 41,70.015,8,7,4,5,1 41 | 37,76.166,6,3,5,6,1 42 | 40,11.909,15,3,3,8,1 43 | 9,11.532,4,4,6,6,1 44 | 40,8.813,6,6,4,7,1 45 | 31,25.807,3,6,6,6,0 46 | 34,11.649,11,12,12,3,1 47 | 2,69.585,6,8,6,6,0 48 | 40,34.46,5,5,5,8,0 49 | 13,7.14,10,10,14,5,1 50 | 1,18.098,11,7,5,3,0 51 | 23,61.045,3,11,8,6,0 52 | 41,75.06,10,4,4,4,0 53 | 9,20.981,11,5,8,7,1 54 | 6,75.149,5,6,5,8,0 55 | 31,22.93,8,13,9,6,1 56 | 37,8.929,3,8,5,8,0 57 | 4,70.452,7,3,3,6,1 58 | 40,14.09,10,9,14,5,2 59 | 8,76.782,8,8,6,3,0 60 | 9,36.991,5,4,8,7,0 61 | 23,9.347,6,8,8,8,1 62 | 21,17.141,5,4,5,3,1 63 | 39,69.422,6,3,7,6,1 64 | 19,20.011,7,8,6,8,1 65 | 13,63.497,4,15,3,3,1 66 | 9,10.867,3,6,3,4,1 67 | 9,59.895,8,6,4,8,0 68 | 40,14.722,13,12,5,6,1 69 | 40,16.382,5,6,3,3,0 70 | 9,21.779,7,6,4,8,0 71 | 38,23.923,6,6,4,5,0 72 | 2,18.137,3,3,8,3,0 73 | 37,20.891,10,10,10,7,0 74 | 10,14.552,10,6,11,4,5 75 | 2,24.254,15,11,13,8,2 76 | 38,9.483,8,5,8,7,0 77 | 0,19.968,7,10,12,8,1 78 | 9,16.489,8,5,7,5,1 79 | 40,17.641,10,10,3,8,1 80 | 38,61.137,6,5,5,8,0 81 | 28,17.913,11,4,13,7,0 82 | 11,12.582,11,12,10,5,0 83 | 11,69.736,8,11,8,4,1 84 | 1,22.301,7,6,8,4,0 85 | 37,77.668,8,5,3,3,0 86 | 12,77.497,5,5,6,8,0 87 | 37,67.345,4,3,7,3,0 88 | 39,20.108,10,9,10,7,1 89 | 23,7.627,7,8,3,3,0 90 | 22,20.249,4,7,8,6,1 91 | 36,70.052,5,9,6,3,1 92 | 37,34.943,7,4,8,6,0 93 | 40,13.047,8,7,14,5,1 94 | 41,7.052,11,4,4,3,1 95 | 37,61.237,8,4,6,6,0 96 | 5,12.156,9,8,10,6,0 97 | 3,11.083,14,11,6,5,0 98 | 2,76.327,8,7,6,8,1 99 | 9,61.427,7,3,6,6,0 100 | 9,28.466,8,3,8,4,1 101 | 40,7.724,12,5,4,8,0 102 | 22,7.278,8,6,4,6,0 103 | 37,62.974,8,5,4,4,1 104 | 22,20.781,12,13,11,6,1 105 | 3,67.933,4,10,4,8,1 106 | 41,13.286,6,3,8,4,0 107 | 13,7.108,11,14,14,4,0 108 | 6,22.053,4,7,3,6,1 109 | 5,20.186,13,15,15,6,0 110 | 38,19.025,3,5,5,8,0 111 | 30,23.931,10,13,14,8,0 112 | 2,62.952,4,6,8,7,1 113 | 6,19.707,7,4,6,5,1 114 | 40,5.808,7,11,7,8,1 115 | 31,68.645,7,3,7,5,1 116 | 31,64.455,6,8,3,5,1 117 | 23,68.512,7,12,8,5,0 118 | 22,14.062,10,12,10,7,1 119 | 2,74.233,7,12,6,3,1 120 | 38,20.05,4,14,13,6,1 121 | 3,58.576,10,15,10,5,0 122 | 2,16.898,8,4,3,5,0 123 | 4,13.668,11,13,3,6,1 124 | 41,59.899,3,6,5,4,0 125 | 9,32.142,4,3,6,4,0 126 | 12,67.397,8,6,3,6,1 127 | 8,11.921,10,14,14,5,1 128 | 10,18.416,3,5,5,8,0 129 | 41,17.444,10,6,4,5,0 130 | 3,73.677,11,4,5,6,0 131 | 0,6.041,10,3,5,6,1 132 | 40,6.917,5,4,8,3,0 133 | 12,64.758,4,6,4,8,0 134 | 41,64.43,4,4,4,3,1 135 | 37,58.964,6,7,3,6,0 136 | 1,17.652,11,14,6,7,2 137 | 19,65.905,9,11,8,4,0 138 | 37,17.567,9,8,8,4,0 139 | 23,22.81,7,6,6,5,0 140 | 2,67.802,8,5,6,4,1 141 | 
3,69.432,7,8,6,4,0 142 | 1,21.748,8,10,10,6,1 143 | 40,21.639,3,6,5,8,1 144 | 5,9.816,12,4,6,5,0 145 | 1,59.553,4,14,6,4,1 146 | 8,40.557,6,6,8,3,0 147 | 6,17.014,7,4,10,6,1 148 | 1,22.983,10,12,11,8,0 149 | 19,10.658,13,13,12,6,0 150 | 37,74.905,7,8,6,7,1 151 | 1,62.766,7,7,5,6,0 152 | 8,9.755,11,8,13,7,2 153 | 6,61.76,6,8,4,5,0 154 | 2,8.283,5,4,3,8,0 155 | 19,64.47,4,7,3,8,0 156 | 2,19.369,4,9,6,5,1 157 | 19,11.546,8,7,3,7,0 158 | 10,19.028,8,3,5,5,0 159 | 40,8.032,11,11,3,8,2 160 | 9,10.099,4,5,12,3,0 161 | 13,22.776,10,3,12,10,1 162 | 40,7.349,11,11,11,6,1 163 | 6,33.668,3,4,8,6,0 164 | 40,61.095,5,5,5,4,0 165 | 31,12.499,8,8,7,7,0 166 | 6,13.028,11,6,5,6,0 167 | 6,16.039,7,3,7,4,0 168 | 3,14.413,8,4,5,8,1 169 | 3,21.512,13,4,6,8,0 170 | 41,6.366,15,14,12,6,1 171 | 2,41.776,8,5,4,6,0 172 | 9,16.166,12,15,10,3,0 173 | 40,73.751,11,13,6,3,0 174 | 1,7.511,14,12,12,8,1 175 | 6,76.377,7,3,8,3,0 176 | 41,62.196,8,8,4,4,1 177 | 10,22.084,7,3,14,4,1 178 | 4,6.894,6,8,7,7,1 179 | 9,8.854,12,7,14,4,1 180 | 41,7.122,8,7,10,4,0 181 | 10,6.996,14,7,13,8,0 182 | 19,14.166,10,15,14,6,1 183 | 41,18.223,6,7,11,8,1 184 | 3,9.979,15,5,7,6,1 185 | 18,17.17,8,6,7,7,0 186 | 40,70.917,8,6,4,3,1 187 | 27,13.84,5,6,4,4,0 188 | 6,10.888,12,14,14,7,0 189 | 6,15.65,15,10,14,4,0 190 | 2,22.511,12,12,9,6,0 191 | 8,19.883,7,9,7,4,0 192 | 18,66.671,7,4,3,4,0 193 | 3,6.965,4,4,6,7,1 194 | 28,64.037,6,15,8,7,0 195 | 31,11.694,11,8,6,6,0 196 | 9,23.353,10,5,6,8,1 197 | 31,14.707,13,4,4,6,0 198 | 31,75.583,4,8,8,6,0 199 | 13,67.544,11,8,3,6,1 200 | 40,8.693,15,14,15,7,3 201 | 41,61.352,3,5,3,8,0 202 | 13,21.243,13,10,15,8,1 203 | 31,15.861,3,4,7,7,0 204 | 9,19.396,6,13,12,7,0 205 | 9,16.204,13,5,4,6,1 206 | 9,10.812,12,11,5,8,0 207 | 40,27.975,4,5,3,7,1 208 | 12,15.27,8,8,7,7,1 209 | 0,7.106,8,8,14,8,5 210 | 38,19.081,3,4,7,4,1 211 | 9,15.496,10,5,3,4,1 212 | 19,10.04,6,5,8,6,1 213 | 2,11.604,13,13,4,8,1 214 | 2,9.586,8,3,3,6,0 215 | 5,22.514,11,7,6,3,1 216 | 9,73.12,6,6,6,8,1 217 | 8,7.531,11,4,4,7,1 218 | 1,65.03,3,3,7,6,0 219 | 1,9.895,8,5,8,7,0 220 | 9,5.469,6,4,3,5,0 221 | 41,73.41,5,3,7,4,1 222 | 28,16.596,6,4,6,5,0 223 | 31,72.562,7,3,11,8,0 224 | 1,33.662,4,5,4,5,0 225 | 6,18.415,8,5,3,6,0 226 | 6,65.557,10,12,9,7,1 227 | 5,63.253,6,10,3,8,1 228 | 5,10.222,3,8,7,7,0 229 | 1,12.685,10,3,4,5,1 230 | 2,73.165,6,5,5,5,0 231 | 2,6.952,8,8,5,4,1 232 | 3,6.796,7,5,4,5,1 233 | 3,9.197,13,12,10,7,1 234 | 41,67.581,6,5,3,6,1 235 | -------------------------------------------------------------------------------- /Project43/Spectral Co Clustering/Spectral Co Clustering from scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Importing necessary libraries for analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import math\n", 17 | "from scipy.linalg import *\n", 18 | "from sklearn.datasets import make_checkerboard\n", 19 | "from sklearn.cluster import KMeans\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "%matplotlib inline\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Creating data for application of algorithm" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 23, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQEAAAECCAYAAAD+eGJTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAALt0lEQVR4nO3db4xlhVnH8e/PsmBdcLtIgQ3FooQY/8WlmaAJRjENDeIL4EVNedFsY+PyohhIqpHwprzRoCmgMYZkEdI1oTQ1gPCC2BJCgk0M7UI2sLgqTbNWYN0tLgG62paFxxdzF4dlZufuzL333PH5fpLJvffcO3OenOx+95x7z55JVSGprx8begBJwzICUnNGQGrOCEjNGQGpOSMgNTdIBJJcleRfk3w7yS1DzHCiJAeSPJ9kb5I9A85xX5LDSfYtWXZ2kseTvDi63ToHM92W5OXR9tqb5OoZz3RhkieT7E/yQpKbRssH21YnmWnQbbWazPo8gSQfAP4NuBJ4CfgWcH1V/fNMB3n/XAeAhap6deA5fgP4PvC3VfVLo2V/DhypqttH0dxaVX888Ey3Ad+vqi/Oao4TZtoGbKuqZ5OcBTwDXAt8hoG21Ulm+l0G3FarGWJP4DLg21X1nar6EfAV4JoB5phLVfUUcOSExdcAu0f3d7P4B2vomQZVVQer6tnR/TeB/cAFDLitTjLTXBsiAhcA/7Hk8UvMx4Yq4OtJnkmyc+hhTnBeVR2ExT9owLkDz3PcjUmeGx0uzPQQZakkFwGXAk8zJ9vqhJlgTrbVcoaIQJZZNg/nLl9eVR8Dfhv43GgXWCu7G7gY2A4cBO4YYogkZwIPAjdX1RtDzHCiZWaai221kiEi8BJw4ZLHHwFeGWCO96iqV0a3h4GHWTxsmReHRsebx487Dw88D1V1qKrerqp3gHsYYHsl2cTiX7b7q+qh0eJBt9VyM83DtjqZISLwLeCSJD+T5HTgU8CjA8zxriSbR2/kkGQz8Alg38m/a6YeBXaM7u8AHhlwFuDdv2DHXceMt1eSAPcC+6vqziVPDbatVppp6G21mpl/OgAw+ojkL4APAPdV1Z/MfIj3zvOzLP7rD3Aa8OWhZkryAHAFcA5wCPgC8PfAV4GfBr4LfLKqZvZG3QozXcHi7m0BB4Abjh+Lz2imXwf+EXgeeGe0+FYWj8EH2VYnmel6BtxWqxkkApLmh2cMSs0ZAak5IyA1ZwSk5oyA1NygEZjD03OdaUzONL55neu4ofcE5nHjONN4nGl88zoXMHwEJA1sXScLJbkK+EsWz/z7m6q6/WSv37R5S52x9fx3H7919HU2bd6y5vVPw7zMtOUnTn/3/tHXj7B5y9kDTvN+8zLTwf987d379dZRsmnzgNMsbx7mqh+8Rv3o6HL/eY/T1vpDRxcH+WuWXBwkyaMnuzjIGVvP55f/YNdaV9nK1R+bh/9dPf/+9M8eHHqEDeGH3/yrFZ9bz+GAFweR/h9YTwTm9eIgkk7BeiIw1sVBkuxMsifJnreOvr6O1UmahvVEYKyLg1TVrqpaqKqFeXjDTdJ7rScCc3dxEEmnbs2fDlTVsSQ3Al/j/y4O8sLEJpM0E2uOAEBVPQY8NqFZJA3AMwal5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpOaMgNScEZCaMwJSc0ZAas4ISM0ZAak5IyA1ZwSk5oyA1JwRkJozAlJz6/qFpEkOAG8CbwPHqmphEkNJmp11RWDkt6rq1Qn8HEkD8HBAam69ESjg60meSbJzEgNJmq31Hg5cXlWvJDkXeDzJv1TVU0tfMIrDToDTP3TeOlcnadLWtSdQVa+Mbg8DDwOXLfOaXVW1UFULmzZvWc/qJE3BmiOQZHOSs47fBz4B7JvUYJJmYz2HA+cBDyc5/nO+XFX/MJGpJM3MmiNQVd8BfmWCs0gagB8RSs0ZAak5IyA1ZwSk5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDU3CQuNDq2/z7yGnu/8nezXOWG9Xu/+UdDj7AhbP7IR4ceYUN4a+/pKz7nnoDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpOaMgNScEZCaMwJSc0ZAas4ISM0ZAam5VSOQ5L4kh5PsW7Ls7CSPJ3lxdLt1umNKmpZx9gS+BFx1wrJbgCeq6hLgidFjSRvQqhGoqqeAIycsvgbYPbq/G7h2wnNJmpG1vidwXlUdBBjdnju5kSTN0tQvL5ZkJ7ATgE1nTnt1kk7RWvcEDiXZBjC6PbzSC6tqV1UtVNVCTvvgGlcnaVrWGoFHgR2j+zuARyYzjqRZG+cjwgeAfwJ+LslLST4L3A5cmeRF4MrRY0kb0KrvCVTV9Ss89fEJzyJpAJ4xKDVnBKTmjIDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpOaMgNScEZCaMwJSc0ZAas4ISM0ZAak5IyA1ZwSk5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDUnBGQmjMCUnPj/Gry+5IcTrJvybLbkrycZO/o6+rpjilpWsbZE/gScNUyy++qqu2jr8cmO5akWVk1AlX1FHBkBrNIGsB63hO4Mclzo8OFrSu9KMnOJHuS7Klj/7OO1UmahrVG4G7gYmA7cBC4Y6UXVtWuqlqoqoWc9sE1rk7StKwpAlV1qKrerqp3gHuAyyY7lqRZWVMEkmxb8vA6YN9Kr5U0305b7QVJHgCuAM5J8hLwBeCKJNuBAg4AN0xxRklTtGoEqur6ZRbfO4VZJA3AMwal5oyA1JwRkJozAlJzRkBqbtVPByZp6/kf5nf+8PdnucoN67F93xt6hA3h6IvPDz3ChvDOD1Y+Zd89Aak5IyA1ZwSk5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpOaMgNScEZCaMwJSc6tGIMmFSZ5Msj/JC0luGi0/O8njSV4c3W6d/riSJm2cPYFjwOer6ueBXwM+l+QXgFuAJ6rqEuCJ0WNJG8yqEaiqg1X17Oj+m8B+4ALgGmD36GW7gWunNaSk6Tml9wSSXARcCjwNnFdVB2ExFMC5kx5O0vSNHYEkZwIPAjdX1Run8H07k+xJsueHb7y2lhklTdFYEUiyicUA3F9VD40WH0qybfT8NuDwct9bVbuqaqGqFs74Sd87lObNOJ8OBLgX2F9Vdy556lFgx+j+DuCRyY8nadrG+a3ElwOfBp5Psne07FbgduCrST4LfBf45HRGlDRNq0agqr4BZIWnPz7ZcSTNmmcMSs0ZAak5IyA1ZwSk5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpObGudrwxFz4oR/nrmt/cZar3LCOvV1Dj7AhbPnMwtAjbAiX/+rXVnzOPQGpOSMgNWcEpOaMgNScEZCaMw
JSc0ZAam6cX01+YZInk+xP8kKSm0bLb0vycpK9o6+rpz+upEkb52ShY8Dnq+rZJGcBzyR5fPTcXVX1xemNJ2naxvnV5AeBg6P7bybZD1ww7cEkzcYpvSeQ5CLgUuDp0aIbkzyX5L4kWyc8m6QZGDsCSc4EHgRurqo3gLuBi4HtLO4p3LHC9+1MsifJnv969dUJjCxpksaKQJJNLAbg/qp6CKCqDlXV21X1DnAPcNly31tVu6pqoaoWfuqccyY1t6QJGefTgQD3Avur6s4ly7ctedl1wL7Jjydp2sb5dOBy4NPA80n2jpbdClyfZDtQwAHghqlMKGmqxvl04BtAlnnqscmPI2nWPGNQas4ISM0ZAak5IyA1ZwSk5oyA1JwRkJozAlJzRkBqzghIzRkBqTkjIDVnBKTmjIDUnBGQmjMCUnNGQGrOCEjNGQGpOSMgNWcEpOaMgNScEZCaMwJSc0ZAas4ISM0ZAak5IyA1l6qa3cqS7wH/vmTROcCrMxtgPM40Hmca3zzM9dGq+vByT8w0Au9bebKnqhYGG2AZzjQeZxrfvM51nIcDUnNGQGpu6AjsGnj9y3Gm8TjT+OZ1LmDg9wQkDW/oPQFJAzMCUnNGQGrOCEjNGQGpuf8F6Tc6D2c23VMAAAAASUVORK5CYII=\n", 40 | "text/plain": [ 41 | "
" 42 | ] 43 | }, 44 | "metadata": { 45 | "needs_background": "light" 46 | }, 47 | "output_type": "display_data" 48 | } 49 | ], 50 | "source": [ 51 | "n_clusters = (4,3)\n", 52 | "A,_,_ = make_checkerboard(shape = (30,30),n_clusters = n_clusters,shuffle=False,random_state=0)\n", 53 | "plt.matshow(A, cmap=plt.cm.Blues);\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Creating the Row and Column diagonal matrices and also applying SVD on An" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 10, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "D_one_half = np.diag([np.power(A[i].sum(),-0.5) for i in range(A.shape[0])])\n", 70 | "D_two_half = np.diag([np.power(A.T[i].sum(),-0.5) for i in range(A.shape[1])])\n", 71 | "An = (D_one_half.dot(A)).dot(D_two_half)\n", 72 | "U,S,Vt = svd(An)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## Input parameters for the Co clustering" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 11, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "k = 4\n", 89 | "l = int(np.ceil(math.log(k,2)))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Creating the matrix for application of kmeans algorithm" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 13, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "Ul = U[:,1:(l+1)]\n", 106 | "Vl = Vt.T[:,1:(l+1)]\n", 107 | "Z = np.vstack((Ul,Vl))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Applying KMeans on the data Z and performing clustering " 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 15, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n", 126 | " n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto',\n", 127 | " random_state=None, tol=0.0001, verbose=0)" 128 | ] 129 | }, 130 | "execution_count": 15, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "kmeans = KMeans(n_clusters=k)\n", 137 | "kmeans.fit(Z)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 16, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "labels = kmeans.labels_\n", 147 | "row_clusters = labels[:A.shape[0]]\n", 148 | "column_clusters = labels[A.shape[0]:]" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## We can clearly see the 4 clusters corresponding to the rows" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 21, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "array([3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,\n", 167 | " 1, 1, 2, 2, 2, 2, 2, 2])" 168 | ] 169 | }, 170 | "execution_count": 21, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "row_clusters" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## We can clearly see the 3 clusters corresponding to the columns" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 22, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | 
"text/plain": [ 194 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,\n", 195 | " 3, 3, 3, 3, 3, 3, 3, 3])" 196 | ] 197 | }, 198 | "execution_count": 22, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "column_clusters" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.4" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } 237 | -------------------------------------------------------------------------------- /Project9/YOLO_Object_Detection/yolo-coco/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | 
[convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 
| activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 
| activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /Project28/Air_Quality_Bayesian_Regression/Bayesian Regression.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA\n", 16 | "from sklearn.linear_model import BayesianRidge\n", 17 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 18 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 19 | "from sklearn.metrics import *\n", 20 | "import hyperopt\n", 21 | "from hyperopt import *\n", 22 | "from hyperopt import fmin, tpe, hp, space_eval\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline \n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "
\n", 32 | "Loading the data: We load the data from the mentioned path\n", 33 | "
" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 7, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
DateTimeCO(GT)PT08.S1(CO)NMHC(GT)C6H6(GT)PT08.S2(NMHC)NOx(GT)PT08.S3(NOx)NO2(GT)PT08.S4(NO2)PT08.S5(O3)TRHAH
02004-03-1018:00:002.61360.0015011.8817231045.50166.01056.25113.01692.001267.5013.648.8750010.757754
12004-03-1019:00:002.01292.251129.397165954.75103.01173.7592.01558.75972.2513.347.7000000.725487
\n", 118 | "
" 119 | ], 120 | "text/plain": [ 121 | " Date Time CO(GT) PT08.S1(CO) NMHC(GT) C6H6(GT) \\\n", 122 | "0 2004-03-10 18:00:00 2.6 1360.00 150 11.881723 \n", 123 | "1 2004-03-10 19:00:00 2.0 1292.25 112 9.397165 \n", 124 | "\n", 125 | " PT08.S2(NMHC) NOx(GT) PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3) \\\n", 126 | "0 1045.50 166.0 1056.25 113.0 1692.00 1267.50 \n", 127 | "1 954.75 103.0 1173.75 92.0 1558.75 972.25 \n", 128 | "\n", 129 | " T RH AH \n", 130 | "0 13.6 48.875001 0.757754 \n", 131 | "1 13.3 47.700000 0.725487 " 132 | ] 133 | }, 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "path_of_input_file = r'D:\\kaggle_trials\\AirQualityUCI\\AirQualityUCI.xlsx'\n", 141 | "df = pd.read_excel(path_of_input_file)\n", 142 | "df.head(2)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "
\n", 150 | "Preprocessing data : We separate out the numerical and categorical columns from the data to be used for scaling and encoding respectively \n", 151 | "
" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 9, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "cols_needed = list(df.columns)\n", 161 | "cols_needed = cols_needed[2:len(cols_needed)-1]\n", 162 | "\n", 163 | "possible_numeric_cols = list(df._get_numeric_data().columns)\n", 164 | "\n", 165 | "categorical_columns = list(set(cols_needed)- set(possible_numeric_cols))\n", 166 | "\n", 167 | "numerical_columns = []\n", 168 | "for i in range(len(possible_numeric_cols)):\n", 169 | " col_name = possible_numeric_cols[i]\n", 170 | " if len(df[col_name].unique())<10:\n", 171 | " categorical_columns.append(col_name)\n", 172 | " else:\n", 173 | " numerical_columns.append(col_name)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "
\n", 181 | "Missing value Treatment: We impute the numerical missing values with their respective means and the categorical values with their modes.\n", 182 | "
" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 11, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "for i in range(len(categorical_columns)):\n", 192 | " df[categorical_columns[i]] = df[categorical_columns[i]].fillna(df[categorical_columns[i]].mode()[0])\n", 193 | "mean_impute_dict ={}\n", 194 | "for i in range(len(numerical_columns)):\n", 195 | " mean_impute_dict[numerical_columns[i]] = np.nanmean(np.float_(df[numerical_columns[i]].values))\n", 196 | "for i in range(len(numerical_columns)):\n", 197 | " df[numerical_columns[i]] = df[numerical_columns[i]].fillna(mean_impute_dict[numerical_columns[i]])" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "
\n", 205 | "Scaling and Encoding: We scale and one hot encode the data to get the matrix we need for calculations\n", 206 | "
" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 13, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "ohe = OneHotEncoder()\n", 216 | "scalar = MinMaxScaler()\n", 217 | "encoded_matrix = ohe.fit_transform(df[categorical_columns])\n", 218 | "scaled_matrix = scalar.fit_transform(df[numerical_columns])\n", 219 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 220 | "Y = scalar.fit_transform(df[['AH']])\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "
\n", 228 | "Train Test Split : We split the data to train and test set \n", 229 | "
" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 14, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.2, random_state=42)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "
\n", 246 | "Parameter Tuning and setting Grid for parameters: We set up the grid for parameter tuning and then tune the parameters to get the optimal list of parameters to use\n", 247 | "
" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 15, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "Bayesian_ridge_grid = {'n_iter' : hp.choice('n_iter',range(300,600)),\n", 257 | " 'alpha_1': hp.uniform('alpha_1',0.0,1.0),\n", 258 | " 'alpha_2': hp.uniform('alpha_2',0.0,1.0),\n", 259 | " 'lambda_1': hp.uniform('lambda_1',0.0,1.0),\n", 260 | " 'lambda_2': hp.uniform('lambda_2',0.0,1.0),\n", 261 | " }" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 18, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "100%|████████████████████████████████████████████████| 100/100 [00:02<00:00, 34.63it/s, best loss: -0.9999999999995861]\n", 274 | "The best parameter tuned on training set is given by :- {'alpha_1': 0.7822556011911279, 'alpha_2': 0.0014753500365261268, 'lambda_1': 0.12542036284480557, 'lambda_2': 0.8048114623228889, 'n_iter': 333}\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "def hyperopt_train_test(params):\n", 280 | " reg = BayesianRidge(**params)\n", 281 | " return cross_val_score(reg, X_train, y_train).mean()\n", 282 | "\n", 283 | "def function_to_minimise(params):\n", 284 | " accuracy = hyperopt_train_test(params)\n", 285 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 286 | "\n", 287 | "\n", 288 | "trials = Trials()\n", 289 | "best = fmin(function_to_minimise, Bayesian_ridge_grid, algo=tpe.suggest, max_evals=100, trials=trials)\n", 290 | "best_parameters = space_eval(Bayesian_ridge_grid, best)\n", 291 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "
\n", 299 | "Implementing the model: We now implement the model with tuned parameters and get the R^2 score\n", 300 | "
" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 21, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "BayesianRidge(alpha_1=0.7822556011911279, alpha_2=0.0014753500365261268,\n", 312 | " compute_score=False, copy_X=True, fit_intercept=True,\n", 313 | " lambda_1=0.12542036284480557, lambda_2=0.8048114623228889,\n", 314 | " n_iter=333, normalize=False, tol=0.001, verbose=False)" 315 | ] 316 | }, 317 | "execution_count": 21, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "model = BayesianRidge(**best_parameters)\n", 324 | "model.fit(X_train, y_train)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 22, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 334 | "output_type": "stream", 335 | "text": [ 336 | "The coefficient of determination is:- 0.9999999999999205\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "y_pred = model.predict(X_test)\n", 342 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [] 351 | } 352 | ], 353 | "metadata": { 354 | "kernelspec": { 355 | "display_name": "Python 3", 356 | "language": "python", 357 | "name": "python3" 358 | }, 359 | "language_info": { 360 | "codemirror_mode": { 361 | "name": "ipython", 362 | "version": 3 363 | }, 364 | "file_extension": ".py", 365 | "mimetype": "text/x-python", 366 | "name": "python", 367 | "nbconvert_exporter": "python", 368 | "pygments_lexer": "ipython3", 369 | "version": "3.7.4" 370 | } 371 | }, 372 | "nbformat": 4, 373 | "nbformat_minor": 2 374 | } 375 | -------------------------------------------------------------------------------- /Project29/weather_prediction_passive_aggressive_regression/Passive Aggressive Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA\n", 16 | "from sklearn.linear_model import PassiveAggressiveRegressor\n", 17 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 18 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 19 | "from sklearn.metrics import *\n", 20 | "import hyperopt\n", 21 | "from hyperopt import *\n", 22 | "from hyperopt import fmin, tpe, hp, space_eval\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline \n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "
\n", 32 | "Loading the data: We load the data from the mentioned path\n", 33 | "
" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
Formatted DateSummaryPrecip TypeTemperature (C)Apparent Temperature (C)HumidityWind Speed (km/h)Wind Bearing (degrees)Visibility (km)Loud CoverPressure (millibars)Daily Summary
02006-04-01 00:00:00.000 +0200Partly Cloudyrain9.4722227.3888890.8914.1197251.015.82630.01015.13Partly cloudy throughout the day.
12006-04-01 01:00:00.000 +0200Partly Cloudyrain9.3555567.2277780.8614.2646259.015.82630.01015.63Partly cloudy throughout the day.
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " Formatted Date Summary Precip Type Temperature (C) \\\n", 113 | "0 2006-04-01 00:00:00.000 +0200 Partly Cloudy rain 9.472222 \n", 114 | "1 2006-04-01 01:00:00.000 +0200 Partly Cloudy rain 9.355556 \n", 115 | "\n", 116 | " Apparent Temperature (C) Humidity Wind Speed (km/h) \\\n", 117 | "0 7.388889 0.89 14.1197 \n", 118 | "1 7.227778 0.86 14.2646 \n", 119 | "\n", 120 | " Wind Bearing (degrees) Visibility (km) Loud Cover Pressure (millibars) \\\n", 121 | "0 251.0 15.8263 0.0 1015.13 \n", 122 | "1 259.0 15.8263 0.0 1015.63 \n", 123 | "\n", 124 | " Daily Summary \n", 125 | "0 Partly cloudy throughout the day. \n", 126 | "1 Partly cloudy throughout the day. " 127 | ] 128 | }, 129 | "execution_count": 2, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "path_of_input_file = r'D:\\kaggle_trials\\szeged-weather\\weatherHistory.csv'\n", 136 | "df = pd.read_csv(path_of_input_file)\n", 137 | "df.head(2)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "
\n", 145 | "Preprocessing data : We separate out the numerical and categorical columns from the data to be used for scaling and encoding respectively \n", 146 | "
" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 3, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "cols_needed = list(df.columns)\n", 156 | "cols_needed = cols_needed[1:len(cols_needed)-1]\n", 157 | "\n", 158 | "possible_numeric_cols = list(df._get_numeric_data().columns)\n", 159 | "possible_numeric_cols.remove('Humidity')\n", 160 | "categorical_columns = list(set(cols_needed)- set(possible_numeric_cols))\n", 161 | "\n", 162 | "numerical_columns = []\n", 163 | "for i in range(len(possible_numeric_cols)):\n", 164 | " col_name = possible_numeric_cols[i]\n", 165 | " if len(df[col_name].unique())<10:\n", 166 | " categorical_columns.append(col_name)\n", 167 | " else:\n", 168 | " numerical_columns.append(col_name)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "
\n", 176 | "Missing value Treatment: We impute the numerical missing values with their respective means and the categorical values with their modes.\n", 177 | "
" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 4, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "for i in range(len(categorical_columns)):\n", 187 | " df[categorical_columns[i]] = df[categorical_columns[i]].fillna(df[categorical_columns[i]].mode()[0])\n", 188 | "mean_impute_dict ={}\n", 189 | "for i in range(len(numerical_columns)):\n", 190 | " mean_impute_dict[numerical_columns[i]] = np.nanmean(np.float_(df[numerical_columns[i]].values))\n", 191 | "for i in range(len(numerical_columns)):\n", 192 | " df[numerical_columns[i]] = df[numerical_columns[i]].fillna(mean_impute_dict[numerical_columns[i]])" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "
\n", 200 | "Scaling and Encoding: We scale and one hot encode the data to get the matrix we need for calculations\n", 201 | "
" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "ohe = OneHotEncoder()\n", 211 | "scalar = MinMaxScaler()\n", 212 | "encoded_matrix = ohe.fit_transform(df[categorical_columns])\n", 213 | "scaled_matrix = scalar.fit_transform(df[numerical_columns])\n", 214 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 215 | "Y = scalar.fit_transform(df[['Humidity']])\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "
\n", 223 | "Train Test Split : We split the data to train and test set \n", 224 | "
" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.2, random_state=42)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "
\n", 241 | "Parameter Tuning and setting Grid for parameters: We set up the grid for parameter tuning and then tune the parameters to get the optimal list of parameters to use\n", 242 | "
" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 11, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "Pars_grid = { 'max_iter': hp.choice('max_iter',range(2,200)),\n", 252 | " 'C': hp.uniform('C',0.0,1.0),\n", 253 | " 'loss' : hp.choice('loss',['epsilon_insensitive','squared_epsilon_insensitive'])\n", 254 | " \n", 255 | " }" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 12, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "100%|████████████████████████████████████████████████| 100/100 [00:55<00:00, 1.80it/s, best loss: -0.9739912951898163]\n", 268 | "The best parameter tuned on training set is given by :- {'C': 0.8948999920414936, 'loss': 'epsilon_insensitive', 'max_iter': 36}\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "def hyperopt_train_test(params):\n", 274 | " reg = PassiveAggressiveRegressor(**params,random_state=19)\n", 275 | " return cross_val_score(reg, X_train, y_train).mean()\n", 276 | "\n", 277 | "def function_to_minimise(params):\n", 278 | " accuracy = hyperopt_train_test(params)\n", 279 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 280 | "\n", 281 | "\n", 282 | "trials = Trials()\n", 283 | "best = fmin(function_to_minimise, Pars_grid, algo=tpe.suggest, max_evals=100, trials=trials)\n", 284 | "best_parameters = space_eval(Pars_grid, best)\n", 285 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "
\n", 293 | "Implementing the model: We now implement the model with tuned parameters and get the R^2 score\n", 294 | "
" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 13, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "PassiveAggressiveRegressor(C=0.8948999920414936, average=False,\n", 306 | " early_stopping=False, epsilon=0.1,\n", 307 | " fit_intercept=True, loss='epsilon_insensitive',\n", 308 | " max_iter=36, n_iter_no_change=5, random_state=None,\n", 309 | " shuffle=True, tol=0.001, validation_fraction=0.1,\n", 310 | " verbose=0, warm_start=False)" 311 | ] 312 | }, 313 | "execution_count": 13, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "model = PassiveAggressiveRegressor(**best_parameters)\n", 320 | "model.fit(X_train, y_train)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 14, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "The coefficient of determination is:- 0.9770641814885767\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "y_pred = model.predict(X_test)\n", 338 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | } 348 | ], 349 | "metadata": { 350 | "kernelspec": { 351 | "display_name": "Python 3", 352 | "language": "python", 353 | "name": "python3" 354 | }, 355 | "language_info": { 356 | "codemirror_mode": { 357 | "name": "ipython", 358 | "version": 3 359 | }, 360 | "file_extension": ".py", 361 | "mimetype": "text/x-python", 362 | "name": "python", 363 | "nbconvert_exporter": "python", 364 | "pygments_lexer": "ipython3", 365 | "version": "3.7.4" 366 | } 367 | }, 368 | "nbformat": 4, 369 | "nbformat_minor": 2 370 | } 371 | -------------------------------------------------------------------------------- /Project30/world_war2_weather_SGDRegressor/SGDRegressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA\n", 16 | "from sklearn.linear_model import SGDRegressor\n", 17 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 18 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 19 | "from sklearn.metrics import *\n", 20 | "import hyperopt\n", 21 | "from hyperopt import *\n", 22 | "from hyperopt import fmin, tpe, hp, space_eval\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline \n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "
\n", 32 | "Loading the data: We load the data from the mentioned path\n", 33 | "
" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 21, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
STAWindGustSpdMaxTempMinTempMeanTempSnowfallPoorWeatherYRMODADRSPDMAXMINMEA
010001NaN25.55555622.22222223.8888890NaN4271NaNNaN78.072.075.0
110001NaN28.88888921.66666725.5555560NaN4272NaNNaN84.071.078.0
\n", 118 | "
" 119 | ], 120 | "text/plain": [ 121 | " STA WindGustSpd MaxTemp MinTemp MeanTemp Snowfall PoorWeather \\\n", 122 | "0 10001 NaN 25.555556 22.222222 23.888889 0 NaN \n", 123 | "1 10001 NaN 28.888889 21.666667 25.555556 0 NaN \n", 124 | "\n", 125 | " YR MO DA DR SPD MAX MIN MEA \n", 126 | "0 42 7 1 NaN NaN 78.0 72.0 75.0 \n", 127 | "1 42 7 2 NaN NaN 84.0 71.0 78.0 " 128 | ] 129 | }, 130 | "execution_count": 21, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "path_of_input_file = r'D:\\kaggle_trials\\weatherww2\\Summary of Weather.csv'\n", 137 | "cols2read = ['STA','WindGustSpd','MaxTemp','MinTemp','MeanTemp'\n", 138 | " ,'Snowfall','PoorWeather','YR','MO','DA','DR',\n", 139 | " 'SPD','MAX','MIN','MEA']\n", 140 | "\n", 141 | "df = pd.read_csv(path_of_input_file,usecols= cols2read)\n", 142 | "df.head(2)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "
\n", 157 | "Preprocessing data : We separate out the numerical and categorical columns from the data to be used for scaling and encoding respectively \n", 158 | "
" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 22, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "cols_needed = list(df.columns)\n", 168 | "cols_needed = cols_needed[:len(cols_needed)-11]\n", 169 | "cols_needed.remove('MaxTemp')\n", 170 | "possible_numeric_cols = list(df._get_numeric_data().columns)\n", 171 | "possible_numeric_cols.remove('MaxTemp')\n", 172 | "categorical_columns = list(set(cols_needed)- set(possible_numeric_cols))\n", 173 | "\n", 174 | "numerical_columns = []\n", 175 | "for i in range(len(possible_numeric_cols)):\n", 176 | " col_name = possible_numeric_cols[i]\n", 177 | " if len(df[col_name].unique())<10:\n", 178 | " categorical_columns.append(col_name)\n", 179 | " else:\n", 180 | " numerical_columns.append(col_name)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "
\n", 188 | "Missing value Treatment: We impute the numerical missing values with their respective means and the categorical values with their modes.\n", 189 | "
" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 23, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "for i in range(len(categorical_columns)):\n", 199 | " df[categorical_columns[i]] = df[categorical_columns[i]].fillna(df[categorical_columns[i]].mode()[0])\n", 200 | "mean_impute_dict ={}\n", 201 | "for i in range(len(numerical_columns)):\n", 202 | " mean_impute_dict[numerical_columns[i]] = np.nanmean(np.float_(df[numerical_columns[i]].values))\n", 203 | "for i in range(len(numerical_columns)):\n", 204 | " df[numerical_columns[i]] = df[numerical_columns[i]].fillna(mean_impute_dict[numerical_columns[i]])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "
\n", 212 | "Scaling and Encoding: We scale and one hot encode the data to get the matrix we need for calculations\n", 213 | "
" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 24, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "ohe = OneHotEncoder()\n", 223 | "scalar = MinMaxScaler()\n", 224 | "encoded_matrix = ohe.fit_transform(df[categorical_columns])\n", 225 | "scaled_matrix = scalar.fit_transform(df[numerical_columns])\n", 226 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 227 | "Y = scalar.fit_transform(df[['MaxTemp']])\n" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "
\n", 235 | "Train Test Split : We split the data to train and test set \n", 236 | "
" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 25, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.2, random_state=42)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "
\n", 253 | "Parameter Tuning and setting Grid for parameters: We set up the grid for parameter tuning and then tune the parameters to get the optimal list of parameters to use\n", 254 | "
" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 27, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "SGDR_grid = {'loss' : hp.choice('loss',['squared_loss','huber','epsilon_insensitive','squared_epsilon_insensitive']),\n", 264 | " 'penalty' : hp.choice('penalty',['l2','l1','elasticnet','none']),\n", 265 | " 'alpha': hp.uniform('alpha',0.0,1.0),\n", 266 | " 'learning_rate': hp.choice('learning_rate',['constant','optimal','invscaling','adaptive'])\n", 267 | " \n", 268 | " }" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 28, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "100%|████████████████████████████████████████████████| 100/100 [03:09<00:00, 1.90s/it, best loss: -0.9939970686605907]\n", 281 | "The best parameter tuned on training set is given by :- {'alpha': 0.055081188680586174, 'learning_rate': 'adaptive', 'loss': 'squared_loss', 'penalty': 'none'}\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "def hyperopt_train_test(params):\n", 287 | " reg = SGDRegressor(**params)\n", 288 | " return cross_val_score(reg, X_train, y_train).mean()\n", 289 | "\n", 290 | "def function_to_minimise(params):\n", 291 | " accuracy = hyperopt_train_test(params)\n", 292 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 293 | "\n", 294 | "\n", 295 | "trials = Trials()\n", 296 | "best = fmin(function_to_minimise, SGDR_grid, algo=tpe.suggest, max_evals=100, trials=trials)\n", 297 | "best_parameters = space_eval(SGDR_grid, best)\n", 298 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "
\n", 306 | "Implementing the model: We now implement the model with tuned parameters and get the R^2 score\n", 307 | "
" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 29, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "SGDRegressor(alpha=0.055081188680586174, average=False, early_stopping=False,\n", 319 | " epsilon=0.1, eta0=0.01, fit_intercept=True, l1_ratio=0.15,\n", 320 | " learning_rate='adaptive', loss='squared_loss', max_iter=1000,\n", 321 | " n_iter_no_change=5, penalty='none', power_t=0.25,\n", 322 | " random_state=None, shuffle=True, tol=0.001,\n", 323 | " validation_fraction=0.1, verbose=0, warm_start=False)" 324 | ] 325 | }, 326 | "execution_count": 29, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "model = SGDRegressor(**best_parameters)\n", 333 | "model.fit(X_train, y_train)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 30, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "The coefficient of determination is:- 0.9947724644478078\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "y_pred = model.predict(X_test)\n", 351 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [] 360 | } 361 | ], 362 | "metadata": { 363 | "kernelspec": { 364 | "display_name": "Python 3", 365 | "language": "python", 366 | "name": "python3" 367 | }, 368 | "language_info": { 369 | "codemirror_mode": { 370 | "name": "ipython", 371 | "version": 3 372 | }, 373 | "file_extension": ".py", 374 | "mimetype": "text/x-python", 375 | "name": "python", 376 | "nbconvert_exporter": "python", 377 | "pygments_lexer": "ipython3", 378 | "version": "3.7.4" 379 | } 380 | }, 381 | "nbformat": 4, 382 | "nbformat_minor": 2 383 | } 384 | -------------------------------------------------------------------------------- /Project13/Spam_Or_Ham_MultinomialNB/multinomial NB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import warnings\n", 18 | "warnings.filterwarnings('ignore')\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import os\n", 22 | "from imblearn.over_sampling import SMOTE \n", 23 | "from sklearn.naive_bayes import MultinomialNB\n", 24 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 25 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 26 | "from sklearn.metrics import *\n", 27 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", 28 | "from sklearn.decomposition import PCA,TruncatedSVD\n", 29 | "import hyperopt\n", 30 | "from hyperopt import *\n", 31 | "from hyperopt import fmin, tpe, hp, space_eval\n", 32 | "import string\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "%matplotlib inline \n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "
\n", 42 | "Loading dataset: We load the dataset and rename certain columns to be used in our analysis\n", 43 | "
" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | "
labelsdata
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
\n", 99 | "
" 100 | ], 101 | "text/plain": [ 102 | " labels data\n", 103 | "0 ham Go until jurong point, crazy.. Available only ...\n", 104 | "1 ham Ok lar... Joking wif u oni...\n", 105 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 106 | "3 ham U dun say so early hor... U c already then say..." 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "path_of_input_file = 'D:\\\\kaggle_trials\\\\sms-spam-collection-dataset\\\\spam.csv'\n", 116 | "df = pd.read_csv(path_of_input_file,encoding='ISO-8859-1')\n", 117 | "df = df.drop([\"Unnamed: 2\", \"Unnamed: 3\", \"Unnamed: 4\"], axis=1)\n", 118 | "df.columns = ['labels', 'data']\n", 119 | "df.head(4)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "
\n", 127 | "Imbalance check: We can clearly see that the data is imbalanced because there will be more usual mails than spam mails." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "The number of labels are 2\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "num_labels = df['labels'].unique()\n", 145 | "print('The number of labels are ',len(num_labels))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "The number of ham labels are :- 4825\n", 158 | "The number of spam labels are :- 747\n", 159 | "We dont have a balanced dataset and hence we need to perform imbalanced dataset handling\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "for i in range(len(num_labels)):\n", 165 | " print('The number of ', num_labels[i] ,' labels are :- ',len(df[df['labels']==num_labels[i]]))\n", 166 | "print('We dont have a balanced dataset and hence we need to perform imbalanced dataset handling')" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "
\n", 174 | "Label Binarizing: We binarize the labels to integers making it easy to feed into the model\n", 175 | "
" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "lb = LabelBinarizer()\n", 185 | "Y = lb.fit_transform(df['labels'].values)\n" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "
\n", 193 | "Text Preprocessing: We preprocess the text data by removing punctuations and converting every word to lowercase. Also we create a feature matrix X by using Tf-Idf vectorizer. We used Tf-idf because there are some words people use in usual sms conversations that may not have any word embeddings associated with them\n", 194 | "
" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 6, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "\n", 204 | "def preprocess_text(statement):\n", 205 | " punc_removed_statement = \"\".join(l for l in statement if l not in string.punctuation)\n", 206 | " splitting2words = punc_removed_statement.split()\n", 207 | " lower_cased_statement = \" \".join(word.lower() for word in splitting2words)\n", 208 | " return lower_cased_statement" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 7, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "df['preprocessed_data']= df['data'].apply(preprocess_text)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 8, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "tfidf = TfidfVectorizer(decode_error='ignore')\n", 227 | "X = tfidf.fit_transform(df['preprocessed_data'])\n" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "
\n", 235 | "Removing Imbalance : Our data is balanced now after applying SMOTE\n", 236 | "
" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 10, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "sm = SMOTE(random_state=42)\n", 246 | "X_res, Y_res = sm.fit_resample(X, Y)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 11, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Positive examples before Oversampling is 747\n", 259 | "Negative examples before Oversampling is 4825\n", 260 | "\n", 261 | "\n", 262 | "Positive examples after Oversampling is 4825\n", 263 | "Negative examples after Oversampling is 4825\n", 264 | "\n", 265 | "\n" 266 | ] 267 | } 268 | ], 269 | "source": [ 270 | "print('Positive examples before Oversampling is ', sum(Y == [1])[0])\n", 271 | "print('Negative examples before Oversampling is ', sum(Y == [0])[0])\n", 272 | "print('\\n')\n", 273 | "print('Positive examples after Oversampling is ', sum(Y_res == [1]))\n", 274 | "print('Negative examples after Oversampling is ', sum(Y_res == [0]))\n", 275 | "print('\\n')" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "
\n", 283 | "Train test split: We create the train test split of the data\n", 284 | "
" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 12, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "X_train, X_test, y_train, y_test = train_test_split(X_res, Y_res, test_size=0.33, random_state=42)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "
\n", 301 | "Hyper parameter grid: We now set the grid for tuning the hyper parameters associated with the model.\n", 302 | "
" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 13, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "multinomial_grid = {'alpha' : hp.uniform('alpha',0.5,5),\n", 312 | " 'fit_prior' : hp.choice('fit_prior',[True,False])}" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 14, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "100%|████████████████████████████████████████████████| 500/500 [00:06<00:00, 75.32it/s, best loss: -0.9862334199996453]\n", 325 | "The best parameter tuned on training set is given by :- {'alpha': 0.5031703617001609, 'fit_prior': False}\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "def hyperopt_train_test(params):\n", 331 | " clf = MultinomialNB(**params)\n", 332 | " return cross_val_score(clf, X_train, y_train).mean()\n", 333 | "\n", 334 | "def function_to_minimise(params):\n", 335 | " accuracy = hyperopt_train_test(params)\n", 336 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 337 | "\n", 338 | "\n", 339 | "trials = Trials()\n", 340 | "best = fmin(function_to_minimise, multinomial_grid, algo=tpe.suggest, max_evals=500, trials=trials)\n", 341 | "best_parameters = space_eval(multinomial_grid, best)\n", 342 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "
\n", 350 | "Final Results and Model fitting: We finally fit the model with the tuned hyper parameters and present a classification report as our analysis \n", 351 | "
" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 15, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "MultinomialNB(alpha=0.5031703617001609, class_prior=None, fit_prior=False)" 363 | ] 364 | }, 365 | "execution_count": 15, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "model = MultinomialNB(**best_parameters)\n", 372 | "model.fit(X_train, y_train)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 16, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "y_pred = model.predict(X_test)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 17, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "name": "stdout", 391 | "output_type": "stream", 392 | "text": [ 393 | " precision recall f1-score support\n", 394 | "\n", 395 | " 0 0.99 0.99 0.99 1614\n", 396 | " 1 0.99 0.99 0.99 1571\n", 397 | "\n", 398 | " accuracy 0.99 3185\n", 399 | " macro avg 0.99 0.99 0.99 3185\n", 400 | "weighted avg 0.99 0.99 0.99 3185\n", 401 | "\n" 402 | ] 403 | } 404 | ], 405 | "source": [ 406 | "print(classification_report(y_pred,y_test))" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "Python 3", 420 | "language": "python", 421 | "name": "python3" 422 | }, 423 | "language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.7.3" 434 | } 435 | }, 436 | "nbformat": 4, 437 | "nbformat_minor": 2 438 | } 439 | -------------------------------------------------------------------------------- /Project17/Stumble Upon Bagging Classifier/Bagging Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA,TruncatedSVD\n", 16 | "from sklearn.ensemble import *\n", 17 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 18 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 19 | "from sklearn.metrics import *\n", 20 | "import hyperopt\n", 21 | "from hyperopt import *\n", 22 | "from hyperopt import fmin, tpe, hp, space_eval\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline \n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "
\n", 32 | "oading dataset: We load our dataset here \n", 33 | "
" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | "
urlurlidboilerplatealchemy_categoryalchemy_category_scoreavglinksizecommonlinkratio_1commonlinkratio_2commonlinkratio_3commonlinkratio_4...is_newslengthyLinkDomainlinkwordscorenews_front_pagenon_markup_alphanum_charactersnumberOfLinksnumwords_in_urlparametrizedLinkRatiospelling_errors_ratiolabel
0http://www.bloomberg.com/news/2010-12-23/ibm-p...4042{\"title\":\"IBM Sees Holographic Calls Air Breat...business0.7891312.0555560.6764710.2058820.0470590.023529...11240542417080.1529410.0791300
1http://www.popsci.com/technology/article/2012-...8471{\"title\":\"The Fully Electronic Futuristic Star...recreation0.5741473.6779660.5080210.2887700.2139040.144385...11400497318790.1818180.1254481
\n", 136 | "

2 rows × 27 columns

\n", 137 | "
" 138 | ], 139 | "text/plain": [ 140 | " url urlid \\\n", 141 | "0 http://www.bloomberg.com/news/2010-12-23/ibm-p... 4042 \n", 142 | "1 http://www.popsci.com/technology/article/2012-... 8471 \n", 143 | "\n", 144 | " boilerplate alchemy_category \\\n", 145 | "0 {\"title\":\"IBM Sees Holographic Calls Air Breat... business \n", 146 | "1 {\"title\":\"The Fully Electronic Futuristic Star... recreation \n", 147 | "\n", 148 | " alchemy_category_score avglinksize commonlinkratio_1 commonlinkratio_2 \\\n", 149 | "0 0.789131 2.055556 0.676471 0.205882 \n", 150 | "1 0.574147 3.677966 0.508021 0.288770 \n", 151 | "\n", 152 | " commonlinkratio_3 commonlinkratio_4 ... is_news lengthyLinkDomain \\\n", 153 | "0 0.047059 0.023529 ... 1 1 \n", 154 | "1 0.213904 0.144385 ... 1 1 \n", 155 | "\n", 156 | " linkwordscore news_front_page non_markup_alphanum_characters \\\n", 157 | "0 24 0 5424 \n", 158 | "1 40 0 4973 \n", 159 | "\n", 160 | " numberOfLinks numwords_in_url parametrizedLinkRatio \\\n", 161 | "0 170 8 0.152941 \n", 162 | "1 187 9 0.181818 \n", 163 | "\n", 164 | " spelling_errors_ratio label \n", 165 | "0 0.079130 0 \n", 166 | "1 0.125448 1 \n", 167 | "\n", 168 | "[2 rows x 27 columns]" 169 | ] 170 | }, 171 | "execution_count": 2, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "input_file_path = r'D:\\kaggle_trials\\stumbleupon\\train.tsv'\n", 178 | "df = pd.read_csv(input_file_path,sep = '\\t')\n", 179 | "df.head(2)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "
\n", 187 | "Preprocessing data : We handle missing data, one hot encode categorical data and then finally scale numerical data.\n", 188 | "
" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 3, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "cols_needed = df.columns\n", 198 | "df = df[cols_needed].replace('?', np.nan)\n", 199 | "irrelevant_columns = ['framebased','urlid','url','boilerplate']\n", 200 | "cols_needed = list(set(cols_needed)-set(irrelevant_columns))\n", 201 | "cols_needed_features = list(cols_needed[:len(cols_needed)-1])\n", 202 | "cols_needed_labels = cols_needed[-1]\n", 203 | "cols_encoding_needed = ['alchemy_category','hasDomainLink','is_news','lengthyLinkDomain','news_front_page']\n", 204 | "cols_scaling_needed = list(set(cols_needed)-set(cols_encoding_needed))\n", 205 | "for i in range(len(cols_encoding_needed)):\n", 206 | " df[cols_encoding_needed[i]] = df[cols_encoding_needed[i]].fillna(df[cols_encoding_needed[i]].mode()[0])\n", 207 | "mean_impute_dict ={}\n", 208 | "for i in range(len(cols_scaling_needed)):\n", 209 | " mean_impute_dict[cols_scaling_needed[i]] = np.nanmean(np.float_(df[cols_scaling_needed[i]].values))\n", 210 | "for i in range(len(cols_scaling_needed)):\n", 211 | " df[cols_scaling_needed[i]] = df[cols_scaling_needed[i]].fillna(mean_impute_dict[cols_scaling_needed[i]])\n", 212 | "ohe = OneHotEncoder()\n", 213 | "scalar = MinMaxScaler()\n", 214 | "encoded_matrix = ohe.fit_transform(df[cols_encoding_needed])\n", 215 | "scaled_matrix = scalar.fit_transform(df[cols_scaling_needed])\n", 216 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 217 | "Y = df['label'].values" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "
\n", 225 | "Train Test split: We perform train test split on the data\n", 226 | "
" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 4, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.33, random_state=42)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "
\n", 243 | "Hyper parameter grid creation : We perform parameter tuning by creating a grid of hyper parameters\n", 244 | "
" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 5, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "bagging_grid = {'n_estimators' : hp.choice('n_estimators',range(5,20)),\n", 254 | " 'max_features' : hp.uniform('max_features',0.1,0.95),\n", 255 | " 'bootstrap' : hp.choice('bootstrap',[True,False])\n", 256 | " }" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 6, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "100%|█████████████████████████████████████████████████████████████████| 30/30 [00:12<00:00, 2.82it/s, best loss: -1.0]\n", 269 | "The best parameter tuned on training set is given by :- {'bootstrap': False, 'max_features': 0.8755934908566991, 'n_estimators': 10}\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "def hyperopt_train_test(params):\n", 275 | " clf = BaggingClassifier(**params)\n", 276 | " return cross_val_score(clf, X_train, y_train).mean()\n", 277 | "\n", 278 | "def function_to_minimise(params):\n", 279 | " accuracy = hyperopt_train_test(params)\n", 280 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 281 | "\n", 282 | "\n", 283 | "trials = Trials()\n", 284 | "best = fmin(function_to_minimise, bagging_grid, algo=tpe.suggest, max_evals=30, trials=trials)\n", 285 | "best_parameters = space_eval(bagging_grid, best)\n", 286 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "
\n", 294 | "Model Fitting and analysis: We fit the model using the tuned parameters and then present a classification report as analysis\n", 295 | "
" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 7, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "BaggingClassifier(base_estimator=None, bootstrap=False,\n", 307 | " bootstrap_features=False, max_features=0.8755934908566991,\n", 308 | " max_samples=1.0, n_estimators=10, n_jobs=None,\n", 309 | " oob_score=False, random_state=None, verbose=0,\n", 310 | " warm_start=False)" 311 | ] 312 | }, 313 | "execution_count": 7, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "model = BaggingClassifier(**best_parameters)\n", 320 | "model.fit(X_train, y_train)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 8, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "y_pred = model.predict(X_test)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 9, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | " precision recall f1-score support\n", 342 | "\n", 343 | " 0 1.00 1.00 1.00 1198\n", 344 | " 1 1.00 1.00 1.00 1243\n", 345 | "\n", 346 | " accuracy 1.00 2441\n", 347 | " macro avg 1.00 1.00 1.00 2441\n", 348 | "weighted avg 1.00 1.00 1.00 2441\n", 349 | "\n" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "print(classification_report(y_pred,y_test))" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "Python 3", 368 | "language": "python", 369 | "name": "python3" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.7.3" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | -------------------------------------------------------------------------------- /Project44/NGBoost_implementation/Train.csv: -------------------------------------------------------------------------------- 1 | City,Location_Score,Internal_Audit_Score,External_Audit_Score,Fin_Score,Loss_score,Past_Results,IsUnderRisk 2 | 2,8.032,14,8,3,6,0,1 3 | 31,77.73,8,3,3,8,1,0 4 | 40,59.203,3,12,11,3,0,1 5 | 12,73.08,4,5,7,6,0,0 6 | 4,15.666,13,15,6,7,2,1 7 | 1,6.237,10,10,12,3,1,1 8 | 9,13.795,8,3,5,3,0,0 9 | 23,74.132,11,15,5,8,0,1 10 | 40,69.522,8,4,7,6,0,0 11 | 38,6.577,8,5,7,3,1,0 12 | 4,75.514,8,12,4,4,0,1 13 | 0,34.522,11,10,14,3,1,1 14 | 10,34.374,4,8,7,3,1,0 15 | 8,22.872,8,8,7,7,0,1 16 | 9,16.313,14,10,3,4,1,1 17 | 11,16.396,13,15,8,6,0,1 18 | 19,9.537,3,7,5,7,0,0 19 | 9,16.433,12,11,15,3,1,1 20 | 6,41.253,3,6,4,5,0,0 21 | 2,22.067,7,3,8,5,1,1 22 | 8,7.061,3,5,6,8,1,0 23 | 23,38.318,7,7,6,3,1,0 24 | 1,15.931,10,7,7,6,0,1 25 | 6,8.875,7,7,7,4,1,1 26 | 10,17.463,13,15,12,3,1,1 27 | 37,7.087,11,3,4,5,1,1 28 | 6,23.298,7,8,4,4,1,1 29 | 2,70.261,3,6,5,3,0,0 30 | 3,11.092,8,5,4,6,0,0 31 | 40,14.226,8,5,6,4,1,1 32 | 40,21.448,5,7,7,3,1,0 33 | 5,73.111,10,8,4,8,0,1 34 | 2,73.793,7,7,5,4,0,1 35 | 31,9.606,11,4,4,4,1,1 36 | 6,12.058,6,7,3,7,1,1 37 | 41,17.908,6,3,7,6,1,0 38 | 10,7.11,7,5,7,6,1,0 39 | 40,73.507,8,5,5,7,1,1 40 | 16,68.547,6,3,3,7,1,0 41 | 9,5.518,12,4,7,6,0,1 42 | 22,15.105,5,8,5,8,0,0 43 | 6,18.332,12,14,12,8,1,1 44 | 
40,33.219,6,7,7,5,1,0 45 | 10,14.211,11,3,13,6,1,1 46 | 40,61.459,10,8,5,8,1,1 47 | 6,75.615,8,5,4,8,1,0 48 | 23,67.797,3,4,8,3,0,0 49 | 2,10.968,15,13,15,6,2,1 50 | 40,9.348,15,10,6,5,0,1 51 | 37,22.864,12,14,13,3,2,1 52 | 2,73.972,10,3,3,3,1,1 53 | 37,9.138,6,3,7,8,1,0 54 | 41,16.482,4,3,6,4,1,0 55 | 40,17.024,12,13,7,5,1,1 56 | 31,64.502,5,8,8,5,1,0 57 | 38,16.893,11,3,7,6,0,1 58 | 41,63.372,10,15,4,5,0,1 59 | 29,12.337,12,10,11,5,1,1 60 | 9,61.625,3,3,5,4,0,0 61 | 41,62.94,7,8,8,6,0,0 62 | 4,18.072,10,3,4,3,1,1 63 | 19,64.93,6,6,5,6,1,0 64 | 0,24.597,11,12,14,3,1,1 65 | 38,66.043,6,6,5,5,1,0 66 | 37,15.586,7,3,14,3,1,1 67 | 2,9.83,3,7,6,6,0,0 68 | 6,17.653,10,10,12,6,0,1 69 | 8,6.141,8,3,5,4,1,0 70 | 5,70.881,9,13,4,3,0,1 71 | 40,11.362,4,6,6,6,1,0 72 | 40,7.031,5,5,4,11,1,1 73 | 37,11.446,5,3,8,4,0,0 74 | 3,8.165,13,11,13,9,2,1 75 | 37,22.682,8,3,8,7,1,1 76 | 38,68.387,13,14,8,6,1,1 77 | 16,18.916,8,15,11,7,1,1 78 | 40,21.057,7,5,4,8,0,1 79 | 9,9.183,11,8,8,8,0,1 80 | 2,69.686,5,5,3,5,0,0 81 | 28,17.085,12,13,3,6,1,1 82 | 10,13.895,5,6,5,8,1,0 83 | 31,65.878,5,10,8,4,1,1 84 | 38,63.619,3,5,4,3,1,0 85 | 9,12.289,9,3,7,4,0,1 86 | 9,14.239,6,7,4,6,0,0 87 | 3,16.714,7,12,5,6,0,1 88 | 5,18.76,7,13,10,3,0,1 89 | 31,20.709,8,15,6,4,0,1 90 | 37,69.554,7,3,7,3,0,0 91 | 5,16.925,12,11,12,8,1,1 92 | 40,19.273,7,8,4,7,1,1 93 | 31,32.018,8,6,4,5,1,0 94 | 9,12.78,7,4,5,4,0,0 95 | 10,13.312,13,15,15,4,1,1 96 | 9,13.948,11,4,3,6,0,1 97 | 31,20.751,12,15,11,6,0,1 98 | 2,11.506,13,10,11,8,1,1 99 | 40,68.709,6,4,3,6,0,1 100 | 22,16.466,3,4,8,8,1,1 101 | 4,73.469,8,3,5,8,1,0 102 | 10,14.56,13,4,3,8,1,1 103 | 6,15.273,15,10,7,8,0,1 104 | 9,17.042,7,6,4,7,1,0 105 | 2,64.536,6,4,7,5,0,0 106 | 9,7.33,4,5,5,11,0,1 107 | 23,9.178,10,13,10,3,2,1 108 | 18,20.105,8,6,10,5,1,1 109 | 41,60.605,3,4,7,6,0,0 110 | 35,73.107,7,5,5,4,0,0 111 | 41,74.696,8,4,7,5,0,0 112 | 10,6.99,11,7,7,5,0,1 113 | 13,10.026,13,15,14,3,1,1 114 | 32,11.969,7,8,7,7,1,0 115 | 14,13.858,12,5,7,7,0,1 116 | 40,62.755,3,8,4,7,0,1 117 | 31,16.789,13,6,6,8,0,1 118 | 5,11.703,10,14,11,7,1,1 119 | 3,58.664,11,11,10,8,0,1 120 | 41,71.89,4,6,3,7,0,0 121 | 10,8.543,11,15,13,8,3,1 122 | 30,11.016,15,14,10,7,0,1 123 | 10,70.989,9,13,8,5,1,1 124 | 12,77.731,7,7,7,7,0,1 125 | 16,14.956,14,13,14,5,1,1 126 | 8,11.475,10,14,11,6,3,1 127 | 0,6.265,6,3,13,4,0,1 128 | 10,69.63,11,7,4,6,1,1 129 | 23,21.842,8,8,4,4,0,0 130 | 9,17.917,3,3,6,7,0,0 131 | 31,11.331,6,3,8,8,1,1 132 | 29,11.587,11,11,12,5,1,1 133 | 5,17.81,10,6,8,6,1,1 134 | 3,22.057,12,4,6,7,1,1 135 | 40,15.402,7,6,8,3,0,1 136 | 37,26.522,6,5,4,7,1,0 137 | 38,62.351,6,4,8,7,0,0 138 | 38,67.086,8,8,8,4,1,0 139 | 35,72.523,6,3,6,3,0,0 140 | 43,14.394,7,4,4,8,0,0 141 | 2,9.513,10,7,7,6,0,1 142 | 39,23.428,11,12,11,6,0,1 143 | 23,16.741,11,11,6,3,0,1 144 | 41,20.429,10,9,10,6,1,1 145 | 41,6.365,4,4,4,8,1,0 146 | 6,15.527,6,8,3,3,1,1 147 | 41,7.738,11,15,11,8,2,1 148 | 40,15.243,7,5,5,4,0,1 149 | 5,15.152,15,7,7,5,0,1 150 | 21,19.288,11,13,15,3,1,1 151 | 6,71.521,5,3,8,4,0,0 152 | 24,23.122,14,3,9,5,1,1 153 | 39,16.12,14,11,10,4,0,1 154 | 41,12.131,8,3,4,5,1,0 155 | 1,61.136,3,7,6,7,1,0 156 | 10,24.4,14,5,11,6,1,1 157 | 5,15.54,8,8,5,3,0,1 158 | 23,76.248,7,5,8,7,0,0 159 | 38,20.659,5,4,5,6,1,0 160 | 41,68.617,7,10,6,3,0,1 161 | 6,7.168,7,8,4,5,1,1 162 | 3,23.528,8,11,8,3,0,1 163 | 13,34.33,7,6,6,8,0,0 164 | 8,66.838,6,14,8,3,0,1 165 | 31,13.364,10,4,3,7,1,1 166 | 19,74.089,4,6,5,4,1,0 167 | 41,69.327,4,7,6,4,0,0 168 | 41,8.501,12,10,5,6,2,1 169 | 5,5.864,10,8,5,7,0,1 170 | 2,64.828,11,12,7,4,0,1 171 | 
9,11.663,7,6,7,9,1,1 172 | 38,73.012,3,6,4,5,1,0 173 | 40,14.323,12,7,8,4,0,1 174 | 32,6.836,11,15,12,3,0,1 175 | 10,73.305,10,15,10,8,3,1 176 | 38,18.869,6,7,7,6,0,0 177 | 31,22.474,8,8,11,5,0,1 178 | 9,15.904,10,8,8,6,1,1 179 | 40,61.413,7,8,3,8,1,1 180 | 41,18.559,9,12,14,8,1,1 181 | 9,75.588,5,5,8,4,0,0 182 | 3,72.991,6,4,7,8,1,0 183 | 0,68.706,6,7,8,8,1,0 184 | 30,11.759,13,12,13,8,1,1 185 | 5,59.421,7,7,5,7,1,0 186 | 9,20.316,7,5,8,3,1,1 187 | 37,66.535,4,7,4,6,1,0 188 | 10,8.82,4,5,3,4,1,0 189 | 6,21.472,11,10,8,3,0,1 190 | 41,68.892,6,3,4,8,1,0 191 | 41,41.666,15,9,6,8,1,1 192 | 8,22.09,12,7,7,5,1,1 193 | 1,19.142,4,8,14,7,0,1 194 | 28,22.846,10,3,6,3,0,1 195 | 19,8.563,13,7,7,6,0,1 196 | 8,65.226,5,4,7,8,0,0 197 | 10,5.643,3,5,5,5,0,1 198 | 13,65.948,8,7,7,7,1,0 199 | 5,24.318,12,12,8,4,1,1 200 | 41,14.28,8,4,4,8,0,0 201 | 5,21.454,6,7,7,7,0,0 202 | 40,6.625,3,5,11,6,1,1 203 | 3,15.364,8,3,3,6,1,0 204 | 25,20.164,15,11,11,4,0,1 205 | 5,13.616,8,6,4,3,0,1 206 | 37,23.185,6,4,5,5,1,0 207 | 17,72.416,3,7,3,4,1,0 208 | 19,70.696,8,5,3,3,0,0 209 | 33,16.881,10,11,12,7,1,1 210 | 5,60.24,8,7,8,5,1,0 211 | 2,17.192,12,5,7,6,1,1 212 | 40,7.522,9,5,8,6,0,1 213 | 4,22.535,10,5,6,8,1,1 214 | 31,9.919,7,5,6,7,1,1 215 | 3,76.245,7,5,8,4,0,0 216 | 2,61.29,7,8,3,7,0,0 217 | 13,21.153,4,14,6,8,1,1 218 | 2,77.113,7,3,8,3,1,0 219 | 1,72.068,3,5,7,8,1,0 220 | 37,11.999,6,3,5,7,1,0 221 | 23,9.546,7,3,5,7,1,1 222 | 2,7.698,12,9,3,6,1,1 223 | 9,27.759,5,7,14,8,1,1 224 | 3,68.356,4,5,3,8,0,0 225 | 35,29.201,3,5,7,4,1,0 226 | 37,13.203,7,7,6,5,1,0 227 | 8,66.008,5,7,5,6,1,0 228 | 40,40.352,6,4,5,8,1,0 229 | 15,16.942,10,13,12,4,1,1 230 | 21,17.052,13,5,10,8,1,1 231 | 38,73.811,8,5,8,5,1,0 232 | 6,72.971,4,5,3,6,1,0 233 | 40,13.558,10,11,4,5,0,1 234 | 31,75.918,5,7,7,3,0,0 235 | 18,11.058,4,3,13,8,0,1 236 | 4,59.526,5,7,6,4,1,0 237 | 2,18.979,3,4,10,7,0,1 238 | 3,67.946,8,7,13,8,1,1 239 | 31,12.785,4,7,4,4,1,0 240 | 9,18.976,11,10,4,3,1,1 241 | 9,80.219,7,3,6,4,1,0 242 | 41,9.254,13,11,14,5,1,1 243 | 3,64.037,7,4,8,4,1,0 244 | 40,11.955,6,4,3,6,0,1 245 | 21,22.967,7,10,10,8,0,1 246 | 10,20.613,12,3,5,7,0,1 247 | 41,9.075,6,3,4,7,0,0 248 | 5,22.625,14,12,13,5,0,1 249 | 30,20.766,11,9,7,5,0,1 250 | 17,20.078,7,5,7,8,1,0 251 | 23,20.682,7,11,3,4,1,1 252 | 9,9.928,13,4,5,7,1,1 253 | 37,14.566,11,5,6,8,0,1 254 | 13,62.656,5,4,5,6,0,0 255 | 40,63.099,6,3,5,3,1,0 256 | 16,64.281,4,5,5,5,1,0 257 | 13,13.359,10,8,6,4,0,1 258 | 6,18.501,6,8,5,6,0,1 259 | 2,14.481,11,3,14,6,0,1 260 | 41,7.571,12,12,12,5,1,1 261 | 30,12.394,11,7,8,5,1,1 262 | 10,17.583,12,13,12,3,3,1 263 | 19,33.904,5,6,5,8,1,0 264 | 4,12.165,12,11,4,5,0,1 265 | 1,6.324,12,10,4,3,0,1 266 | 16,8.297,7,5,4,7,0,0 267 | 28,19.9,13,10,3,6,1,1 268 | 37,22.902,10,13,11,4,0,1 269 | 35,8.091,6,5,8,5,1,0 270 | 9,23.716,3,8,3,5,1,0 271 | 27,19.155,13,14,11,4,1,1 272 | 8,64.194,5,7,3,6,0,0 273 | 13,12.117,11,10,11,7,1,1 274 | 5,70.247,4,7,6,4,1,0 275 | 9,11.595,13,4,7,3,1,1 276 | 20,24.486,12,12,11,3,1,1 277 | 40,65.404,5,8,5,4,1,1 278 | 17,19.701,11,13,6,10,3,1 279 | 9,11.83,5,6,3,3,0,0 280 | 10,22.447,9,3,10,6,1,1 281 | 22,7.991,10,13,15,4,1,1 282 | 6,18.282,4,5,7,3,0,0 283 | 40,72.207,5,8,3,6,1,1 284 | 30,11.222,14,3,8,7,0,1 285 | 10,75.574,8,3,3,11,0,1 286 | 22,18.042,15,11,12,8,1,1 287 | 40,8.621,9,8,8,5,1,1 288 | 3,5.363,4,6,4,3,0,1 289 | 13,75.891,3,5,8,8,0,0 290 | 1,15.782,6,11,4,7,0,1 291 | 4,16.334,7,8,5,5,1,1 292 | 6,12.858,10,11,11,3,0,1 293 | 6,63.614,5,4,4,4,0,0 294 | 40,21.671,15,10,10,5,4,1 295 | 31,74.686,3,4,6,7,1,0 296 | 31,8.0,5,8,7,3,0,0 297 
| 9,16.56,11,8,6,9,1,1 298 | 23,16.49,10,7,7,8,0,1 299 | 13,14.713,3,7,8,4,1,1 300 | 17,19.683,7,11,13,13,1,1 301 | 6,17.594,11,7,10,4,1,1 302 | 10,12.576,5,6,10,3,0,1 303 | 4,12.268,14,10,15,4,0,1 304 | 3,18.244,15,6,5,6,1,1 305 | 9,67.553,11,5,5,8,1,1 306 | 38,21.641,11,5,3,10,0,1 307 | 3,63.693,5,6,4,4,0,0 308 | 9,69.416,10,5,12,3,0,1 309 | 5,60.086,3,4,5,6,0,0 310 | 17,59.321,7,8,8,4,1,0 311 | 6,6.261,8,5,6,4,0,1 312 | 38,10.282,7,7,7,6,1,1 313 | 41,17.047,15,8,3,3,1,1 314 | 9,13.036,11,10,7,7,1,1 315 | 41,33.32,8,5,8,4,0,0 316 | 10,16.115,12,3,12,3,2,1 317 | 37,8.369,7,6,6,5,0,0 318 | 27,72.532,7,4,5,7,0,0 319 | 3,80.664,7,3,4,6,1,0 320 | 23,19.707,13,10,10,6,0,1 321 | 10,13.762,8,8,8,7,0,1 322 | 9,10.442,11,14,7,6,1,1 323 | 32,9.691,13,10,10,6,1,1 324 | 37,21.999,11,7,3,4,1,1 325 | 37,10.15,11,15,12,7,0,1 326 | 3,30.135,3,4,8,3,1,0 327 | 23,16.807,13,10,14,3,1,1 328 | 13,10.768,7,8,11,3,0,1 329 | 4,24.984,6,8,8,8,1,0 330 | 37,20.889,15,10,8,3,0,1 331 | 40,71.992,10,11,8,6,0,1 332 | 20,24.68,6,3,8,3,1,0 333 | 6,8.325,10,14,10,7,2,1 334 | 2,14.849,8,3,11,5,1,1 335 | 28,11.034,15,15,15,6,1,1 336 | 9,8.56,10,11,14,4,1,1 337 | 9,69.772,6,7,3,3,0,0 338 | 41,66.184,7,4,6,4,1,0 339 | 12,17.618,3,11,10,7,2,1 340 | 10,40.773,12,13,13,5,0,1 341 | 6,6.112,5,6,7,7,1,0 342 | 13,75.738,7,5,6,7,0,0 343 | 18,20.051,7,4,8,4,1,1 344 | 13,18.501,12,12,5,6,1,1 345 | 41,68.023,8,3,4,8,0,0 346 | 38,19.019,4,5,7,7,1,0 347 | 0,64.929,6,5,4,8,0,1 348 | 31,30.22,5,3,5,4,0,0 349 | 13,16.512,4,8,5,7,1,0 350 | 2,21.333,13,7,5,3,0,1 351 | 9,15.379,13,10,15,3,0,1 352 | 31,14.672,11,13,11,5,0,1 353 | 18,80.138,7,4,6,6,1,0 354 | 31,20.253,5,3,8,8,0,0 355 | 37,12.874,5,8,8,8,1,0 356 | 37,72.212,5,6,5,3,0,0 357 | 5,11.535,8,4,7,3,0,0 358 | 31,16.408,15,15,10,6,1,1 359 | 9,60.309,3,6,6,3,0,0 360 | 37,18.162,5,8,3,5,0,0 361 | 6,73.498,3,7,4,5,0,0 362 | 9,14.677,10,6,13,6,0,1 363 | 40,18.887,8,6,11,3,0,1 364 | 23,11.823,5,6,11,8,0,1 365 | 19,12.789,15,14,9,8,0,1 366 | 19,58.808,5,7,8,8,0,0 367 | 37,32.721,6,6,7,7,1,1 368 | 40,21.223,14,14,7,5,1,1 369 | 6,24.267,15,11,5,8,1,1 370 | 41,70.218,8,4,8,5,1,0 371 | 6,14.561,4,5,3,8,0,0 372 | 31,58.818,6,3,5,3,1,0 373 | 38,16.393,8,8,8,8,0,0 374 | 37,10.168,10,5,5,8,1,1 375 | 4,37.642,3,7,3,5,1,0 376 | 19,75.161,6,6,5,4,0,0 377 | 10,20.034,4,5,3,5,0,1 378 | 13,18.201,11,11,14,3,0,1 379 | 5,19.175,15,12,14,3,1,1 380 | 37,61.63,7,5,3,3,1,1 381 | 28,65.894,7,3,3,6,1,0 382 | 40,17.033,9,8,5,7,0,1 383 | 4,66.136,6,6,8,3,1,0 384 | 38,71.909,6,3,7,6,1,0 385 | 13,27.942,8,8,4,4,0,0 386 | 5,67.543,10,6,3,4,0,1 387 | 9,66.707,11,13,8,6,1,1 388 | 6,18.038,8,4,11,5,0,1 389 | 37,12.487,10,5,8,7,0,1 390 | 18,20.02,7,8,5,4,1,1 391 | 40,73.983,3,4,4,3,0,0 392 | 1,11.969,10,13,11,4,0,1 393 | 7,58.874,4,6,7,6,0,0 394 | 29,16.48,3,12,13,3,1,1 395 | 8,68.296,6,7,6,6,0,0 396 | 6,22.004,7,6,6,7,0,1 397 | 3,30.951,6,5,6,3,0,0 398 | 4,23.571,5,7,5,3,0,0 399 | 5,62.532,4,6,3,8,0,0 400 | 6,37.788,8,5,4,5,1,0 401 | 13,15.208,7,8,8,5,0,1 402 | 2,20.733,11,5,4,3,0,1 403 | 38,16.718,11,13,12,4,6,1 404 | 40,21.376,14,10,13,7,1,1 405 | 38,18.017,5,4,8,6,1,0 406 | 38,21.251,8,3,5,6,0,0 407 | 6,14.154,13,10,8,4,1,1 408 | 6,20.265,10,15,4,4,0,1 409 | 11,22.732,3,3,7,7,1,1 410 | 9,20.05,6,8,4,6,0,1 411 | 1,15.202,3,4,3,7,0,0 412 | 31,80.809,4,3,4,5,0,0 413 | 2,24.728,7,7,4,4,1,0 414 | 40,16.587,15,10,10,8,0,1 415 | 6,41.128,3,4,5,7,0,0 416 | 9,18.172,6,4,8,4,1,1 417 | 40,7.385,15,6,8,7,0,1 418 | 10,19.029,5,3,6,3,0,1 419 | 23,19.803,9,10,3,8,1,1 420 | 9,42.041,6,4,4,5,1,0 421 | 3,60.161,6,6,7,5,1,1 422 | 
22,9.142,12,7,8,5,0,1 423 | 2,10.854,8,8,3,4,0,1 424 | 0,16.903,12,9,10,5,1,1 425 | 6,14.766,8,5,6,8,1,1 426 | 4,16.089,5,5,4,5,0,0 427 | 6,63.296,3,5,4,6,1,1 428 | 5,67.833,8,6,5,6,1,0 429 | 9,60.644,13,7,6,6,1,1 430 | 40,74.922,7,4,8,3,1,0 431 | 19,14.959,8,15,10,8,0,1 432 | 41,61.286,7,8,7,3,0,1 433 | 9,17.933,8,6,5,6,1,1 434 | 13,15.847,5,11,5,8,1,1 435 | 38,21.237,10,8,7,6,1,1 436 | 2,10.077,10,3,6,4,1,1 437 | 23,16.805,8,3,9,3,0,1 438 | 30,13.572,9,6,14,7,0,1 439 | 2,6.999,8,5,4,6,1,0 440 | 5,16.108,10,8,11,5,2,1 441 | 41,5.185,12,4,8,4,1,1 442 | 5,25.534,8,7,8,8,0,1 443 | 38,30.828,7,8,8,3,0,1 444 | 10,67.897,4,5,10,7,1,1 445 | 30,14.507,11,15,11,7,1,1 446 | 38,7.458,9,3,7,8,1,1 447 | 41,12.804,11,3,5,3,1,1 448 | 38,19.81,3,8,6,4,0,0 449 | 40,67.522,8,5,7,5,1,0 450 | 40,73.206,8,6,5,6,0,0 451 | 41,13.028,3,7,6,4,1,1 452 | 0,76.01,8,15,14,7,2,1 453 | 38,75.721,8,6,7,3,0,0 454 | 23,12.662,13,7,11,8,0,1 455 | 38,21.563,7,8,4,6,1,1 456 | 6,13.987,12,3,15,3,1,1 457 | 38,14.646,6,3,3,7,0,1 458 | 10,71.1,3,6,3,6,0,1 459 | 5,8.196,15,13,9,5,0,1 460 | 2,17.289,4,6,8,6,0,0 461 | 5,22.043,11,15,13,7,1,1 462 | 11,19.878,8,10,11,3,1,1 463 | 2,16.54,11,12,11,6,1,1 464 | 3,69.527,8,4,7,4,1,0 465 | 19,18.109,12,10,15,5,0,1 466 | 2,17.371,8,8,6,4,0,1 467 | 6,21.531,11,14,15,8,1,1 468 | 9,15.612,6,7,4,3,1,1 469 | 41,65.896,4,7,8,3,0,0 470 | 42,17.919,3,4,4,8,0,0 471 | 28,17.0,4,11,10,3,1,1 472 | 0,17.261,12,12,9,6,1,1 473 | 22,21.615,13,7,8,5,1,1 474 | 3,23.557,8,5,8,3,1,1 475 | 19,10.466,11,14,6,3,0,1 476 | 1,13.054,7,6,5,8,1,0 477 | 1,66.935,8,12,4,3,0,1 478 | 2,8.17,12,6,6,3,0,1 479 | 23,13.901,10,10,12,3,0,1 480 | 10,20.925,6,5,5,8,10,1 481 | 40,59.672,8,4,3,3,0,1 482 | 10,76.146,6,5,7,4,1,1 483 | 40,12.52,15,14,6,5,1,1 484 | 1,10.14,6,8,13,8,0,1 485 | 31,72.246,6,8,8,4,0,0 486 | 3,7.615,13,6,3,8,1,1 487 | 27,65.626,4,7,8,7,0,0 488 | 9,73.268,6,11,3,7,1,1 489 | 3,10.284,10,14,15,5,0,1 490 | 40,73.547,4,7,6,6,1,1 491 | 4,22.291,11,3,4,4,1,1 492 | 10,64.806,8,4,7,8,0,1 493 | 40,22.447,12,10,5,5,0,1 494 | 40,39.997,5,8,4,4,1,0 495 | 37,18.104,6,8,3,6,1,0 496 | 40,7.864,15,3,5,7,0,1 497 | 23,71.073,6,3,8,6,0,1 498 | 23,72.196,9,5,4,7,0,1 499 | 40,76.346,7,4,4,8,1,1 500 | 41,59.967,7,8,6,5,1,0 501 | 17,22.117,10,10,4,8,0,1 502 | 1,22.523,6,6,7,3,1,0 503 | 5,66.913,8,3,7,7,0,1 504 | 40,74.813,5,6,4,6,1,0 505 | 10,9.372,7,9,5,4,0,1 506 | 10,9.722,7,3,7,5,1,1 507 | 41,70.952,10,12,6,7,0,1 508 | 31,14.813,12,8,7,8,1,1 509 | 40,18.708,10,14,11,6,1,1 510 | 10,68.119,13,11,9,5,1,1 511 | 40,10.332,15,10,12,8,0,1 512 | 9,7.538,7,3,4,6,1,1 513 | 40,18.307,6,6,4,8,1,0 514 | 10,11.284,8,8,7,6,0,1 515 | 6,10.744,12,4,4,5,1,1 516 | 22,42.592,4,6,3,7,1,0 517 | 31,73.383,3,7,6,3,1,0 518 | 19,76.174,6,3,4,4,0,0 519 | 6,68.657,3,5,4,7,0,0 520 | 40,18.196,5,6,3,7,0,1 521 | 17,68.004,8,7,3,5,0,0 522 | 40,15.71,3,3,6,5,0,1 523 | 41,11.1,8,6,8,4,0,0 524 | 35,14.471,12,3,4,8,0,1 525 | 40,19.433,8,8,4,8,1,1 526 | 2,12.28,10,15,8,4,1,1 527 | 35,69.969,7,6,6,4,1,0 528 | 19,74.384,8,5,5,4,1,0 529 | 44,5.931,5,8,6,4,0,0 530 | 10,24.361,8,8,4,4,0,0 531 | 6,10.578,7,3,5,5,0,1 532 | 9,11.344,11,13,4,8,0,1 533 | 9,21.946,15,13,6,6,1,1 534 | 37,12.318,8,8,3,5,0,0 535 | 41,22.134,11,7,7,8,1,1 536 | 26,10.607,10,11,13,5,1,1 537 | 18,62.091,4,3,7,8,1,0 538 | 6,74.338,8,7,6,4,0,0 539 | 41,20.634,4,7,7,5,0,0 540 | 16,74.017,7,4,5,7,1,0 541 | 2,70.46,7,5,6,4,0,0 542 | 1,79.243,7,5,3,8,1,0 543 | 40,69.14,7,8,4,5,1,1 544 | 13,23.332,14,12,10,3,2,1 545 | -------------------------------------------------------------------------------- 
/Project19/Instant_gratification_QDA_LDA/QDA_LDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 37, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "from sklearn.decomposition import PCA,TruncatedSVD\n", 15 | "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis,LinearDiscriminantAnalysis\n", 16 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 17 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 18 | "from sklearn.metrics import *\n", 19 | "import hyperopt\n", 20 | "from hyperopt import *\n", 21 | "from hyperopt import fmin, tpe, hp, space_eval\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "%matplotlib inline \n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 38, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | "
idmuggy-smalt-axolotl-pembusdorky-peach-sheepdog-ordinalslimy-seashell-cassowary-goosesnazzy-harlequin-chicken-distractionfrumpy-smalt-mau-ordinalstealthy-beige-pinscher-goldenchummy-cream-tarantula-entropyhazy-emerald-cuttlefish-unsortednerdy-indigo-wolfhound-sorted...wheezy-myrtle-mandrill-entropywiggy-lilac-lemming-sortedgloppy-cerise-snail-contributorwoozy-silver-havanese-gaussianjumpy-thistle-discus-sortedmuggy-turquoise-donkey-importantblurry-buff-hyena-entropybluesy-chocolate-kudu-fepidgamy-white-monster-experttarget
0707b395ecdcbb4dc2eabea00e4d1b179-2.0706541.0181600.2286430.8572210.0522710.230303-6.3850900.439369-0.721946...0.3518950.618824-1.5424230.5981750.6117570.6787720.247059-0.806677-0.1936490
15880c03c6582a7b42248668e56b4bdec-0.4917020.082645-0.0111931.071266-0.346347-0.0822090.110579-0.382374-0.229620...-0.645115-1.2460902.613357-0.4796641.5812890.9312580.151937-0.7665950.4743510
24ccbcb3d13e5072ff1d9c61afe2c4f77-1.6804730.860529-1.0761950.7401243.6784450.2885580.5158750.920590-1.223277...0.5164220.130521-0.4592102.028205-0.093968-0.218274-0.163136-0.8702890.0640381
3e350f17a357f12a1941f0837afb7eb8d0.1837740.919134-0.9469580.9184920.8622781.1552870.9111060.562598-1.349685...-1.1689671.385089-0.3530283.316150-0.524087-0.7943273.9363650.682989-2.5212110
\n", 174 | "

4 rows × 258 columns

\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " id muggy-smalt-axolotl-pembus \\\n", 179 | "0 707b395ecdcbb4dc2eabea00e4d1b179 -2.070654 \n", 180 | "1 5880c03c6582a7b42248668e56b4bdec -0.491702 \n", 181 | "2 4ccbcb3d13e5072ff1d9c61afe2c4f77 -1.680473 \n", 182 | "3 e350f17a357f12a1941f0837afb7eb8d 0.183774 \n", 183 | "\n", 184 | " dorky-peach-sheepdog-ordinal slimy-seashell-cassowary-goose \\\n", 185 | "0 1.018160 0.228643 \n", 186 | "1 0.082645 -0.011193 \n", 187 | "2 0.860529 -1.076195 \n", 188 | "3 0.919134 -0.946958 \n", 189 | "\n", 190 | " snazzy-harlequin-chicken-distraction frumpy-smalt-mau-ordinal \\\n", 191 | "0 0.857221 0.052271 \n", 192 | "1 1.071266 -0.346347 \n", 193 | "2 0.740124 3.678445 \n", 194 | "3 0.918492 0.862278 \n", 195 | "\n", 196 | " stealthy-beige-pinscher-golden chummy-cream-tarantula-entropy \\\n", 197 | "0 0.230303 -6.385090 \n", 198 | "1 -0.082209 0.110579 \n", 199 | "2 0.288558 0.515875 \n", 200 | "3 1.155287 0.911106 \n", 201 | "\n", 202 | " hazy-emerald-cuttlefish-unsorted nerdy-indigo-wolfhound-sorted ... \\\n", 203 | "0 0.439369 -0.721946 ... \n", 204 | "1 -0.382374 -0.229620 ... \n", 205 | "2 0.920590 -1.223277 ... \n", 206 | "3 0.562598 -1.349685 ... \n", 207 | "\n", 208 | " wheezy-myrtle-mandrill-entropy wiggy-lilac-lemming-sorted \\\n", 209 | "0 0.351895 0.618824 \n", 210 | "1 -0.645115 -1.246090 \n", 211 | "2 0.516422 0.130521 \n", 212 | "3 -1.168967 1.385089 \n", 213 | "\n", 214 | " gloppy-cerise-snail-contributor woozy-silver-havanese-gaussian \\\n", 215 | "0 -1.542423 0.598175 \n", 216 | "1 2.613357 -0.479664 \n", 217 | "2 -0.459210 2.028205 \n", 218 | "3 -0.353028 3.316150 \n", 219 | "\n", 220 | " jumpy-thistle-discus-sorted muggy-turquoise-donkey-important \\\n", 221 | "0 0.611757 0.678772 \n", 222 | "1 1.581289 0.931258 \n", 223 | "2 -0.093968 -0.218274 \n", 224 | "3 -0.524087 -0.794327 \n", 225 | "\n", 226 | " blurry-buff-hyena-entropy bluesy-chocolate-kudu-fepid \\\n", 227 | "0 0.247059 -0.806677 \n", 228 | "1 0.151937 -0.766595 \n", 229 | "2 -0.163136 -0.870289 \n", 230 | "3 3.936365 0.682989 \n", 231 | "\n", 232 | " gamy-white-monster-expert target \n", 233 | "0 -0.193649 0 \n", 234 | "1 0.474351 0 \n", 235 | "2 0.064038 1 \n", 236 | "3 -2.521211 0 \n", 237 | "\n", 238 | "[4 rows x 258 columns]" 239 | ] 240 | }, 241 | "execution_count": 38, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "path_of_input_file = 'D:\\\\kaggle_trials\\\\instant-gratification\\\\train.csv'\n", 248 | "df = pd.read_csv(path_of_input_file)\n", 249 | "df.head(4)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 40, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "The number of labels are 2\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "num_labels = df['target'].unique()\n", 267 | "print('The number of labels are ',len(num_labels))" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 41, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "The number of 0 labels are :- 131013\n", 280 | "The number of 1 labels are :- 131131\n", 281 | "We dont have a balanced dataset and hence we need to perform imbalanced dataset handling\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "for i in range(len(num_labels)):\n", 287 | " print('The number of ', num_labels[i] ,' labels are :- ',len(df[df['target']==num_labels[i]]))\n", 
288 | "print('We dont have a balanced dataset and hence we need to perform imbalanced dataset handling')" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 63, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | "0.5900932689923516\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "col_names = df.columns\n", 306 | "Y = df[col_names[-1]].values\n", 307 | "\n", 308 | "columns_to_scale = col_names[1:-1]\n", 309 | "scaler = MinMaxScaler()\n", 310 | "scaled_columns = scaler.fit_transform(df[columns_to_scale]) \n", 311 | "X_processed_data = scaled_columns\n", 312 | "dim_r = TruncatedSVD(n_components=200)\n", 313 | "dim_r.fit(X_processed_data)\n", 314 | "X_train, X_test, y_train, y_test = train_test_split(X_processed_data, Y, test_size=0.2, random_state=42)\n", 315 | "qda = QuadraticDiscriminantAnalysis(0.1)\n", 316 | "qda.fit(X_train,y_train)\n", 317 | "accuracy = qda.score(X_test,y_test)\n", 318 | "print(accuracy)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 62, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "0.5214480535581453\n" 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "lda = LinearDiscriminantAnalysis()\n", 336 | "lda.fit(X_train,y_train)\n", 337 | "accuracy = lda.score(X_test,y_test)\n", 338 | "print(accuracy)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [] 361 | } 362 | ], 363 | "metadata": { 364 | "kernelspec": { 365 | "display_name": "Python 3", 366 | "language": "python", 367 | "name": "python3" 368 | }, 369 | "language_info": { 370 | "codemirror_mode": { 371 | "name": "ipython", 372 | "version": 3 373 | }, 374 | "file_extension": ".py", 375 | "mimetype": "text/x-python", 376 | "name": "python", 377 | "nbconvert_exporter": "python", 378 | "pygments_lexer": "ipython3", 379 | "version": "3.7.3" 380 | } 381 | }, 382 | "nbformat": 4, 383 | "nbformat_minor": 2 384 | } 385 | -------------------------------------------------------------------------------- /Project15/Lower_back_pain_detection_KNN/Lower Back pain detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import warnings\n", 18 | "warnings.filterwarnings('ignore')\n", 19 | "from imblearn.over_sampling import SMOTE \n", 20 | "import numpy as np\n", 21 | "import pandas as pd\n", 22 | "import os\n", 23 | "from sklearn.neighbors import KNeighborsClassifier\n", 24 | "from sklearn.preprocessing import StandardScaler, LabelBinarizer\n", 25 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 26 | "from sklearn.metrics import *\n", 27 | "import hyperopt\n", 28 | "from hyperopt import *\n", 29 | "from hyperopt import fmin, tpe, hp, space_eval\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "%matplotlib inline \n" 32 | ] 33 | }, 
34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "
\n", 39 | "Loading the data : We loaded the data from the given data source to demonstrate K neighbors Classifier\n", 40 | "
" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | "
Col1Col2Col3Col4Col5Col6Col7Col8Col9Col10Col11Col12Class_attUnnamed: 13
063.02781822.55258639.60911740.47523298.672917-0.2544000.74450312.566114.538615.30468-28.65850143.5123AbnormalNaN
139.05695110.06099125.01537828.995960114.4054254.5642590.41518612.887417.532316.78486-25.53060716.1102AbnormalNaN
268.83202122.21848250.09219446.613539105.985135-3.5303170.47488926.834317.486116.65897-29.03188819.2221AbnormalPrediction is done by using binary classificat...
369.29700824.65287844.31123844.644130101.86849511.2115230.36934523.560312.707411.42447-30.47024618.8329AbnormalNaN
\n", 156 | "
" 157 | ], 158 | "text/plain": [ 159 | " Col1 Col2 Col3 Col4 Col5 Col6 \\\n", 160 | "0 63.027818 22.552586 39.609117 40.475232 98.672917 -0.254400 \n", 161 | "1 39.056951 10.060991 25.015378 28.995960 114.405425 4.564259 \n", 162 | "2 68.832021 22.218482 50.092194 46.613539 105.985135 -3.530317 \n", 163 | "3 69.297008 24.652878 44.311238 44.644130 101.868495 11.211523 \n", 164 | "\n", 165 | " Col7 Col8 Col9 Col10 Col11 Col12 Class_att \\\n", 166 | "0 0.744503 12.5661 14.5386 15.30468 -28.658501 43.5123 Abnormal \n", 167 | "1 0.415186 12.8874 17.5323 16.78486 -25.530607 16.1102 Abnormal \n", 168 | "2 0.474889 26.8343 17.4861 16.65897 -29.031888 19.2221 Abnormal \n", 169 | "3 0.369345 23.5603 12.7074 11.42447 -30.470246 18.8329 Abnormal \n", 170 | "\n", 171 | " Unnamed: 13 \n", 172 | "0 NaN \n", 173 | "1 NaN \n", 174 | "2 Prediction is done by using binary classificat... \n", 175 | "3 NaN " 176 | ] 177 | }, 178 | "execution_count": 2, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "path_of_input_file = 'D:\\\\kaggle_trials\\\\lower-back-pain-symptoms-dataset\\\\Dataset_spine.csv'\n", 185 | "df = pd.read_csv(path_of_input_file,)\n", 186 | "df.head(4)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "
\n", 194 | "Data Imbalance: We check the data imbalance here. Clearly we have an imbalanced dataset\n", 195 | "
" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 3, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "The number of labels are 2\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "num_labels = df['Class_att'].unique()\n", 213 | "print('The number of labels are ',len(num_labels))" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 4, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "The number of Abnormal labels are :- 210\n", 226 | "The number of Normal labels are :- 100\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "for i in range(len(num_labels)):\n", 232 | " print('The number of ', num_labels[i] ,' labels are :- ',len(df[df['Class_att']==num_labels[i]]))" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "
\n", 240 | "Preprocessing: We now preproocess and make the dataset balanced\n", 241 | "
" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 5, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "lb = LabelBinarizer()\n", 251 | "Y = lb.fit_transform(df['Class_att'].values)\n", 252 | "X = df[df.columns[:12]].values\n", 253 | "\n", 254 | "sm = SMOTE(random_state=42)\n", 255 | "X_res, Y_res = sm.fit_resample(X, Y)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 6, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "Positive examples before Oversampling is 100\n", 268 | "Negative examples before Oversampling is 210\n", 269 | "\n", 270 | "\n", 271 | "Positive examples after Oversampling is 210\n", 272 | "Negative examples after Oversampling is 210\n", 273 | "\n", 274 | "\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "print('Positive examples before Oversampling is ', sum(Y == [1])[0])\n", 280 | "print('Negative examples before Oversampling is ', sum(Y == [0])[0])\n", 281 | "print('\\n')\n", 282 | "print('Positive examples after Oversampling is ', sum(Y_res == [1]))\n", 283 | "print('Negative examples after Oversampling is ', sum(Y_res == [0]))\n", 284 | "print('\\n')" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "
\n", 292 | "Train Test Split: We split the data to train and test components.\n", 293 | "
" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 7, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "X_train, X_test, y_train, y_test = train_test_split(X_res, Y_res, test_size=0.33, random_state=42)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "
\n", 310 | "Hyper parameter Grid: We create a grid for different hyper parameters to iterate from \n", 311 | "
" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 8, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "kneighbors_grid = {'n_neighbors' : hp.choice('n_neighbors',range(10,20)),\n", 321 | " 'weights' : hp.choice('weights',['uniform','distance']),\n", 322 | " 'algorithm' : hp.choice('algorithm',['ball_tree','kd_tree','brute']),\n", 323 | " 'leaf_size' : hp.choice('leaf_size',range(1,50)),\n", 324 | " 'metric' : hp.choice('metric',['euclidean','manhattan','chebyshev','minkowski'])\n", 325 | "}" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 9, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "100%|████████████████████████████████████████████████| 500/500 [00:06<00:00, 77.86it/s, best loss: -0.8609696283720053]\n", 338 | "The best parameter tuned on training set is given by :- {'algorithm': 'ball_tree', 'leaf_size': 45, 'metric': 'chebyshev', 'n_neighbors': 10, 'weights': 'distance'}\n" 339 | ] 340 | } 341 | ], 342 | "source": [ 343 | "def hyperopt_train_test(params):\n", 344 | " clf = KNeighborsClassifier(**params)\n", 345 | " return cross_val_score(clf, X_train, y_train).mean()\n", 346 | "\n", 347 | "def function_to_minimise(params):\n", 348 | " accuracy = hyperopt_train_test(params)\n", 349 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 350 | "\n", 351 | "\n", 352 | "trials = Trials()\n", 353 | "best = fmin(function_to_minimise, kneighbors_grid, algo=tpe.suggest, max_evals=500, trials=trials)\n", 354 | "best_parameters = space_eval(kneighbors_grid, best)\n", 355 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "
\n", 363 | "Model Fitting and conclusion: We now fit the model and then provide a classification analysis on the model fit\n", 364 | "
" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 10, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "text/plain": [ 375 | "KNeighborsClassifier(algorithm='ball_tree', leaf_size=45, metric='chebyshev',\n", 376 | " metric_params=None, n_jobs=None, n_neighbors=10, p=2,\n", 377 | " weights='distance')" 378 | ] 379 | }, 380 | "execution_count": 10, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "knnclf = KNeighborsClassifier(**best_parameters)\n", 387 | "knnclf.fit(X_train,y_train)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 11, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "y_hat = knnclf.predict(X_test)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 12, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "name": "stdout", 406 | "output_type": "stream", 407 | "text": [ 408 | " precision recall f1-score support\n", 409 | "\n", 410 | " 0 0.72 0.98 0.83 54\n", 411 | " 1 0.98 0.75 0.85 85\n", 412 | "\n", 413 | " accuracy 0.84 139\n", 414 | " macro avg 0.85 0.87 0.84 139\n", 415 | "weighted avg 0.88 0.84 0.84 139\n", 416 | "\n" 417 | ] 418 | } 419 | ], 420 | "source": [ 421 | "print(classification_report(y_hat,y_test))" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [] 430 | } 431 | ], 432 | "metadata": { 433 | "kernelspec": { 434 | "display_name": "Python 3", 435 | "language": "python", 436 | "name": "python3" 437 | }, 438 | "language_info": { 439 | "codemirror_mode": { 440 | "name": "ipython", 441 | "version": 3 442 | }, 443 | "file_extension": ".py", 444 | "mimetype": "text/x-python", 445 | "name": "python", 446 | "nbconvert_exporter": "python", 447 | "pygments_lexer": "ipython3", 448 | "version": "3.7.3" 449 | } 450 | }, 451 | "nbformat": 4, 452 | "nbformat_minor": 2 453 | } 454 | -------------------------------------------------------------------------------- /Project18/Quality_detection_Decision_trees/Wine_quality_Decision_Trees.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import warnings\n", 18 | "warnings.filterwarnings('ignore')\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import os\n", 22 | "from imblearn.over_sampling import SMOTE \n", 23 | "from sklearn.tree import DecisionTreeClassifier\n", 24 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder, LabelEncoder\n", 25 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 26 | "from sklearn.metrics import *\n", 27 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", 28 | "from sklearn.decomposition import PCA,TruncatedSVD\n", 29 | "import hyperopt\n", 30 | "from hyperopt import *\n", 31 | "from hyperopt import fmin, tpe, hp, space_eval\n", 32 | "import string\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "%matplotlib inline \n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "
\n", 42 | "Loading data: We load the dataset necessary for analysis\n", 43 | "
" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.00.270.3620.70.04545.0170.01.00103.000.458.86
16.30.300.341.60.04914.0132.00.99403.300.499.56
28.10.280.406.90.05030.097.00.99513.260.4410.16
37.20.230.328.50.05847.0186.00.99563.190.409.96
\n", 149 | "
" 150 | ], 151 | "text/plain": [ 152 | " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", 153 | "0 7.0 0.27 0.36 20.7 0.045 \n", 154 | "1 6.3 0.30 0.34 1.6 0.049 \n", 155 | "2 8.1 0.28 0.40 6.9 0.050 \n", 156 | "3 7.2 0.23 0.32 8.5 0.058 \n", 157 | "\n", 158 | " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", 159 | "0 45.0 170.0 1.0010 3.00 0.45 \n", 160 | "1 14.0 132.0 0.9940 3.30 0.49 \n", 161 | "2 30.0 97.0 0.9951 3.26 0.44 \n", 162 | "3 47.0 186.0 0.9956 3.19 0.40 \n", 163 | "\n", 164 | " alcohol quality \n", 165 | "0 8.8 6 \n", 166 | "1 9.5 6 \n", 167 | "2 10.1 6 \n", 168 | "3 9.9 6 " 169 | ] 170 | }, 171 | "execution_count": 2, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "path_of_input_file = 'D:\\\\kaggle_trials\\\\mlcourse\\\\winequality-white.csv'\n", 178 | "df = pd.read_csv(path_of_input_file)\n", 179 | "df.head(4)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 3, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "We can clearly see that every value is numerical and hence only scaling will be needed for preprocessing steps\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "print('We can clearly see that every value is numerical and hence only scaling will be needed for preprocessing steps')" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "
\n", 204 | "Unbalanced data: We can clearly see that the data in unbalanced\n", 205 | "
" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 4, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "The number of labels are 7\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "num_labels = df['quality'].unique()\n", 223 | "print('The number of labels are ',len(num_labels))" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 5, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "name": "stdout", 233 | "output_type": "stream", 234 | "text": [ 235 | "The number of 6 labels are :- 2198\n", 236 | "The number of 5 labels are :- 1457\n", 237 | "The number of 7 labels are :- 880\n", 238 | "The number of 8 labels are :- 175\n", 239 | "The number of 4 labels are :- 163\n", 240 | "The number of 3 labels are :- 20\n", 241 | "The number of 9 labels are :- 5\n", 242 | "We dont have a balanced dataset and hence we need to perform imbalanced dataset handling\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "for i in range(len(num_labels)):\n", 248 | " print('The number of ', num_labels[i] ,' labels are :- ',len(df[df['quality']==num_labels[i]]))\n", 249 | "print('We dont have a balanced dataset and hence we need to perform imbalanced dataset handling')" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "
\n", 257 | "Preprocessing steps: We preprocess the data and make the data balanced\n", 258 | "
" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 46, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "cols_needed = df.columns\n", 268 | "columns_to_scale = cols_needed[:-1]\n", 269 | "scaler = MinMaxScaler()\n", 270 | "scaled_columns = scaler.fit_transform(df[columns_to_scale]) \n", 271 | "X_processed_data = scaled_columns\n", 272 | "lb = LabelEncoder()\n", 273 | "Y = lb.fit_transform(df['quality'].values)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 47, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "pca = PCA(n_components=10)\n", 283 | "X_reduced = pca.fit_transform(X_processed_data)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 48, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "sm = SMOTE(random_state=42,k_neighbors=4)\n", 293 | "X_res, Y_res = sm.fit_resample(X_reduced, Y)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 49, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "The number of 0 labels are :- 2198\n", 306 | "The number of 1 labels are :- 2198\n", 307 | "The number of 2 labels are :- 2198\n", 308 | "The number of 3 labels are :- 2198\n", 309 | "The number of 4 labels are :- 2198\n", 310 | "The number of 5 labels are :- 2198\n", 311 | "The number of 6 labels are :- 2198\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "for i in range(len(num_labels)):\n", 317 | " print('The number of ', i ,' labels are :- ',\n", 318 | " [np.array_equal(Y_res[j],i) for j in range(len(Y_res))].count(True))" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "
\n", 326 | "Train-test split: train test split of data is performed\n", 327 | "
" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 50, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "X_train, X_test, y_train, y_test = train_test_split(X_res, Y_res, test_size=0.33, random_state=42)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 51, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "decision_tree_grid = {'criterion' : hp.choice('criterion',['gini','entropy']),\n", 346 | " 'max_depth' : hp.choice('max_depth',range(1,150)),\n", 347 | " 'min_samples_split' : hp.choice('min_samples_split',range(2,30)),\n", 348 | " 'min_samples_leaf' : hp.uniform('min_samples_leaf',0.1,0.5),\n", 349 | " 'max_features' : hp.choice('max_features',range(1,10))\n", 350 | " \n", 351 | " }" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 52, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "100%|████████████████████████████████████████████████| 500/500 [00:26<00:00, 19.15it/s, best loss: -0.4014437049772785]\n", 364 | "The best parameter tuned on training set is given by :- {'criterion': 'gini', 'max_depth': 128, 'max_features': 6, 'min_samples_leaf': 0.10042508293788373, 'min_samples_split': 5}\n" 365 | ] 366 | } 367 | ], 368 | "source": [ 369 | "def hyperopt_train_test(params):\n", 370 | " clf = DecisionTreeClassifier(**params)\n", 371 | " return cross_val_score(clf, X_train, y_train).mean()\n", 372 | "\n", 373 | "def function_to_minimise(params):\n", 374 | " accuracy = hyperopt_train_test(params)\n", 375 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 376 | "\n", 377 | "\n", 378 | "trials = Trials()\n", 379 | "best = fmin(function_to_minimise, decision_tree_grid, algo=tpe.suggest, max_evals=500, trials=trials)\n", 380 | "best_parameters = space_eval(decision_tree_grid, best)\n", 381 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 53, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=128,\n", 393 | " max_features=6, max_leaf_nodes=None,\n", 394 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 395 | " min_samples_leaf=0.10042508293788373,\n", 396 | " min_samples_split=5, min_weight_fraction_leaf=0.0,\n", 397 | " presort=False, random_state=None, splitter='best')" 398 | ] 399 | }, 400 | "execution_count": 53, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [ 406 | "model = DecisionTreeClassifier(**best_parameters)\n", 407 | "model.fit(X_train, y_train)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 54, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "y_pred = model.predict(X_test)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 55, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "name": "stdout", 426 | "output_type": "stream", 427 | "text": [ 428 | " precision recall f1-score support\n", 429 | "\n", 430 | " 0 0.35 0.47 0.40 532\n", 431 | " 1 0.60 0.34 0.43 1327\n", 432 | " 2 0.20 0.23 0.21 644\n", 433 | " 3 0.00 0.00 0.00 0\n", 434 | " 4 0.31 0.31 0.31 715\n", 435 | " 5 0.35 0.47 0.40 541\n", 436 | " 6 0.89 0.47 0.62 1319\n", 437 | "\n", 438 | " accuracy 0.38 5078\n", 439 | " macro avg 0.39 0.33 0.34 5078\n", 440 | "weighted 
avg 0.53 0.38 0.43 5078\n", 441 | "\n" 442 | ] 443 | } 444 | ], 445 | "source": [ 446 | "print(classification_report(y_pred,y_test))" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "Python 3", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | "mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.7.3" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 2 485 | } 486 | -------------------------------------------------------------------------------- /Project31/House_Price_Revisted_Gaussian_Process_Regression/Gaussian Process Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA\n", 16 | "from sklearn.gaussian_process import GaussianProcessRegressor\n", 17 | "from sklearn.ensemble import GradientBoostingRegressor\n", 18 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 19 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 20 | "from sklearn.metrics import *\n", 21 | "import hyperopt\n", 22 | "from hyperopt import *\n", 23 | "from hyperopt import fmin, tpe, hp, space_eval\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "%matplotlib inline \n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "
\n", 33 | "Loading the data: We load the data from the mentioned path\n", 34 | "
" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
\n", 137 | "

2 rows × 81 columns

\n", 138 | "
" 139 | ], 140 | "text/plain": [ 141 | " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", 142 | "0 1 60 RL 65.0 8450 Pave NaN Reg \n", 143 | "1 2 20 RL 80.0 9600 Pave NaN Reg \n", 144 | "\n", 145 | " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold \\\n", 146 | "0 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", 147 | "1 Lvl AllPub ... 0 NaN NaN NaN 0 5 \n", 148 | "\n", 149 | " YrSold SaleType SaleCondition SalePrice \n", 150 | "0 2008 WD Normal 208500 \n", 151 | "1 2007 WD Normal 181500 \n", 152 | "\n", 153 | "[2 rows x 81 columns]" 154 | ] 155 | }, 156 | "execution_count": 2, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "path_of_input_file = r'D:\\kaggle_trials\\house-prices-advanced-regression-techniques\\train.csv'\n", 163 | "df = pd.read_csv(path_of_input_file)\n", 164 | "df.head(2)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "
\n", 172 | "Preprocessing data : We separate out the numerical and categorical columns from the data to be used for scaling and encoding respectively \n", 173 | "
" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 3, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "cols_needed = list(df.columns)\n", 183 | "cols_needed = cols_needed[:len(cols_needed)-1]\n", 184 | "\n", 185 | "possible_numeric_cols = list(df._get_numeric_data().columns)\n", 186 | "possible_numeric_cols.remove('Id')\n", 187 | "\n", 188 | "categorical_columns = list(set(cols_needed)- set(possible_numeric_cols))\n", 189 | "\n", 190 | "numerical_columns = []\n", 191 | "for i in range(len(possible_numeric_cols)):\n", 192 | " col_name = possible_numeric_cols[i]\n", 193 | " if len(df[col_name].unique())<10:\n", 194 | " categorical_columns.append(col_name)\n", 195 | " else:\n", 196 | " numerical_columns.append(col_name)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "
\n", 204 | "Missing value Treatment: We impute the numerical missing values with their respective means and the categorical values with their modes.\n", 205 | "
" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 4, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "for i in range(len(categorical_columns)):\n", 215 | " df[categorical_columns[i]] = df[categorical_columns[i]].fillna(df[categorical_columns[i]].mode()[0])\n", 216 | "mean_impute_dict ={}\n", 217 | "for i in range(len(numerical_columns)):\n", 218 | " mean_impute_dict[numerical_columns[i]] = np.nanmean(np.float_(df[numerical_columns[i]].values))\n", 219 | "for i in range(len(numerical_columns)):\n", 220 | " df[numerical_columns[i]] = df[numerical_columns[i]].fillna(mean_impute_dict[numerical_columns[i]])" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "
\n", 228 | "Scaling and Encoding: We scale and one hot encode the data to get the matrix we need for calculations\n", 229 | "
" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 5, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "ohe = OneHotEncoder()\n", 239 | "scalar = MinMaxScaler()\n", 240 | "encoded_matrix = ohe.fit_transform(df[categorical_columns])\n", 241 | "scaled_matrix = scalar.fit_transform(df[numerical_columns])\n", 242 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 243 | "Y = scalar.fit_transform(df[['SalePrice']])\n" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "
\n", 251 | "Train Test Split : We split the data to train and test set \n", 252 | "
" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 6, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.2, random_state=42)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "
\n", 269 | "Implementing the model: We now implement the model with tuned parameters and get the R^2 score\n", 270 | "
" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 7, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "GaussianProcessRegressor(alpha=1e-10, copy_X_train=True, kernel=None,\n", 282 | " n_restarts_optimizer=0, normalize_y=False,\n", 283 | " optimizer='fmin_l_bfgs_b', random_state=None)" 284 | ] 285 | }, 286 | "execution_count": 7, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "model = GaussianProcessRegressor()\n", 293 | "model.fit(X_train, y_train)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 8, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "The coefficient of determination is:- -218.8804257341233\n" 306 | ] 307 | } 308 | ], 309 | "source": [ 310 | "y_pred = model.predict(X_test)\n", 311 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "
\n", 319 | "Conclusion: Clearly Gaussian Process Regression is not giving a good coefficient of determination at all. We will use Gradient Boosting Regressor in this scenario to see what improvements we can make\n", 320 | "
" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 9, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "gradient_boost_reg_grid = {'loss' : hp.choice('loss',['ls','lad','huber','quantile']),\n", 330 | " 'learning_rate': hp.uniform('learning_rate',0.0,1.0),\n", 331 | " 'n_estimators' : hp.choice('n_estimators',range(50,300)),\n", 332 | " 'max_features' : hp.choice('max_features',['auto','sqrt','log2',None]),\n", 333 | " 'min_samples_split': hp.uniform('min_samples_split',0.0,1.0),\n", 334 | " 'min_samples_leaf' : hp.uniform('min_samples_leaf',0.0,0.5),\n", 335 | " }" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "100%|██████████████████████████████████████████████████| 20/20 [00:49<00:00, 2.46s/it, best loss: -0.9715407832381585]\n", 348 | "The best parameter tuned on training set is given by :- {'learning_rate': 0.627881784944577, 'loss': 'huber', 'max_features': None, 'min_samples_leaf': 0.01756016919912151, 'min_samples_split': 0.4790955420164398, 'n_estimators': 250}\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "def hyperopt_train_test(params):\n", 354 | " reg = GradientBoostingRegressor(**params,random_state=19)\n", 355 | " return cross_val_score(reg, X_train, y_train).mean()\n", 356 | "\n", 357 | "def function_to_minimise(params):\n", 358 | " accuracy = hyperopt_train_test(params)\n", 359 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 360 | "\n", 361 | "\n", 362 | "trials = Trials()\n", 363 | "best = fmin(function_to_minimise, gradient_boost_reg_grid, algo=tpe.suggest, max_evals=20, trials=trials)\n", 364 | "best_parameters = space_eval(gradient_boost_reg_grid, best)\n", 365 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 11, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n", 377 | " learning_rate=0.627881784944577, loss='huber',\n", 378 | " max_depth=3, max_features=None, max_leaf_nodes=None,\n", 379 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 380 | " min_samples_leaf=0.01756016919912151,\n", 381 | " min_samples_split=0.4790955420164398,\n", 382 | " min_weight_fraction_leaf=0.0, n_estimators=250,\n", 383 | " n_iter_no_change=None, presort='auto',\n", 384 | " random_state=None, subsample=1.0, tol=0.0001,\n", 385 | " validation_fraction=0.1, verbose=0, warm_start=False)" 386 | ] 387 | }, 388 | "execution_count": 11, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "model = GradientBoostingRegressor(**best_parameters)\n", 395 | "model.fit(X_train, y_train)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 12, 401 | "metadata": {}, 402 | "outputs": [ 403 | { 404 | "name": "stdout", 405 | "output_type": "stream", 406 | "text": [ 407 | "The coefficient of determination is:- 0.9195001148340186\n" 408 | ] 409 | } 410 | ], 411 | "source": [ 412 | "y_pred = model.predict(X_test)\n", 413 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "
\n", 421 | "Conclusion: Gradient Boosting regressor is an improvement over Gaussian Process Regressor in this case.\n", 422 | "
" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "Python 3", 450 | "language": "python", 451 | "name": "python3" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": { 455 | "name": "ipython", 456 | "version": 3 457 | }, 458 | "file_extension": ".py", 459 | "mimetype": "text/x-python", 460 | "name": "python", 461 | "nbconvert_exporter": "python", 462 | "pygments_lexer": "ipython3", 463 | "version": "3.7.4" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 2 468 | } 469 | -------------------------------------------------------------------------------- /Project22/House Price Prediction Regression/Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import warnings\n", 10 | "warnings.filterwarnings('ignore')\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import scipy\n", 15 | "from sklearn.decomposition import PCA\n", 16 | "from sklearn.linear_model import LinearRegression\n", 17 | "from sklearn.ensemble import AdaBoostRegressor\n", 18 | "from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, OneHotEncoder\n", 19 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 20 | "from sklearn.metrics import *\n", 21 | "import hyperopt\n", 22 | "from hyperopt import *\n", 23 | "from hyperopt import fmin, tpe, hp, space_eval\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "%matplotlib inline \n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "
\n", 33 | "Loading the data: We load the data from the mentioned path\n", 34 | "
" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
\n", 185 | "

4 rows × 81 columns

\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", 190 | "0 1 60 RL 65.0 8450 Pave NaN Reg \n", 191 | "1 2 20 RL 80.0 9600 Pave NaN Reg \n", 192 | "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", 193 | "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", 194 | "\n", 195 | " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold \\\n", 196 | "0 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", 197 | "1 Lvl AllPub ... 0 NaN NaN NaN 0 5 \n", 198 | "2 Lvl AllPub ... 0 NaN NaN NaN 0 9 \n", 199 | "3 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", 200 | "\n", 201 | " YrSold SaleType SaleCondition SalePrice \n", 202 | "0 2008 WD Normal 208500 \n", 203 | "1 2007 WD Normal 181500 \n", 204 | "2 2008 WD Normal 223500 \n", 205 | "3 2006 WD Abnorml 140000 \n", 206 | "\n", 207 | "[4 rows x 81 columns]" 208 | ] 209 | }, 210 | "execution_count": 2, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "path_of_input_file = r'D:\\kaggle_trials\\house-prices-advanced-regression-techniques\\train.csv'\n", 217 | "df = pd.read_csv(path_of_input_file)\n", 218 | "df.head(4)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "
\n", 226 | "Categorical and Numerical Columns Identification: We identify categorical and numerical columns from the data. We do set a threshold that if any categorical value is classified as numerical, then it has to be classified back to categorical if the number of distinct values of that column in the dataframe is less than 10 \n", 227 | "
" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 3, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "cols_needed = list(df.columns)\n", 237 | "cols_needed = cols_needed[:len(cols_needed)-1]\n", 238 | "\n", 239 | "possible_numeric_cols = list(df._get_numeric_data().columns)\n", 240 | "possible_numeric_cols.remove('Id')\n", 241 | "\n", 242 | "categorical_columns = list(set(cols_needed)- set(possible_numeric_cols))\n", 243 | "\n", 244 | "numerical_columns = []\n", 245 | "for i in range(len(possible_numeric_cols)):\n", 246 | " col_name = possible_numeric_cols[i]\n", 247 | " if len(df[col_name].unique())<10:\n", 248 | " categorical_columns.append(col_name)\n", 249 | " else:\n", 250 | " numerical_columns.append(col_name)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "
\n", 258 | "Missing Value Treatment: We impute the categorical missing values with their mode and the numerical missing values with their mean\n", 259 | "
" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 4, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "for i in range(len(categorical_columns)):\n", 269 | " df[categorical_columns[i]] = df[categorical_columns[i]].fillna(df[categorical_columns[i]].mode()[0])\n", 270 | "mean_impute_dict ={}\n", 271 | "for i in range(len(numerical_columns)):\n", 272 | " mean_impute_dict[numerical_columns[i]] = np.nanmean(np.float_(df[numerical_columns[i]].values))\n", 273 | "for i in range(len(numerical_columns)):\n", 274 | " df[numerical_columns[i]] = df[numerical_columns[i]].fillna(mean_impute_dict[numerical_columns[i]])" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "
\n", 282 | "Encoding and Feature Scaling: We do the one hot encoding of categorical values and scale(by using MinMaxScaler) the numerical values to get the final feature matrix X. Subseqently, we consider the SalePrice column to be our target variable\n", 283 | "
" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 5, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "ohe = OneHotEncoder()\n", 293 | "scalar = MinMaxScaler()\n", 294 | "encoded_matrix = ohe.fit_transform(df[categorical_columns])\n", 295 | "scaled_matrix = scalar.fit_transform(df[numerical_columns])\n", 296 | "X_complete_matrix = scipy.sparse.hstack((encoded_matrix,scaled_matrix)).A\n", 297 | "Y = scalar.fit_transform(df[['SalePrice']])\n" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "
\n", 305 | "Train Test split: We perform train test split on the data\n", 306 | "
" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 6, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "X_train, X_test, y_train, y_test = train_test_split(X_complete_matrix, Y, test_size=0.33, random_state=42)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "
\n", 323 | "Linear Regression Model Fit: We fit a linear regression model on the data to get the results\n", 324 | "
" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 7, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "reg = LinearRegression()\n", 334 | "reg.fit(X_train,y_train)\n", 335 | "y_pred_linear_reg = reg.predict(X_test)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "
\n", 343 | "Numerical Results: The coefficient of determination is given below\n", 344 | "
" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 8, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "name": "stdout", 354 | "output_type": "stream", 355 | "text": [ 356 | "The coefficient of determination is:- 0.9580147190340209\n" 357 | ] 358 | } 359 | ], 360 | "source": [ 361 | "print('The coefficient of determination is:- ',r2_score(y_pred_linear_reg,y_test))" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "
\n", 369 | "Adaboost Regressor: We will try to fit an Adaboost regressor to the given data\n", 370 | " \n", 371 | "
" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 9, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "adaboost_reg_grid = {'n_estimators' : hp.choice('n_estimators',range(5,50)),\n", 381 | " 'learning_rate' : hp.uniform('learning_rate',0.05,1.01),\n", 382 | " 'loss' : hp.choice('loss',['linear','square','exponential'])\n", 383 | " }" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 10, 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "name": "stdout", 393 | "output_type": "stream", 394 | "text": [ 395 | "100%|██████████████████████████████████████████████████| 30/30 [00:50<00:00, 2.14s/it, best loss: -0.9887685515354506]\n", 396 | "The best parameter tuned on training set is given by :- {'learning_rate': 0.7582218518751838, 'loss': 'square', 'n_estimators': 41}\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "def hyperopt_train_test(params):\n", 402 | " reg = AdaBoostRegressor(**params)\n", 403 | " return cross_val_score(reg, X_train, y_train).mean()\n", 404 | "\n", 405 | "def function_to_minimise(params):\n", 406 | " accuracy = hyperopt_train_test(params)\n", 407 | " return {'loss': -1*accuracy, 'status': STATUS_OK}\n", 408 | "\n", 409 | "\n", 410 | "trials = Trials()\n", 411 | "best = fmin(function_to_minimise, adaboost_reg_grid, algo=tpe.suggest, max_evals=30, trials=trials)\n", 412 | "best_parameters = space_eval(adaboost_reg_grid, best)\n", 413 | "print('The best parameter tuned on training set is given by :- ',best_parameters)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 11, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/plain": [ 424 | "AdaBoostRegressor(base_estimator=None, learning_rate=0.7582218518751838,\n", 425 | " loss='square', n_estimators=41, random_state=None)" 426 | ] 427 | }, 428 | "execution_count": 11, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "model = AdaBoostRegressor(**best_parameters)\n", 435 | "model.fit(X_train, y_train)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 12, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "y_pred = model.predict(X_test)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 13, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "name": "stdout", 454 | "output_type": "stream", 455 | "text": [ 456 | "The coefficient of determination is:- 0.974902747127915\n" 457 | ] 458 | } 459 | ], 460 | "source": [ 461 | "print('The coefficient of determination is:- ',r2_score(y_pred,y_test))" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "
\n", 469 | "Conclusion : We can clearly see that Adaboost regressor performed really well as compared to Linear Regression\n", 470 | "
" 471 | ] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.7.3" 491 | } 492 | }, 493 | "nbformat": 4, 494 | "nbformat_minor": 2 495 | } 496 | --------------------------------------------------------------------------------