├── .gitignore
├── LICENSE
├── README.md
├── datasets
│   ├── .gitignore
│   ├── 15_fraud_detection.csv.zip
│   ├── E18.xlsx
│   ├── Tweets.zip
│   ├── Wine_data_red.csv
│   ├── Wine_data_white.csv
│   ├── agaricus-lepiota.zip
│   ├── churn.csv
│   ├── communities.data
│   ├── dataTest_carListings.zip
│   ├── dataTesting.zip
│   ├── dataTrain_carListings.zip
│   ├── dataTraining.zip
│   ├── glass.csv
│   ├── hitters.csv
│   ├── houses_prices_prediction.csv.zip
│   ├── income.csv.zip
│   ├── mashable.csv
│   ├── mashable_texts.csv
│   ├── phishing.csv
│   ├── titanic.csv.zip
│   ├── universityGraduateAdmissions.csv
│   ├── vehicles_test.csv
│   ├── vehicles_train.csv
│   └── wine.data
├── exercises
│   ├── E1-DataScienceOverview.md
│   ├── E10-RandomForestPerformanceReview.md
│   ├── E11-RandomForest.ipynb
│   ├── E12-GradientBoostingRewiew.md
│   ├── E13-CategoricalEncoding.ipynb
│   ├── E14-UnbalancedLearningOverview.md
│   ├── E15-fraud_detection.ipynb
│   ├── E16-NLPOverview
│   ├── E17-SentimentPrediction.ipynb
│   ├── E18-ClassHomeworksAnalysis.ipynb
│   ├── E19-NeuralNetworksFundamentation.ipynb
│   ├── E2- Python Text Analysis.ipynb
│   ├── E20-NeuralNetworksKeras.ipynb
│   ├── E3-LearningOverview.md
│   ├── E4-Regression-Linear&Logistic.ipynb
│   ├── E5-DecisionTreesOverview.md
│   ├── E6-SVM&Regularization.ipynb
│   ├── E7-DecisionTrees.ipynb
│   ├── E8-EnsembleTreesOverview.md
│   ├── E9-Bagging.ipynb
│   ├── P1-UsedVehiclePricePrediction.ipynb
│   ├── P2-MovieGenrePrediction.ipynb
│   ├── P3-ConversationsToxicityDetection-SampleSolution.ipynb
│   ├── P3-ConversationsToxicityDetection.ipynb
│   ├── images
│   │   ├── classification_kiank.png
│   │   ├── grad_summary.png
│   │   ├── hidden_tunning.png
│   │   ├── sgd.gif
│   │   └── sgd_bad.gif
│   └── moviegenre.png
└── notebooks
    ├── 01-IntroMachineLearning.ipynb
    ├── 02-IntroPython_Numpy_Scypy_Pandas.ipynb
    ├── 03-linear_regression.ipynb
    ├── 04-logistic_regression.ipynb
    ├── 05-SVM.ipynb
    ├── 06-decision_trees.ipynb
    ├── 07-regularization.ipynb
    ├── 08-IntroductionToAPIs.ipynb
    ├── 09-Model_Deployment.ipynb
    ├── 10-CreatingAPIinAWS.ipynb
    ├── 11-Ensembles_Bagging.ipynb
    ├── 12-Ensembles_Boosting.ipynb
    ├── 13-Ensembles_RandomForest.ipynb
    ├── 14-data_preparation.ipynb
    ├── 15-Unbalanced_Datasets.ipynb
    ├── 16-IntroNLP.pdf
    ├── 17-NaturalLanguageProcessing.ipynb
    ├── 18-IntroNLP_II.pdf
    ├── 19-IntroductionDeepLearningMLP.ipynb
    ├── 20-DeepLearning_keras.ipynb
    ├── 21-CNN.ipynb
    ├── 22-RecurrentNeuralNetworks_LSTM.ipynb
    ├── images
    │   ├── MLP.png
    │   ├── Perceptron.png
    │   ├── backprop (1).png
    │   ├── backprop.png
    │   ├── bkwd_step_net.png
    │   ├── classification_kiank.png
    │   ├── dl_overview.png
    │   ├── fwd_step.png
    │   ├── fwd_step_net.png
    │   ├── grad_summary.png
    │   ├── img001.PNG
    │   ├── img002.GIF
    │   ├── img003.PNG
    │   ├── img004.PNG
    │   ├── img005.PNG
    │   ├── img006.PNG
    │   ├── img007.PNG
    │   ├── img008.PNG
    │   ├── img009.PNG
    │   ├── img010.PNG
    │   ├── img011.PNG
    │   ├── img012.PNG
    │   ├── img013.PNG
    │   ├── img014.PNG
    │   ├── img015.PNG
    │   ├── img016.PNG
    │   ├── keras-logo-small.jpg
    │   ├── logistic_function.png
    │   ├── multi-layers-1.png
    │   ├── multi-layers-2.png
    │   ├── sgd.gif
    │   ├── sgd_bad.gif
    │   ├── single_layer.png
    │   └── updateParameters.png
    └── model_deployment
        ├── api.py
        ├── m09_model_deployment.py
        └── phishing_clf.pkl
/.gitignore:
--------------------------------------------------------------------------------
*.ipynb_checkpoints
*~
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Alejandro Correa Bahnsen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Practical Machine Learning

*Instructor: Alejandro Correa Bahnsen*

- email:
- twitter: [@albahnsen](https://twitter.com/albahnsen)
- github: [albahnsen](http://github.com/albahnsen)

The use of statistical models in computer algorithms allows computers to make decisions and predictions, and to perform tasks that traditionally require human cognitive abilities. Machine learning is the interdisciplinary field at the intersection of statistics and computer science that develops such algorithms and interweaves them with computer systems. It underpins many modern technologies: speech recognition, internet search, bioinformatics, computer vision, Amazon’s recommender system, Google’s driverless car, and the most recent imaging systems for cancer diagnosis are all built on machine learning.

This course explains how to build systems that learn and adapt using real-world applications. Topics covered include machine learning fundamentals, data analysis in Python, deep learning frameworks, natural language processing models, and recurrent models.
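Before the first session it is worth verifying that the scientific Python stack listed under Requirements below is available. A minimal sketch (the `check_package` helper is ours, not part of the course material; the package names mirror the Requirements list):

```python
# Quick environment sanity check for the course requirements.
# `check_package` is a hypothetical helper, not part of the repository.

def check_package(name):
    """Return the installed version of `name`, 'unknown' if the module
    exposes no __version__, or None if the package is missing."""
    try:
        module = __import__(name)
    except ImportError:
        return None
    return getattr(module, "__version__", "unknown")

if __name__ == "__main__":
    # Packages taken from the Requirements list below.
    for pkg in ["numpy", "scipy", "matplotlib", "IPython",
                "pandas", "seaborn", "sklearn"]:
        print("{:12s} {}".format(pkg, check_package(pkg) or "MISSING"))
```

`str.format` is used instead of f-strings so the snippet also runs on the Python 3.5 version the course targets.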
The course will be project-oriented, with emphasis placed on writing software implementations of learning algorithms applied to real-world problems, in particular image analysis, image captioning, natural language processing, and sentiment detection, among others.

## Requirements

* [Python](http://www.python.org) version 3.5;
* [Numpy](http://www.numpy.org), the core numerical extensions for linear algebra and multidimensional arrays;
* [Scipy](http://www.scipy.org), additional libraries for scientific programming;
* [Matplotlib](http://matplotlib.sf.net), an excellent plotting and graphing library;
* [IPython](http://ipython.org), with the additional libraries required for the notebook interface;
* [Pandas](http://pandas.pydata.org/), the Python counterpart of R's data frame;
* [Seaborn](http://stanford.edu/~mwaskom/software/seaborn/), used mainly for plot styling;
* [scikit-learn](http://scikit-learn.org), the machine learning library.

A good, easy-to-install option that supports Mac, Windows, and Linux, and that ships with all of these packages (and much more), is [Anaconda](https://www.continuum.io/).

GIT!!
Unfortunately, Git is out of the scope of this class, but please take a look at these [tutorials](https://help.github.com/articles/good-resources-for-learning-git-and-github/).

## Evaluation

* 30% Exercises
* 50% Projects
* 20% Final Project

## Schedule

### Supervised Machine Learning

| Date | Session | Notebooks/Presentations | Exercises |
| :----| :----| :------------- | :------------- |
| January 21st | Introduction to Python and ML | | |
| January 28th | Linear Models | | |
| February 4th | SVM & Decision Trees | | |
| February 11th | Machine Learning as a Service | | |
| February 18th | Ensembles | | |
| February 25th | Random Forest | | |
| March 4th | Feature Engineering | | |
| March 11th | Project Presentations | | |
| March 18th | Unbalanced Learning | | |

### Natural Language Processing

| Date | Session | Notebooks/Presentations | Exercises |
| :----| :----| :------------- | :------------- |
| April 1st | Natural Language Processing | | |
| April 8th | Sentiment Analysis | | |
| April 22nd | Project Presentations | | |

### Advanced Topics in Machine Learning

| Date | Session | Notebooks/Presentations | Exercises |
| :----| :----| :------------- | :------------- |
| April 29th | Introduction to Deep Learning | | |
| May 6th | Introduction to Deep Learning II | | |

### Final Project Presentation

* [P3 - Kaggle Competition]()

--------------------------------------------------------------------------------
/datasets/.gitignore:
--------------------------------------------------------------------------------
/test.csv.zip
/train.csv.zip
--------------------------------------------------------------------------------
/datasets/15_fraud_detection.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/15_fraud_detection.csv.zip
--------------------------------------------------------------------------------
/datasets/E18.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/E18.xlsx
--------------------------------------------------------------------------------
/datasets/Tweets.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/Tweets.zip
--------------------------------------------------------------------------------
/datasets/agaricus-lepiota.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/agaricus-lepiota.zip
--------------------------------------------------------------------------------
/datasets/dataTest_carListings.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/dataTest_carListings.zip
--------------------------------------------------------------------------------
/datasets/dataTesting.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/dataTesting.zip
--------------------------------------------------------------------------------
/datasets/dataTrain_carListings.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/dataTrain_carListings.zip -------------------------------------------------------------------------------- /datasets/dataTraining.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/dataTraining.zip -------------------------------------------------------------------------------- /datasets/glass.csv: -------------------------------------------------------------------------------- 1 | id,ri,na,mg,al,si,k,ca,ba,fe,glass_type 2 | 22,1.51966,14.77,3.75,0.29,72.02,0.03,9.0,0.0,0.0,1 3 | 185,1.51115,17.38,0.0,0.34,75.41,0.0,6.65,0.0,0.0,6 4 | 40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1 5 | 39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1 6 | 51,1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0.0,0.16,1 7 | 184,1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6 8 | 110,1.5181799999999999,13.72,0.0,0.56,74.45,0.0,10.99,0.0,0.0,2 9 | 158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0,3 10 | 153,1.51779,13.64,3.65,0.65,73.0,0.06,8.93,0.0,0.0,3 11 | 104,1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0.0,0.0,2 12 | 113,1.52777,12.64,0.0,0.67,72.02,0.06,14.4,0.0,0.0,2 13 | 48,1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0.0,0.1,1 14 | 44,1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0.0,0.0,1 15 | 162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3 16 | 112,1.52739,11.02,0.0,0.75,73.08,0.0,14.96,0.0,0.0,2 17 | 111,1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2 18 | 64,1.52227,14.17,3.81,0.78,71.35,0.0,9.69,0.0,0.0,1 19 | 49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.0,0.0,1 20 | 70,1.523,13.31,3.58,0.82,71.99,0.12,10.17,0.0,0.03,1 21 | 103,1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0.0,0.2,2 22 | 152,1.5212700000000001,14.32,3.9,0.83,71.5,0.0,9.49,0.0,0.0,3 23 | 68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.0,0.17,1 
24 | 67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.0,0.17,1 25 | 63,1.5217200000000002,13.51,3.86,0.88,71.79,0.23,9.54,0.0,0.11,1 26 | 18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.0,0.0,1 27 | 65,1.5217200000000002,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1 28 | 69,1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16,1 29 | 163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3 30 | 108,1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24,2 31 | 109,1.5222200000000001,14.43,0.0,1.0,72.67,0.1,11.52,0.0,0.08,2 32 | 8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1 33 | 97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.0,0.15,2 34 | 142,1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2 35 | 137,1.51806,13.0,3.8,1.08,73.07,0.56,8.38,0.0,0.12,2 36 | 1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1 37 | 147,1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0.0,0.0,3 38 | 61,1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0.0,0.0,1 39 | 91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.0,0.22,2 40 | 41,1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0,1 41 | 66,1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0.0,0.0,1 42 | 57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.0,0.31,1 43 | 7,1.5174299999999998,13.3,3.6,1.14,73.09,0.58,8.17,0.0,0.0,1 44 | 32,1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0.0,0.0,1 45 | 25,1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0,1 46 | 125,1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0.0,0.0,2 47 | 17,1.5178399999999999,12.68,3.67,1.16,73.11,0.61,8.7,0.0,0.0,1 48 | 98,1.5174299999999998,12.2,3.25,1.16,73.55,0.62,8.9,0.0,0.24,2 49 | 105,1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0.0,0.0,2 50 | 59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.0,0.0,1 51 | 47,1.5186899999999999,13.19,3.37,1.18,72.72,0.57,8.83,0.0,0.16,1 52 | 133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.0,0.0,2 53 | 19,1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0.0,0.0,1 54 | 53,1.5180799999999999,13.43,2.87,1.19,72.84,0.55,9.03,0.0,0.0,1 55 | 45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.0,0.3,1 56 | 178,1.51937,13.79,2.41,1.19,72.76,0.0,9.77,0.0,0.0,6 57 
| 115,1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0.0,0.0,2 58 | 60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.0,0.11,1 59 | 202,1.51653,11.95,0.0,1.19,75.18,2.7,8.93,0.0,0.0,7 60 | 42,1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0.0,0.0,1 61 | 36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0,1 62 | 26,1.5176399999999999,12.98,3.54,1.21,73.0,0.65,8.53,0.0,0.0,1 63 | 154,1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0.0,0.0,3 64 | 188,1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0.0,0.0,7 65 | 16,1.5176100000000001,12.81,3.54,1.23,73.24,0.58,8.39,0.0,0.0,1 66 | 50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.0,0.0,1 67 | 33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1 68 | 145,1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24,2 69 | 146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.0,0.35,2 70 | 5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1 71 | 135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.0,0.0,2 72 | 83,1.5164600000000001,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2 73 | 79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2 74 | 127,1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0.0,0.0,2 75 | 114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.0,0.14,2 76 | 156,1.5164600000000001,13.04,3.4,1.26,73.01,0.52,8.58,0.0,0.0,3 77 | 12,1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0.0,0.0,1 78 | 72,1.5184799999999998,13.64,3.87,1.27,71.96,0.54,8.32,0.0,0.32,2 79 | 14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17,1 80 | 30,1.5178399999999999,13.08,3.49,1.28,72.86,0.6,8.49,0.0,0.0,1 81 | 54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.0,0.0,1 82 | 157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.0,0.0,3 83 | 52,1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1 84 | 58,1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0.0,0.0,1 85 | 4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1 86 | 56,1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1 87 | 55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1 88 | 23,1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0.0,0.0,1 89 | 31,1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0.0,0.14,1 90 | 
136,1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0.0,0.28,2 91 | 190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.0,7 92 | 15,1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0.0,0.0,1 93 | 155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.0,0.0,3 94 | 62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0,1 95 | 121,1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0.0,0.0,2 96 | 37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.0,1 97 | 28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.0,0.0,1 98 | 116,1.5184600000000001,13.41,3.89,1.33,72.38,0.51,8.28,0.0,0.0,2 99 | 43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0,1 100 | 148,1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0.0,0.0,3 101 | 35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.0,0.0,1 102 | 24,1.5175100000000001,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0,1 103 | 150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0,3 104 | 38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.0,0.0,1 105 | 46,1.519,13.49,3.48,1.35,71.95,0.55,9.0,0.0,0.0,1 106 | 132,1.52614,13.7,0.0,1.36,71.24,0.19,13.44,0.0,0.1,2 107 | 10,1.51755,13.0,3.6,1.36,72.99,0.57,8.4,0.0,0.11,1 108 | 2,1.5176100000000001,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1 109 | 131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.0,0.0,2 110 | 9,1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0.0,0.0,1 111 | 34,1.5175299999999998,12.57,3.47,1.38,73.39,0.6,8.55,0.0,0.06,1 112 | 149,1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0.0,0.1,3 113 | 13,1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0.0,0.24,1 114 | 174,1.52043,13.38,0.0,1.4,72.25,0.33,12.5,0.0,0.0,5 115 | 27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0,1 116 | 117,1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0.0,0.1,2 117 | 128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.0,0.17,2 118 | 96,1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0.0,0.0,2 119 | 29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1 120 | 82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0,2 121 | 100,1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0.0,2 122 | 143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2 123 | 
101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2 124 | 92,1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0.0,0.0,2 125 | 94,1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0.0,0.0,2 126 | 87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.0,0.0,2 127 | 120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.0,0.0,2 128 | 123,1.5168700000000002,13.23,3.54,1.48,72.84,0.56,8.1,0.0,0.0,2 129 | 89,1.5161799999999999,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2 130 | 21,1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1 131 | 95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.0,0.0,2 132 | 86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.0,0.0,2 133 | 76,1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2 134 | 176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5 135 | 159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.0,0.0,3 136 | 73,1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0.0,0.0,2 137 | 88,1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0.0,0.1,2 138 | 119,1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0.0,0.29,2 139 | 77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.0,0.0,2 140 | 181,1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0,6 141 | 161,1.5183200000000001,13.33,3.34,1.54,72.14,0.56,8.99,0.0,0.0,3 142 | 75,1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0.0,0.0,2 143 | 78,1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2 144 | 3,1.5161799999999999,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1 145 | 139,1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0.0,0.0,2 146 | 134,1.518,13.71,3.93,1.54,71.81,0.54,8.21,0.0,0.15,2 147 | 84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.0,0.09,2 148 | 177,1.51905,14.0,2.39,1.56,72.37,0.0,9.57,0.0,0.0,6 149 | 167,1.5215100000000001,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5 150 | 166,1.5217100000000001,11.56,1.88,1.56,72.86,0.47,11.41,0.0,0.0,5 151 | 11,1.5157100000000001,12.72,3.46,1.56,73.2,0.67,8.09,0.0,0.24,1 152 | 126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.0,0.12,2 153 | 138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.0,0.0,2 154 | 74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.0,0.0,2 155 | 
171,1.52369,13.44,0.0,1.58,72.22,0.32,12.24,0.0,0.0,5 156 | 93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.0,0.19,2 157 | 141,1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0.0,0.0,2 158 | 179,1.51829,14.46,2.24,1.62,72.38,0.0,9.26,0.0,0.0,6 159 | 122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.0,0.21,2 160 | 6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1 161 | 130,1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2 162 | 160,1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3 163 | 102,1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0.0,0.0,2 164 | 140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.0,0.0,2 165 | 168,1.51969,12.64,0.0,1.65,73.75,0.38,11.53,0.0,0.0,5 166 | 180,1.51852,14.09,2.19,1.66,72.67,0.0,9.32,0.0,0.0,6 167 | 129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2 168 | 20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.0,0.07,1 169 | 124,1.5170700000000001,13.48,3.48,1.71,72.52,0.62,7.99,0.0,0.0,2 170 | 99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.0,2 171 | 182,1.51888,14.99,0.78,1.74,72.5,0.0,9.95,0.0,0.0,6 172 | 71,1.5157399999999999,14.86,3.67,1.74,71.87,0.16,7.36,0.0,0.12,2 173 | 151,1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0.0,0.17,3 174 | 170,1.51994,13.27,0.0,1.76,73.03,0.47,11.32,0.0,0.0,5 175 | 191,1.51613,13.88,1.78,1.79,73.1,0.0,8.67,0.76,0.0,7 176 | 144,1.51709,13.0,3.47,1.79,72.72,0.66,8.18,0.0,0.0,2 177 | 206,1.51732,14.95,0.0,1.8,72.99,0.0,8.61,1.55,0.0,7 178 | 186,1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0.0,7 179 | 118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.0,0.0,2 180 | 208,1.51831,14.39,0.0,1.82,72.86,1.41,6.47,2.88,0.0,7 181 | 169,1.5166600000000001,12.86,0.0,1.83,73.88,0.97,10.17,0.0,0.0,5 182 | 165,1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0.0,0.0,5 183 | 207,1.51645,14.94,0.0,1.87,73.11,0.0,8.67,1.38,0.0,7 184 | 90,1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2 185 | 106,1.52475,11.45,0.0,1.88,72.19,0.81,13.24,0.0,0.34,2 186 | 80,1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0.0,0.0,2 187 | 213,1.51651,14.38,0.0,1.94,73.61,0.0,8.48,1.57,0.0,7 188 | 
195,1.51683,14.56,0.0,1.98,73.29,0.0,8.52,1.57,0.07,7 189 | 211,1.51685,14.92,0.0,1.99,73.06,0.0,8.4,1.59,0.0,7 190 | 204,1.5165799999999998,14.8,0.0,1.99,73.11,0.0,8.28,1.71,0.0,7 191 | 194,1.51719,14.75,0.0,2.0,73.02,0.0,8.53,1.59,0.08,7 192 | 212,1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7 193 | 189,1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0.0,0.0,7 194 | 214,1.51711,14.23,0.0,2.08,73.36,0.0,8.62,1.67,0.0,7 195 | 85,1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0.0,0.0,2 196 | 183,1.51916,14.15,0.0,2.09,72.74,0.0,10.88,0.0,0.0,6 197 | 107,1.53125,10.73,0.0,2.1,69.81,0.58,13.3,3.15,0.28,2 198 | 81,1.5159200000000002,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.0,2 199 | 175,1.5205799999999998,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,5 200 | 187,1.5183799999999998,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.0,7 201 | 201,1.51508,15.15,0.0,2.25,73.5,0.0,8.34,0.63,0.0,7 202 | 205,1.51617,14.95,0.0,2.27,73.3,0.0,8.71,0.67,0.0,7 203 | 198,1.5172700000000001,14.7,0.0,2.34,73.28,0.0,8.95,0.66,0.0,7 204 | 192,1.5160200000000001,14.85,0.0,2.38,73.28,0.0,8.76,0.64,0.09,7 205 | 203,1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7 206 | 200,1.51609,15.01,0.0,2.51,73.05,0.05,8.83,0.53,0.0,7 207 | 197,1.51556,13.87,0.0,2.54,73.23,0.14,9.41,0.81,0.01,7 208 | 199,1.51531,14.38,0.0,2.66,73.1,0.04,9.08,0.64,0.0,7 209 | 196,1.51545,14.14,0.0,2.68,73.39,0.08,9.07,0.61,0.05,7 210 | 209,1.5164,14.37,0.0,2.74,72.85,0.0,9.45,0.54,0.0,7 211 | 193,1.51623,14.2,0.0,2.79,73.46,0.04,9.04,0.4,0.09,7 212 | 210,1.51623,14.14,0.0,2.88,72.61,0.08,9.18,1.06,0.0,7 213 | 173,1.51321,13.0,0.0,3.02,70.7,6.21,6.93,0.0,0.0,5 214 | 172,1.51316,13.02,0.0,3.04,70.48,6.21,6.96,0.0,0.0,5 215 | 164,1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0.0,5 216 | -------------------------------------------------------------------------------- /datasets/hitters.csv: -------------------------------------------------------------------------------- 1 | 
"AtBat","Hits","HmRun","Runs","RBI","Walks","Years","CAtBat","CHits","CHmRun","CRuns","CRBI","CWalks","League","Division","PutOuts","Assists","Errors","Salary","NewLeague" 2 | 293,66,1,30,29,14,1,293,66,1,30,29,14,"A","E",446,33,20,NA,"A" 3 | 315,81,7,24,38,39,14,3449,835,69,321,414,375,"N","W",632,43,10,475,"N" 4 | 479,130,18,66,72,76,3,1624,457,63,224,266,263,"A","W",880,82,14,480,"A" 5 | 496,141,20,65,78,37,11,5628,1575,225,828,838,354,"N","E",200,11,3,500,"N" 6 | 321,87,10,39,42,30,2,396,101,12,48,46,33,"N","E",805,40,4,91.5,"N" 7 | 594,169,4,74,51,35,11,4408,1133,19,501,336,194,"A","W",282,421,25,750,"A" 8 | 185,37,1,23,8,21,2,214,42,1,30,9,24,"N","E",76,127,7,70,"A" 9 | 298,73,0,24,24,7,3,509,108,0,41,37,12,"A","W",121,283,9,100,"A" 10 | 323,81,6,26,32,8,2,341,86,6,32,34,8,"N","W",143,290,19,75,"N" 11 | 401,92,17,49,66,65,13,5206,1332,253,784,890,866,"A","E",0,0,0,1100,"A" 12 | 574,159,21,107,75,59,10,4631,1300,90,702,504,488,"A","E",238,445,22,517.143,"A" 13 | 202,53,4,31,26,27,9,1876,467,15,192,186,161,"N","W",304,45,11,512.5,"N" 14 | 418,113,13,48,61,47,4,1512,392,41,205,204,203,"N","E",211,11,7,550,"N" 15 | 239,60,0,30,11,22,6,1941,510,4,309,103,207,"A","E",121,151,6,700,"A" 16 | 196,43,7,29,27,30,13,3231,825,36,376,290,238,"N","E",80,45,8,240,"N" 17 | 183,39,3,20,15,11,3,201,42,3,20,16,11,"A","W",118,0,0,NA,"A" 18 | 568,158,20,89,75,73,15,8068,2273,177,1045,993,732,"N","W",105,290,10,775,"N" 19 | 190,46,2,24,8,15,5,479,102,5,65,23,39,"A","W",102,177,16,175,"A" 20 | 407,104,6,57,43,65,12,5233,1478,100,643,658,653,"A","W",912,88,9,NA,"A" 21 | 127,32,8,16,22,14,8,727,180,24,67,82,56,"N","W",202,22,2,135,"N" 22 | 413,92,16,72,48,65,1,413,92,16,72,48,65,"N","E",280,9,5,100,"N" 23 | 426,109,3,55,43,62,1,426,109,3,55,43,62,"A","W",361,22,2,115,"N" 24 | 22,10,1,4,2,1,6,84,26,2,9,9,3,"A","W",812,84,11,NA,"A" 25 | 472,116,16,60,62,74,6,1924,489,67,242,251,240,"N","W",518,55,3,600,"N" 26 | 
629,168,18,73,102,40,18,8424,2464,164,1008,1072,402,"A","E",1067,157,14,776.667,"A" 27 | 587,163,4,92,51,70,6,2695,747,17,442,198,317,"A","E",434,9,3,765,"A" 28 | 324,73,4,32,18,22,7,1931,491,13,291,108,180,"N","E",222,3,3,708.333,"N" 29 | 474,129,10,50,56,40,10,2331,604,61,246,327,166,"N","W",732,83,13,750,"N" 30 | 550,152,6,92,37,81,5,2308,633,32,349,182,308,"N","W",262,329,16,625,"N" 31 | 513,137,20,90,95,90,14,5201,1382,166,763,734,784,"A","W",267,5,3,900,"A" 32 | 313,84,9,42,30,39,17,6890,1833,224,1033,864,1087,"A","W",127,221,7,NA,"A" 33 | 419,108,6,55,36,22,3,591,149,8,80,46,31,"N","W",226,7,4,110,"N" 34 | 517,141,27,70,87,52,9,3571,994,215,545,652,337,"N","W",1378,102,8,NA,"N" 35 | 583,168,17,83,80,56,5,1646,452,44,219,208,136,"A","E",109,292,25,612.5,"A" 36 | 204,49,6,23,25,12,7,1309,308,27,126,132,66,"A","W",419,46,5,300,"A" 37 | 379,106,10,38,60,30,14,6207,1906,146,859,803,571,"N","W",72,170,24,850,"N" 38 | 161,36,0,19,10,17,4,1053,244,3,156,86,107,"A","E",70,149,12,NA,"A" 39 | 268,60,5,24,25,15,2,350,78,5,34,29,18,"N","W",442,59,6,90,"N" 40 | 346,98,5,31,53,30,16,5913,1615,235,784,901,560,"A","E",0,0,0,NA,"A" 41 | 241,61,1,34,12,14,1,241,61,1,34,12,14,"N","W",166,172,10,NA,"N" 42 | 181,41,1,15,21,33,2,232,50,4,20,29,45,"A","E",326,29,5,67.5,"A" 43 | 216,54,0,21,18,15,18,7318,1926,46,796,627,483,"N","W",103,84,5,NA,"N" 44 | 200,57,6,23,14,14,9,2516,684,46,371,230,195,"N","W",69,1,1,NA,"N" 45 | 217,46,7,32,19,9,4,694,160,32,86,76,32,"A","E",307,25,1,180,"A" 46 | 194,40,7,19,29,30,11,4183,1069,64,486,493,608,"A","E",325,22,2,NA,"A" 47 | 254,68,2,28,26,22,6,999,236,21,108,117,118,"A","E",359,30,4,305,"A" 48 | 416,132,7,57,49,33,3,932,273,24,113,121,80,"N","W",73,177,18,215,"N" 49 | 205,57,8,34,32,9,5,756,192,32,117,107,51,"A","E",58,4,4,247.5,"A" 50 | 542,140,12,46,75,41,16,7099,2130,235,987,1089,431,"A","E",697,61,9,NA,"A" 51 | 526,146,13,71,70,84,6,2648,715,77,352,342,289,"N","W",303,9,9,815,"N" 52 | 
457,101,14,42,63,22,17,6521,1767,281,1003,977,619,"A","W",389,39,4,875,"A" 53 | 214,53,2,30,29,23,2,226,59,2,32,32,27,"N","E",109,7,3,70,"N" 54 | 19,7,0,1,2,1,4,41,13,1,3,4,4,"A","E",0,0,0,NA,"A" 55 | 591,168,19,80,72,39,9,4478,1307,113,634,563,319,"A","W",67,147,4,1200,"A" 56 | 403,101,12,45,53,39,12,5150,1429,166,747,666,526,"A","E",316,6,5,675,"A" 57 | 405,102,18,49,85,20,6,950,231,29,99,138,64,"N","W",161,10,3,415,"N" 58 | 244,58,9,28,25,35,4,1335,333,49,164,179,194,"N","W",142,14,2,340,"N" 59 | 235,61,3,24,39,21,14,3926,1029,35,441,401,333,"A","E",425,43,4,NA,"A" 60 | 313,78,6,32,41,12,12,3742,968,35,409,321,170,"N","W",106,206,7,416.667,"N" 61 | 627,177,25,98,81,70,6,3210,927,133,529,472,313,"A","E",240,482,13,1350,"A" 62 | 416,113,24,58,69,16,1,416,113,24,58,69,16,"A","E",203,70,10,90,"A" 63 | 155,44,6,21,23,15,16,6631,1634,98,698,661,777,"N","E",53,88,3,275,"N" 64 | 236,56,0,27,15,11,4,1115,270,1,116,64,57,"A","W",125,199,13,230,"A" 65 | 216,53,1,31,15,22,4,926,210,9,118,69,114,"N","W",73,152,11,225,"N" 66 | 24,3,0,1,0,2,3,159,28,0,20,12,9,"A","W",80,4,0,NA,"A" 67 | 585,139,31,93,94,62,17,7546,1982,315,1141,1179,727,"A","E",0,0,0,950,"A" 68 | 191,37,4,12,17,14,4,773,163,16,61,74,52,"N","E",391,38,8,NA,"N" 69 | 199,53,5,29,22,21,3,514,120,8,57,40,39,"A","W",152,3,5,75,"A" 70 | 521,142,20,67,86,45,4,815,205,22,99,103,78,"A","E",107,242,23,105,"A" 71 | 419,113,1,44,27,44,12,4484,1231,32,612,344,422,"A","E",211,2,1,NA,"A" 72 | 311,81,3,42,30,26,17,8247,2198,100,950,909,690,"N","W",153,223,10,320,"N" 73 | 138,31,8,18,21,38,3,244,53,12,33,32,55,"N","E",244,21,4,NA,"N" 74 | 512,131,26,69,96,52,14,5347,1397,221,712,815,548,"A","W",119,216,12,850,"A" 75 | 507,122,29,78,85,91,18,7761,1947,347,1175,1152,1380,"A","E",808,108,2,535,"A" 76 | 529,137,26,86,97,97,15,6661,1785,291,1082,949,989,"A","E",280,10,5,933.333,"A" 77 | 424,119,6,57,46,13,9,3651,1046,32,461,301,112,"A","E",224,286,8,850,"N" 78 | 351,97,4,55,29,39,4,1258,353,16,196,110,117,"N","W",226,7,3,210,"A" 79 | 
195,55,5,24,33,30,8,1313,338,25,144,149,153,"N","E",83,2,1,NA,"N" 80 | 388,103,15,59,47,39,6,2174,555,80,285,274,186,"A","W",182,9,4,325,"A" 81 | 339,96,4,37,29,23,4,1064,290,11,123,108,55,"A","W",104,213,9,275,"A" 82 | 561,118,35,70,94,33,16,6677,1575,442,901,1210,608,"A","W",463,32,8,NA,"A" 83 | 255,70,7,49,35,43,15,6311,1661,154,1019,608,820,"N","E",51,54,8,450,"N" 84 | 677,238,31,117,113,53,5,2223,737,93,349,401,171,"A","E",1377,100,6,1975,"A" 85 | 227,46,7,23,20,12,5,1325,324,44,156,158,67,"A","W",92,2,2,NA,"A" 86 | 614,163,29,89,83,75,11,5017,1388,266,813,822,617,"N","W",303,6,6,1900,"N" 87 | 329,83,9,50,39,56,9,3828,948,145,575,528,635,"A","W",276,6,2,600,"A" 88 | 637,174,31,89,116,56,14,6727,2024,247,978,1093,495,"N","W",278,9,9,1041.667,"N" 89 | 280,82,16,44,45,47,2,428,113,25,61,70,63,"A","E",148,4,2,110,"A" 90 | 155,41,12,21,29,22,16,5409,1338,181,746,805,875,"A","W",165,9,1,260,"A" 91 | 458,114,13,67,57,48,4,1350,298,28,160,123,122,"A","W",246,389,18,475,"A" 92 | 314,83,13,39,46,16,5,1457,405,28,156,159,76,"A","W",533,40,4,431.5,"A" 93 | 475,123,27,76,93,72,4,1810,471,108,292,343,267,"N","E",226,10,6,1220,"N" 94 | 317,78,7,35,35,32,1,317,78,7,35,35,32,"A","E",45,122,26,70,"A" 95 | 511,138,25,76,96,61,3,592,164,28,87,110,71,"A","W",157,7,8,145,"A" 96 | 278,69,3,24,21,29,8,2079,565,32,258,192,162,"N","W",142,210,10,NA,"N" 97 | 382,119,13,54,58,36,12,2133,594,41,287,294,227,"N","W",59,156,9,595,"N" 98 | 565,148,24,90,104,77,14,7287,2083,305,1135,1234,791,"A","E",292,9,5,1861.46,"A" 99 | 277,71,2,27,29,14,15,5952,1647,60,753,596,259,"N","W",360,32,5,NA,"N" 100 | 415,115,27,97,71,68,3,711,184,45,156,119,99,"N","W",274,2,7,300,"N" 101 | 424,110,15,70,47,36,7,2130,544,38,335,174,258,"N","W",292,6,3,490,"N" 102 | 495,151,17,61,84,78,10,5624,1679,275,884,1015,709,"A","E",1045,88,13,2460,"A" 103 | 524,132,9,69,47,54,2,972,260,14,123,92,90,"A","E",212,327,20,NA,"A" 104 | 233,49,2,41,23,18,8,1350,336,7,166,122,106,"A","E",102,132,10,375,"A" 105 | 
395,106,16,48,56,35,10,2303,571,86,266,323,248,"A","E",709,41,7,NA,"A" 106 | 397,114,23,67,67,53,13,5589,1632,241,906,926,716,"A","E",244,2,4,NA,"A" 107 | 210,37,8,15,19,15,6,994,244,36,107,114,53,"A","E",40,115,15,NA,"A" 108 | 420,95,23,55,58,37,3,646,139,31,77,77,61,"N","W",206,10,7,NA,"N" 109 | 566,154,22,76,84,43,14,6100,1583,131,743,693,300,"A","W",316,439,10,750,"A" 110 | 641,198,31,101,108,41,5,2129,610,92,297,319,117,"A","E",269,17,10,1175,"A" 111 | 215,51,4,19,18,11,1,215,51,4,19,18,11,"A","E",116,5,12,70,"A" 112 | 441,128,16,70,73,80,14,6675,2095,209,1072,1050,695,"A","W",97,218,16,1500,"A" 113 | 325,76,16,33,52,37,5,1506,351,71,195,219,214,"N","W",726,87,3,385,"A" 114 | 490,125,24,81,105,62,13,6063,1646,271,847,999,680,"N","E",869,62,8,1925.571,"N" 115 | 574,152,31,91,101,64,3,985,260,53,148,173,95,"N","W",1253,111,11,215,"N" 116 | 284,64,14,30,42,24,18,7023,1925,348,986,1239,666,"N","E",96,4,4,NA,"N" 117 | 596,171,34,91,108,52,6,2862,728,107,361,401,224,"A","W",118,334,21,900,"A" 118 | 472,118,12,63,54,30,4,793,187,14,102,80,50,"A","W",228,377,26,155,"A" 119 | 283,77,14,45,47,26,16,6840,1910,259,915,1067,546,"A","W",144,6,5,700,"A" 120 | 408,94,4,42,36,66,9,3573,866,59,429,365,410,"N","W",282,487,19,535,"N" 121 | 327,85,3,30,44,20,8,2140,568,16,216,208,93,"A","E",91,185,12,362.5,"A" 122 | 370,96,21,49,46,60,15,6986,1972,231,1070,955,921,"N","E",137,5,9,733.333,"N" 123 | 354,77,16,36,55,41,20,8716,2172,384,1172,1267,1057,"N","W",83,174,16,200,"N" 124 | 539,139,5,93,58,69,5,1469,369,12,247,126,198,"A","W",462,9,7,400,"A" 125 | 340,84,11,62,33,47,5,1516,376,42,284,141,219,"N","E",185,8,4,400,"A" 126 | 510,126,2,42,44,35,11,5562,1578,44,703,519,256,"N","W",207,358,20,737.5,"N" 127 | 315,59,16,45,36,58,13,4677,1051,268,681,782,697,"A","W",0,0,0,NA,"A" 128 | 282,78,13,37,51,29,5,1649,453,73,211,280,138,"A","W",670,57,5,500,"A" 129 | 380,120,5,54,51,31,8,3118,900,92,444,419,240,"A","W",237,8,1,600,"A" 130 | 
584,158,15,70,84,42,5,2358,636,58,265,316,134,"N","E",331,20,4,662.5,"N" 131 | 570,169,21,72,88,38,7,3754,1077,140,492,589,263,"A","W",295,15,5,950,"A" 132 | 306,104,14,50,58,25,7,2954,822,55,313,377,187,"N","E",116,222,15,750,"N" 133 | 220,54,10,30,39,31,5,1185,299,40,145,154,128,"N","E",50,136,20,297.5,"N" 134 | 278,70,7,22,37,18,18,7186,2081,190,935,1088,643,"A","W",0,0,0,325,"A" 135 | 445,99,1,46,24,29,4,618,129,1,72,31,48,"A","W",278,415,16,87.5,"A" 136 | 143,39,5,18,30,15,9,639,151,16,80,97,61,"N","W",138,15,1,175,"N" 137 | 185,40,4,23,11,18,3,524,125,7,58,37,47,"N","E",97,2,2,90,"N" 138 | 589,170,40,107,108,69,6,2325,634,128,371,376,238,"A","E",368,20,3,1237.5,"A" 139 | 343,103,6,48,36,40,15,4338,1193,70,581,421,325,"A","E",211,56,13,430,"A" 140 | 284,69,1,33,18,25,5,1407,361,6,139,98,111,"A","E",122,140,5,NA,"N" 141 | 438,103,2,65,32,71,2,440,103,2,67,32,71,"A","W",276,7,9,100,"N" 142 | 600,144,33,85,117,65,2,696,173,38,101,130,69,"A","W",319,4,14,165,"A" 143 | 663,200,29,108,121,32,4,1447,404,57,210,222,68,"A","E",241,8,6,250,"A" 144 | 232,55,9,34,23,45,12,4405,1213,194,702,705,625,"N","E",623,35,3,1300,"N" 145 | 479,133,10,48,72,55,17,7472,2147,153,980,1032,854,"N","W",237,5,4,773.333,"N" 146 | 209,45,0,38,19,42,10,3859,916,23,557,279,478,"A","W",132,205,5,NA,"A" 147 | 528,132,21,61,74,41,6,2641,671,97,273,383,226,"N","E",885,105,8,1008.333,"N" 148 | 160,39,8,18,31,22,14,2128,543,56,304,268,298,"A","E",33,3,0,275,"A" 149 | 599,183,10,80,74,32,5,2482,715,27,330,326,158,"A","E",231,374,18,775,"A" 150 | 497,136,7,58,38,26,11,3871,1066,40,450,367,241,"A","E",304,347,10,850,"A" 151 | 210,70,13,32,51,28,15,4040,1130,97,544,462,551,"A","E",0,0,0,365,"A" 152 | 225,61,5,32,26,26,11,1568,408,25,202,185,257,"A","W",132,9,0,NA,"A" 153 | 151,41,4,26,21,19,2,288,68,9,45,39,35,"A","W",28,56,2,95,"A" 154 | 278,86,4,33,38,45,1,278,86,4,33,38,45,"N","W",102,4,2,110,"N" 155 | 341,95,6,48,42,20,10,2964,808,81,379,428,221,"N","W",158,4,5,100,"N" 156 | 
537,147,23,58,88,47,10,2744,730,97,302,351,174,"N","E",92,257,20,277.5,"N" 157 | 399,102,3,56,34,34,5,670,167,4,89,48,54,"A","W",211,9,3,80,"A" 158 | 309,94,5,37,32,26,13,4618,1330,57,616,522,436,"N","E",161,3,3,600,"N" 159 | 401,100,2,60,19,28,4,876,238,2,126,44,55,"N","E",193,11,4,NA,"N" 160 | 336,93,9,35,46,23,15,5779,1610,128,730,741,497,"A","W",0,0,0,NA,"A" 161 | 616,163,27,83,107,32,3,1437,377,65,181,227,82,"A","W",110,308,15,200,"A" 162 | 219,47,8,24,26,17,12,1188,286,23,100,125,63,"A","W",260,58,4,NA,"A" 163 | 579,174,7,67,78,58,6,3053,880,32,366,337,218,"N","E",280,479,5,657,"N" 164 | 165,39,2,13,9,16,3,196,44,2,18,10,18,"A","W",332,19,2,75,"N" 165 | 618,200,20,98,110,62,13,7127,2163,351,1104,1289,564,"A","E",330,16,8,2412.5,"A" 166 | 257,66,5,31,26,32,14,3910,979,33,518,324,382,"N","W",87,166,14,250,"A" 167 | 315,76,13,35,60,25,3,630,151,24,68,94,55,"N","E",498,39,13,155,"N" 168 | 591,157,16,90,78,26,4,2020,541,52,310,226,91,"N","E",290,440,25,640,"N" 169 | 404,92,11,54,49,18,6,1354,325,30,188,135,63,"A","E",222,5,5,300,"A" 170 | 315,73,5,23,37,16,4,450,108,6,38,46,28,"A","W",227,15,3,110,"A" 171 | 249,69,6,32,19,20,4,702,209,10,97,48,44,"N","E",103,8,2,NA,"N" 172 | 429,91,12,41,42,57,13,5590,1397,83,578,579,644,"A","W",686,46,4,825,"N" 173 | 212,54,13,28,44,18,2,233,59,13,31,46,20,"A","E",243,23,5,NA,"A" 174 | 453,101,3,46,43,61,3,948,218,6,96,72,91,"N","W",249,444,16,195,"N" 175 | 161,43,4,17,26,22,3,707,179,21,77,99,76,"A","W",300,12,2,NA,"A" 176 | 184,47,5,20,28,18,11,3327,890,74,419,382,304,"N","W",49,2,0,450,"N" 177 | 591,184,20,83,79,38,5,1689,462,40,219,195,82,"N","W",303,12,5,630,"N" 178 | 181,58,6,34,23,22,1,181,58,6,34,23,22,"N","W",88,0,3,86.5,"N" 179 | 441,118,28,84,86,68,8,2723,750,126,433,420,309,"A","E",190,2,2,1300,"A" 180 | 490,150,21,69,58,35,14,6126,1839,121,983,707,600,"A","E",96,5,3,1000,"N" 181 | 551,171,13,94,83,94,13,6090,1840,128,969,900,917,"N","E",1199,149,5,1800,"N" 182 | 
550,147,29,85,91,71,6,2816,815,117,405,474,319,"A","W",1218,104,10,1310,"A" 183 | 283,74,4,34,29,22,10,3919,1062,85,505,456,283,"N","W",145,5,7,737.5,"N" 184 | 560,161,26,89,96,66,4,1789,470,65,233,260,155,"N","W",332,9,8,625,"N" 185 | 328,91,12,51,43,33,2,342,94,12,51,44,33,"N","E",145,59,8,125,"N" 186 | 586,159,12,72,79,53,9,3082,880,83,363,477,295,"N","E",181,13,4,1043.333,"N" 187 | 503,136,5,62,48,83,10,3423,970,20,408,303,414,"N","W",65,258,8,725,"N" 188 | 344,85,24,69,64,88,7,911,214,64,150,156,187,"A","W",0,0,0,300,"A" 189 | 680,223,31,119,96,34,3,1928,587,35,262,201,91,"A","W",429,8,6,365,"A" 190 | 279,64,0,31,26,30,1,279,64,0,31,26,30,"N","W",107,205,16,75,"N" 191 | 484,127,20,66,65,67,7,3006,844,116,436,458,377,"N","E",1231,80,7,1183.333,"N" 192 | 431,127,8,77,45,58,2,667,187,9,117,64,88,"N","E",283,8,3,202.5,"N" 193 | 283,70,8,33,37,27,12,4479,1222,94,557,483,307,"A","E",156,2,2,225,"A" 194 | 491,141,11,77,47,37,15,4291,1240,84,615,430,340,"A","E",239,8,2,525,"A" 195 | 199,52,9,26,28,21,6,805,191,30,113,119,87,"N","W",235,22,5,265,"N" 196 | 589,149,21,89,86,64,7,3558,928,102,513,471,351,"A","E",371,6,6,787.5,"A" 197 | 327,84,22,53,62,38,10,4273,1123,212,577,700,334,"A","E",483,48,6,800,"N" 198 | 464,128,28,67,94,52,13,5829,1552,210,740,840,452,"A","W",0,0,0,587.5,"A" 199 | 166,34,0,20,13,17,1,166,34,0,20,13,17,"N","E",64,119,9,NA,"N" 200 | 338,92,18,42,60,21,3,682,185,36,88,112,50,"A","E",0,0,0,145,"A" 201 | 508,146,8,80,44,46,9,3148,915,41,571,289,326,"A","W",245,5,9,NA,"A" 202 | 584,157,20,95,73,63,10,4704,1320,93,724,522,576,"A","E",276,421,11,420,"A" 203 | 216,54,2,27,25,33,1,216,54,2,27,25,33,"N","W",317,36,1,75,"N" 204 | 625,179,4,94,60,65,5,1696,476,12,216,163,166,"A","E",303,450,14,575,"A" 205 | 243,53,4,18,26,27,4,853,228,23,101,110,76,"N","E",107,3,3,NA,"N" 206 | 489,131,19,77,55,34,7,2051,549,62,300,263,153,"A","W",310,9,9,780,"A" 207 | 209,56,12,22,36,19,2,216,58,12,24,37,19,"N","E",201,6,3,90,"N" 208 | 
407,93,8,47,30,30,2,969,230,14,121,69,68,"N","W",172,317,25,150,"N" 209 | 490,148,14,64,78,49,13,3400,1000,113,445,491,301,"A","E",0,0,0,700,"N" 210 | 209,59,6,20,37,27,4,884,209,14,66,106,92,"N","E",415,35,3,NA,"N" 211 | 442,131,18,68,77,33,6,1416,398,47,210,203,136,"A","E",233,7,7,550,"A" 212 | 317,88,3,40,32,19,8,2543,715,28,269,270,118,"A","W",220,16,4,NA,"A" 213 | 288,65,8,30,36,27,9,2815,698,55,315,325,189,"N","E",259,30,10,650,"A" 214 | 209,54,3,25,14,12,1,209,54,3,25,14,12,"A","W",102,6,3,68,"A" 215 | 303,71,3,18,30,36,3,344,76,3,20,36,45,"N","E",468,47,6,100,"N" 216 | 330,77,19,47,53,27,6,1928,516,90,247,288,161,"N","W",149,8,6,670,"N" 217 | 504,120,28,71,71,54,3,1085,259,54,150,167,114,"A","E",103,283,19,175,"A" 218 | 258,60,8,28,33,18,3,638,170,17,80,75,36,"A","W",358,32,8,137,"A" 219 | 20,1,0,0,0,0,2,41,9,2,6,7,4,"N","E",78,220,6,2127.333,"N" 220 | 374,94,5,36,26,62,7,1968,519,26,181,199,288,"N","W",756,64,15,875,"N" 221 | 211,43,10,26,35,39,3,498,116,14,59,55,78,"A","W",463,32,8,120,"A" 222 | 299,75,6,38,23,26,3,580,160,8,71,33,44,"N","E",212,1,2,140,"N" 223 | 576,167,8,89,49,57,4,822,232,19,132,83,79,"N","E",325,12,8,210,"N" 224 | 381,110,9,61,45,32,7,3015,834,40,451,249,168,"N","E",228,7,5,800,"N" 225 | 288,76,7,34,37,15,4,1644,408,16,198,120,113,"N","W",203,3,3,240,"N" 226 | 369,93,9,43,42,49,5,1258,323,54,181,177,157,"A","E",149,1,6,350,"A" 227 | 330,76,12,35,41,47,4,1367,326,55,167,198,167,"N","W",512,30,5,NA,"N" 228 | 547,137,2,58,47,12,2,1038,271,3,129,80,24,"A","W",261,459,22,175,"A" 229 | 572,152,18,105,49,65,2,978,249,36,168,91,101,"A","W",325,13,3,200,"A" 230 | 359,84,4,46,27,21,12,4992,1257,37,699,386,387,"N","W",151,8,5,NA,"N" 231 | 514,144,0,67,54,79,9,4739,1169,13,583,374,528,"N","E",229,453,15,1940,"N" 232 | 359,80,15,45,48,63,7,1493,359,61,176,202,175,"N","W",682,93,13,700,"N" 233 | 526,163,12,88,50,77,4,1556,470,38,245,167,174,"A","W",250,11,1,750,"A" 234 | 313,83,9,43,41,30,14,5885,1543,104,751,714,535,"N","W",58,141,23,450,"N" 235 | 
540,135,30,82,88,55,1,540,135,30,82,88,55,"A","W",157,6,14,172,"A" 236 | 437,123,9,62,55,40,9,4139,1203,79,676,390,364,"A","E",82,170,15,1260,"A" 237 | 551,160,23,86,90,87,5,2235,602,75,278,328,273,"A","W",1224,115,11,NA,"A" 238 | 237,52,0,15,25,30,24,14053,4256,160,2165,1314,1566,"N","W",523,43,6,750,"N" 239 | 236,56,6,41,19,21,5,1257,329,24,166,125,105,"A","E",172,1,4,190,"A" 240 | 473,154,6,61,48,29,6,1966,566,29,250,252,178,"A","E",846,84,9,580,"A" 241 | 309,72,0,33,31,26,5,354,82,0,41,32,26,"N","E",117,269,12,130,"N" 242 | 271,77,5,35,29,33,12,4933,1358,48,630,435,403,"A","W",62,90,3,450,"A" 243 | 357,96,7,50,45,39,5,1394,344,43,178,192,136,"A","W",167,2,4,300,"A" 244 | 216,56,4,22,18,15,12,2796,665,43,266,304,198,"A","E",391,44,4,250,"A" 245 | 256,70,13,42,36,44,16,7058,1845,312,965,1128,990,"N","E",41,118,8,1050,"A" 246 | 466,108,33,75,86,72,3,652,142,44,102,109,102,"A","E",286,8,8,215,"A" 247 | 327,68,13,42,29,45,18,3949,939,78,438,380,466,"A","E",659,53,7,400,"A" 248 | 462,119,16,49,65,37,7,2131,583,69,244,288,150,"A","E",866,65,6,NA,"A" 249 | 341,110,9,45,49,46,9,2331,658,50,249,322,274,"A","E",251,9,4,560,"A" 250 | 608,160,28,130,74,89,8,4071,1182,103,862,417,708,"A","E",426,4,6,1670,"A" 251 | 419,101,18,65,58,92,20,9528,2510,548,1509,1659,1342,"A","W",0,0,0,487.5,"A" 252 | 33,6,0,2,4,7,1,33,6,0,2,4,7,"A","W",205,5,4,NA,"A" 253 | 376,82,21,42,60,35,5,1770,408,115,238,299,157,"A","W",0,0,0,425,"A" 254 | 486,145,11,51,76,40,11,3967,1102,67,410,497,284,"N","E",88,204,16,500,"A" 255 | 186,44,7,28,16,11,1,186,44,7,28,16,11,"N","W",99,3,1,NA,"N" 256 | 307,80,1,42,36,29,7,2421,656,18,379,198,184,"A","W",145,2,2,NA,"A" 257 | 246,76,5,35,39,13,6,912,234,12,102,96,80,"A","E",44,0,1,250,"A" 258 | 205,52,8,31,27,17,12,5134,1323,56,643,445,459,"A","E",155,3,2,400,"A" 259 | 348,90,11,50,45,43,10,2288,614,43,295,273,269,"A","E",60,176,6,450,"A" 260 | 523,135,8,52,44,52,9,3368,895,39,377,284,296,"N","W",367,475,19,750,"N" 261 | 
312,68,2,32,22,24,1,312,68,2,32,22,24,"A","E",86,150,15,70,"A" 262 | 496,119,8,57,33,21,7,3358,882,36,365,280,165,"N","W",155,371,29,875,"N" 263 | 126,27,3,8,10,5,4,239,49,3,16,13,14,"N","E",190,2,9,190,"N" 264 | 275,68,5,42,42,61,6,961,238,16,128,104,172,"N","E",181,3,2,191,"N" 265 | 627,178,14,68,76,46,6,3146,902,74,494,345,242,"N","E",309,492,5,740,"N" 266 | 394,86,1,38,28,36,4,1089,267,3,94,71,76,"N","E",203,369,16,250,"N" 267 | 208,57,8,32,25,18,3,653,170,17,98,54,62,"N","E",42,94,13,140,"N" 268 | 382,101,16,50,55,22,1,382,101,16,50,55,22,"A","W",200,7,6,97.5,"A" 269 | 459,113,20,59,57,68,12,5348,1369,155,713,660,735,"A","W",0,0,0,740,"A" 270 | 549,149,7,73,47,42,1,549,149,7,73,47,42,"N","W",255,450,17,140,"N" 271 | 288,63,3,25,33,16,10,2682,667,38,315,259,204,"A","W",135,257,7,341.667,"A" 272 | 303,84,4,35,32,23,2,312,87,4,39,32,23,"N","W",179,5,3,NA,"N" 273 | 522,163,9,82,46,62,13,7037,2019,153,1043,827,535,"A","E",352,9,1,1000,"A" 274 | 512,117,29,54,88,43,6,1750,412,100,204,276,155,"A","W",1236,98,18,100,"A" 275 | 220,66,5,20,28,13,3,290,80,5,27,31,15,"A","W",281,21,3,90,"A" 276 | 522,140,16,73,77,60,4,730,185,22,93,106,86,"N","E",1320,166,17,200,"N" 277 | 461,112,18,54,54,35,2,680,160,24,76,75,49,"A","W",111,226,11,135,"A" 278 | 581,145,17,66,68,21,2,831,210,21,106,86,40,"N","E",320,465,32,155,"N" 279 | 530,159,3,82,50,47,6,1619,426,11,218,149,163,"A","W",196,354,15,475,"A" 280 | 557,142,21,58,81,23,18,8759,2583,271,1138,1299,478,"N","W",1160,53,7,1450,"N" 281 | 439,96,0,44,36,65,4,711,148,1,68,56,99,"N","E",229,406,22,150,"N" 282 | 453,103,8,53,33,52,2,507,123,8,63,39,58,"A","W",289,407,6,105,"A" 283 | 528,122,1,67,45,51,4,1716,403,12,211,146,155,"A","W",209,372,17,350,"A" 284 | 633,210,6,91,56,59,6,3070,872,19,420,230,274,"N","W",367,432,16,90,"N" 285 | 16,2,0,1,0,0,2,28,4,0,1,0,0,"A","E",247,4,8,NA,"A" 286 | 562,169,17,88,73,53,8,3181,841,61,450,342,373,"A","E",351,442,17,530,"A" 287 | 
281,76,3,42,25,20,8,2658,657,48,324,300,179,"A","E",106,144,7,341.667,"A" 288 | 593,152,23,69,75,53,6,2765,686,133,369,384,321,"A","W",315,10,6,940,"A" 289 | 687,213,10,91,65,27,4,1518,448,15,196,137,89,"A","E",294,445,13,350,"A" 290 | 368,103,3,48,28,54,8,1897,493,9,207,162,198,"N","W",209,246,3,326.667,"N" 291 | 263,70,1,26,23,30,4,888,220,9,83,82,86,"N","E",81,147,4,250,"N" 292 | 642,211,14,107,59,52,5,2364,770,27,352,230,193,"N","W",337,19,4,740,"N" 293 | 265,68,8,26,30,29,7,1337,339,32,135,163,128,"N","W",92,5,3,425,"A" 294 | 289,63,7,36,41,44,17,7402,1954,195,1115,919,1153,"A","W",166,211,7,NA,"A" 295 | 559,141,2,48,61,73,8,3162,874,16,421,349,359,"N","E",352,414,9,925,"N" 296 | 520,120,17,53,44,21,4,927,227,22,106,80,52,"A","W",70,144,11,185,"A" 297 | 19,4,1,2,3,1,1,19,4,1,2,3,1,"N","W",692,70,8,920,"A" 298 | 205,43,2,24,17,20,7,854,219,12,105,99,71,"N","E",131,6,1,286.667,"N" 299 | 193,47,10,21,29,24,6,1136,256,42,129,139,106,"A","W",299,13,5,245,"A" 300 | 181,46,1,19,18,17,5,937,238,9,88,95,104,"A","E",37,98,9,NA,"A" 301 | 213,61,4,17,22,3,17,4061,1145,83,488,491,244,"A","W",178,45,4,235,"A" 302 | 510,147,10,56,52,53,7,2872,821,63,307,340,174,"N","E",810,99,18,1150,"N" 303 | 578,138,1,56,59,34,3,1399,357,7,149,161,87,"N","E",133,371,20,160,"N" 304 | 200,51,2,14,29,25,23,9778,2732,379,1272,1652,925,"N","W",398,29,7,NA,"N" 305 | 441,113,5,76,52,76,5,1546,397,17,226,149,191,"A","W",160,290,11,425,"A" 306 | 172,42,3,17,14,15,10,4086,1150,57,579,363,406,"N","W",65,0,0,900,"N" 307 | 580,194,9,91,62,78,8,3372,1028,48,604,314,469,"N","E",270,13,6,NA,"N" 308 | 127,32,4,14,25,12,19,8396,2402,242,1048,1348,819,"N","W",167,18,6,500,"N" 309 | 279,69,4,35,31,32,4,1359,355,31,180,148,158,"N","E",133,173,9,277.5,"N" 310 | 480,112,18,50,71,44,7,3031,771,110,338,406,239,"N","E",94,270,16,750,"N" 311 | 600,139,0,94,29,60,2,1236,309,1,201,69,110,"N","E",300,12,9,160,"N" 312 | 610,186,19,107,98,74,6,2728,753,69,399,366,286,"N","E",1182,96,13,1300,"N" 313 | 
360,81,5,37,44,37,7,2268,566,41,279,257,246,"N","E",170,284,3,525,"N" 314 | 387,124,1,67,27,36,7,1775,506,6,272,125,194,"N","E",186,290,17,550,"N" 315 | 580,207,8,107,71,105,5,2778,978,32,474,322,417,"A","E",121,267,19,1600,"A" 316 | 408,117,11,66,41,34,1,408,117,11,66,41,34,"N","W",942,72,11,120,"N" 317 | 593,172,22,82,100,57,1,593,172,22,82,100,57,"A","W",1222,139,15,165,"A" 318 | 221,53,2,21,23,22,8,1063,283,15,107,124,106,"N","E",325,58,6,NA,"N" 319 | 497,127,7,65,48,37,5,2703,806,32,379,311,138,"N","E",325,9,3,700,"N" 320 | 492,136,5,76,50,94,12,5511,1511,39,897,451,875,"A","E",313,381,20,875,"A" 321 | 475,126,3,61,43,52,6,1700,433,7,217,93,146,"A","W",37,113,7,385,"A" 322 | 573,144,9,85,60,78,8,3198,857,97,470,420,332,"A","E",1314,131,12,960,"A" 323 | 631,170,9,77,44,31,11,4908,1457,30,775,357,249,"A","W",408,4,3,1000,"A" 324 | -------------------------------------------------------------------------------- /datasets/houses_prices_prediction.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/houses_prices_prediction.csv.zip -------------------------------------------------------------------------------- /datasets/income.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/income.csv.zip -------------------------------------------------------------------------------- /datasets/titanic.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/datasets/titanic.csv.zip -------------------------------------------------------------------------------- /datasets/universityGraduateAdmissions.csv: 
-------------------------------------------------------------------------------- 1 | Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR ,CGPA,Research,Chance of Admit 2 | 1,337,118,4,4.5,4.5,9.65,1,0.92 3 | 2,324,107,4,4,4.5,8.87,1,0.76 4 | 3,316,104,3,3,3.5,8,1,0.72 5 | 4,322,110,3,3.5,2.5,8.67,1,0.8 6 | 5,314,103,2,2,3,8.21,0,0.65 7 | 6,330,115,5,4.5,3,9.34,1,0.9 8 | 7,321,109,3,3,4,8.2,1,0.75 9 | 8,308,101,2,3,4,7.9,0,0.68 10 | 9,302,102,1,2,1.5,8,0,0.5 11 | 10,323,108,3,3.5,3,8.6,0,0.45 12 | 11,325,106,3,3.5,4,8.4,1,0.52 13 | 12,327,111,4,4,4.5,9,1,0.84 14 | 13,328,112,4,4,4.5,9.1,1,0.78 15 | 14,307,109,3,4,3,8,1,0.62 16 | 15,311,104,3,3.5,2,8.2,1,0.61 17 | 16,314,105,3,3.5,2.5,8.3,0,0.54 18 | 17,317,107,3,4,3,8.7,0,0.66 19 | 18,319,106,3,4,3,8,1,0.65 20 | 19,318,110,3,4,3,8.8,0,0.63 21 | 20,303,102,3,3.5,3,8.5,0,0.62 22 | 21,312,107,3,3,2,7.9,1,0.64 23 | 22,325,114,4,3,2,8.4,0,0.7 24 | 23,328,116,5,5,5,9.5,1,0.94 25 | 24,334,119,5,5,4.5,9.7,1,0.95 26 | 25,336,119,5,4,3.5,9.8,1,0.97 27 | 26,340,120,5,4.5,4.5,9.6,1,0.94 28 | 27,322,109,5,4.5,3.5,8.8,0,0.76 29 | 28,298,98,2,1.5,2.5,7.5,1,0.44 30 | 29,295,93,1,2,2,7.2,0,0.46 31 | 30,310,99,2,1.5,2,7.3,0,0.54 32 | 31,300,97,2,3,3,8.1,1,0.65 33 | 32,327,103,3,4,4,8.3,1,0.74 34 | 33,338,118,4,3,4.5,9.4,1,0.91 35 | 34,340,114,5,4,4,9.6,1,0.9 36 | 35,331,112,5,4,5,9.8,1,0.94 37 | 36,320,110,5,5,5,9.2,1,0.88 38 | 37,299,106,2,4,4,8.4,0,0.64 39 | 38,300,105,1,1,2,7.8,0,0.58 40 | 39,304,105,1,3,1.5,7.5,0,0.52 41 | 40,307,108,2,4,3.5,7.7,0,0.48 42 | 41,308,110,3,3.5,3,8,1,0.46 43 | 42,316,105,2,2.5,2.5,8.2,1,0.49 44 | 43,313,107,2,2.5,2,8.5,1,0.53 45 | 44,332,117,4,4.5,4,9.1,0,0.87 46 | 45,326,113,5,4.5,4,9.4,1,0.91 47 | 46,322,110,5,5,4,9.1,1,0.88 48 | 47,329,114,5,4,5,9.3,1,0.86 49 | 48,339,119,5,4.5,4,9.7,0,0.89 50 | 49,321,110,3,3.5,5,8.85,1,0.82 51 | 50,327,111,4,3,4,8.4,1,0.78 52 | 51,313,98,3,2.5,4.5,8.3,1,0.76 53 | 52,312,100,2,1.5,3.5,7.9,1,0.56 54 | 53,334,116,4,4,3,8,1,0.78 55 | 
54,324,112,4,4,2.5,8.1,1,0.72 56 | 55,322,110,3,3,3.5,8,0,0.7 57 | 56,320,103,3,3,3,7.7,0,0.64 58 | 57,316,102,3,2,3,7.4,0,0.64 59 | 58,298,99,2,4,2,7.6,0,0.46 60 | 59,300,99,1,3,2,6.8,1,0.36 61 | 60,311,104,2,2,2,8.3,0,0.42 62 | 61,309,100,2,3,3,8.1,0,0.48 63 | 62,307,101,3,4,3,8.2,0,0.47 64 | 63,304,105,2,3,3,8.2,1,0.54 65 | 64,315,107,2,4,3,8.5,1,0.56 66 | 65,325,111,3,3,3.5,8.7,0,0.52 67 | 66,325,112,4,3.5,3.5,8.92,0,0.55 68 | 67,327,114,3,3,3,9.02,0,0.61 69 | 68,316,107,2,3.5,3.5,8.64,1,0.57 70 | 69,318,109,3,3.5,4,9.22,1,0.68 71 | 70,328,115,4,4.5,4,9.16,1,0.78 72 | 71,332,118,5,5,5,9.64,1,0.94 73 | 72,336,112,5,5,5,9.76,1,0.96 74 | 73,321,111,5,5,5,9.45,1,0.93 75 | 74,314,108,4,4.5,4,9.04,1,0.84 76 | 75,314,106,3,3,5,8.9,0,0.74 77 | 76,329,114,2,2,4,8.56,1,0.72 78 | 77,327,112,3,3,3,8.72,1,0.74 79 | 78,301,99,2,3,2,8.22,0,0.64 80 | 79,296,95,2,3,2,7.54,1,0.44 81 | 80,294,93,1,1.5,2,7.36,0,0.46 82 | 81,312,105,3,2,3,8.02,1,0.5 83 | 82,340,120,4,5,5,9.5,1,0.96 84 | 83,320,110,5,5,4.5,9.22,1,0.92 85 | 84,322,115,5,4,4.5,9.36,1,0.92 86 | 85,340,115,5,4.5,4.5,9.45,1,0.94 87 | 86,319,103,4,4.5,3.5,8.66,0,0.76 88 | 87,315,106,3,4.5,3.5,8.42,0,0.72 89 | 88,317,107,2,3.5,3,8.28,0,0.66 90 | 89,314,108,3,4.5,3.5,8.14,0,0.64 91 | 90,316,109,4,4.5,3.5,8.76,1,0.74 92 | 91,318,106,2,4,4,7.92,1,0.64 93 | 92,299,97,3,5,3.5,7.66,0,0.38 94 | 93,298,98,2,4,3,8.03,0,0.34 95 | 94,301,97,2,3,3,7.88,1,0.44 96 | 95,303,99,3,2,2.5,7.66,0,0.36 97 | 96,304,100,4,1.5,2.5,7.84,0,0.42 98 | 97,306,100,2,3,3,8,0,0.48 99 | 98,331,120,3,4,4,8.96,1,0.86 100 | 99,332,119,4,5,4.5,9.24,1,0.9 101 | 100,323,113,3,4,4,8.88,1,0.79 102 | 101,322,107,3,3.5,3.5,8.46,1,0.71 103 | 102,312,105,2,2.5,3,8.12,0,0.64 104 | 103,314,106,2,4,3.5,8.25,0,0.62 105 | 104,317,104,2,4.5,4,8.47,0,0.57 106 | 105,326,112,3,3.5,3,9.05,1,0.74 107 | 106,316,110,3,4,4.5,8.78,1,0.69 108 | 107,329,111,4,4.5,4.5,9.18,1,0.87 109 | 108,338,117,4,3.5,4.5,9.46,1,0.91 110 | 109,331,116,5,5,5,9.38,1,0.93 111 | 
110,304,103,5,5,4,8.64,0,0.68 112 | 111,305,108,5,3,3,8.48,0,0.61 113 | 112,321,109,4,4,4,8.68,1,0.69 114 | 113,301,107,3,3.5,3.5,8.34,1,0.62 115 | 114,320,110,2,4,3.5,8.56,0,0.72 116 | 115,311,105,3,3.5,3,8.45,1,0.59 117 | 116,310,106,4,4.5,4.5,9.04,1,0.66 118 | 117,299,102,3,4,3.5,8.62,0,0.56 119 | 118,290,104,4,2,2.5,7.46,0,0.45 120 | 119,296,99,2,3,3.5,7.28,0,0.47 121 | 120,327,104,5,3,3.5,8.84,1,0.71 122 | 121,335,117,5,5,5,9.56,1,0.94 123 | 122,334,119,5,4.5,4.5,9.48,1,0.94 124 | 123,310,106,4,1.5,2.5,8.36,0,0.57 125 | 124,308,108,3,3.5,3.5,8.22,0,0.61 126 | 125,301,106,4,2.5,3,8.47,0,0.57 127 | 126,300,100,3,2,3,8.66,1,0.64 128 | 127,323,113,3,4,3,9.32,1,0.85 129 | 128,319,112,3,2.5,2,8.71,1,0.78 130 | 129,326,112,3,3.5,3,9.1,1,0.84 131 | 130,333,118,5,5,5,9.35,1,0.92 132 | 131,339,114,5,4,4.5,9.76,1,0.96 133 | 132,303,105,5,5,4.5,8.65,0,0.77 134 | 133,309,105,5,3.5,3.5,8.56,0,0.71 135 | 134,323,112,5,4,4.5,8.78,0,0.79 136 | 135,333,113,5,4,4,9.28,1,0.89 137 | 136,314,109,4,3.5,4,8.77,1,0.82 138 | 137,312,103,3,5,4,8.45,0,0.76 139 | 138,316,100,2,1.5,3,8.16,1,0.71 140 | 139,326,116,2,4.5,3,9.08,1,0.8 141 | 140,318,109,1,3.5,3.5,9.12,0,0.78 142 | 141,329,110,2,4,3,9.15,1,0.84 143 | 142,332,118,2,4.5,3.5,9.36,1,0.9 144 | 143,331,115,5,4,3.5,9.44,1,0.92 145 | 144,340,120,4,4.5,4,9.92,1,0.97 146 | 145,325,112,2,3,3.5,8.96,1,0.8 147 | 146,320,113,2,2,2.5,8.64,1,0.81 148 | 147,315,105,3,2,2.5,8.48,0,0.75 149 | 148,326,114,3,3,3,9.11,1,0.83 150 | 149,339,116,4,4,3.5,9.8,1,0.96 151 | 150,311,106,2,3.5,3,8.26,1,0.79 152 | 151,334,114,4,4,4,9.43,1,0.93 153 | 152,332,116,5,5,5,9.28,1,0.94 154 | 153,321,112,5,5,5,9.06,1,0.86 155 | 154,324,105,3,3,4,8.75,0,0.79 156 | 155,326,108,3,3,3.5,8.89,0,0.8 157 | 156,312,109,3,3,3,8.69,0,0.77 158 | 157,315,105,3,2,2.5,8.34,0,0.7 159 | 158,309,104,2,2,2.5,8.26,0,0.65 160 | 159,306,106,2,2,2.5,8.14,0,0.61 161 | 160,297,100,1,1.5,2,7.9,0,0.52 162 | 161,315,103,1,1.5,2,7.86,0,0.57 163 | 162,298,99,1,1.5,3,7.46,0,0.53 164 | 
163,318,109,3,3,3,8.5,0,0.67 165 | 164,317,105,3,3.5,3,8.56,0,0.68 166 | 165,329,111,4,4.5,4,9.01,1,0.81 167 | 166,322,110,5,4.5,4,8.97,0,0.78 168 | 167,302,102,3,3.5,5,8.33,0,0.65 169 | 168,313,102,3,2,3,8.27,0,0.64 170 | 169,293,97,2,2,4,7.8,1,0.64 171 | 170,311,99,2,2.5,3,7.98,0,0.65 172 | 171,312,101,2,2.5,3.5,8.04,1,0.68 173 | 172,334,117,5,4,4.5,9.07,1,0.89 174 | 173,322,110,4,4,5,9.13,1,0.86 175 | 174,323,113,4,4,4.5,9.23,1,0.89 176 | 175,321,111,4,4,4,8.97,1,0.87 177 | 176,320,111,4,4.5,3.5,8.87,1,0.85 178 | 177,329,119,4,4.5,4.5,9.16,1,0.9 179 | 178,319,110,3,3.5,3.5,9.04,0,0.82 180 | 179,309,108,3,2.5,3,8.12,0,0.72 181 | 180,307,102,3,3,3,8.27,0,0.73 182 | 181,300,104,3,3.5,3,8.16,0,0.71 183 | 182,305,107,2,2.5,2.5,8.42,0,0.71 184 | 183,299,100,2,3,3.5,7.88,0,0.68 185 | 184,314,110,3,4,4,8.8,0,0.75 186 | 185,316,106,2,2.5,4,8.32,0,0.72 187 | 186,327,113,4,4.5,4.5,9.11,1,0.89 188 | 187,317,107,3,3.5,3,8.68,1,0.84 189 | 188,335,118,5,4.5,3.5,9.44,1,0.93 190 | 189,331,115,5,4.5,3.5,9.36,1,0.93 191 | 190,324,112,5,5,5,9.08,1,0.88 192 | 191,324,111,5,4.5,4,9.16,1,0.9 193 | 192,323,110,5,4,5,8.98,1,0.87 194 | 193,322,114,5,4.5,4,8.94,1,0.86 195 | 194,336,118,5,4.5,5,9.53,1,0.94 196 | 195,316,109,3,3.5,3,8.76,0,0.77 197 | 196,307,107,2,3,3.5,8.52,1,0.78 198 | 197,306,105,2,3,2.5,8.26,0,0.73 199 | 198,310,106,2,3.5,2.5,8.33,0,0.73 200 | 199,311,104,3,4.5,4.5,8.43,0,0.7 201 | 200,313,107,3,4,4.5,8.69,0,0.72 202 | 201,317,103,3,2.5,3,8.54,1,0.73 203 | 202,315,110,2,3.5,3,8.46,1,0.72 204 | 203,340,120,5,4.5,4.5,9.91,1,0.97 205 | 204,334,120,5,4,5,9.87,1,0.97 206 | 205,298,105,3,3.5,4,8.54,0,0.69 207 | 206,295,99,2,2.5,3,7.65,0,0.57 208 | 207,315,99,2,3.5,3,7.89,0,0.63 209 | 208,310,102,3,3.5,4,8.02,1,0.66 210 | 209,305,106,2,3,3,8.16,0,0.64 211 | 210,301,104,3,3.5,4,8.12,1,0.68 212 | 211,325,108,4,4.5,4,9.06,1,0.79 213 | 212,328,110,4,5,4,9.14,1,0.82 214 | 213,338,120,4,5,5,9.66,1,0.95 215 | 214,333,119,5,5,4.5,9.78,1,0.96 216 | 215,331,117,4,4.5,5,9.42,1,0.94 217 | 
216,330,116,5,5,4.5,9.36,1,0.93 218 | 217,322,112,4,4.5,4.5,9.26,1,0.91 219 | 218,321,109,4,4,4,9.13,1,0.85 220 | 219,324,110,4,3,3.5,8.97,1,0.84 221 | 220,312,104,3,3.5,3.5,8.42,0,0.74 222 | 221,313,103,3,4,4,8.75,0,0.76 223 | 222,316,110,3,3.5,4,8.56,0,0.75 224 | 223,324,113,4,4.5,4,8.79,0,0.76 225 | 224,308,109,2,3,4,8.45,0,0.71 226 | 225,305,105,2,3,2,8.23,0,0.67 227 | 226,296,99,2,2.5,2.5,8.03,0,0.61 228 | 227,306,110,2,3.5,4,8.45,0,0.63 229 | 228,312,110,2,3.5,3,8.53,0,0.64 230 | 229,318,112,3,4,3.5,8.67,0,0.71 231 | 230,324,111,4,3,3,9.01,1,0.82 232 | 231,313,104,3,4,4.5,8.65,0,0.73 233 | 232,319,106,3,3.5,2.5,8.33,1,0.74 234 | 233,312,107,2,2.5,3.5,8.27,0,0.69 235 | 234,304,100,2,2.5,3.5,8.07,0,0.64 236 | 235,330,113,5,5,4,9.31,1,0.91 237 | 236,326,111,5,4.5,4,9.23,1,0.88 238 | 237,325,112,4,4,4.5,9.17,1,0.85 239 | 238,329,114,5,4.5,5,9.19,1,0.86 240 | 239,310,104,3,2,3.5,8.37,0,0.7 241 | 240,299,100,1,1.5,2,7.89,0,0.59 242 | 241,296,101,1,2.5,3,7.68,0,0.6 243 | 242,317,103,2,2.5,2,8.15,0,0.65 244 | 243,324,115,3,3.5,3,8.76,1,0.7 245 | 244,325,114,3,3.5,3,9.04,1,0.76 246 | 245,314,107,2,2.5,4,8.56,0,0.63 247 | 246,328,110,4,4,2.5,9.02,1,0.81 248 | 247,316,105,3,3,3.5,8.73,0,0.72 249 | 248,311,104,2,2.5,3.5,8.48,0,0.71 250 | 249,324,110,3,3.5,4,8.87,1,0.8 251 | 250,321,111,3,3.5,4,8.83,1,0.77 252 | 251,320,104,3,3,2.5,8.57,1,0.74 253 | 252,316,99,2,2.5,3,9,0,0.7 254 | 253,318,100,2,2.5,3.5,8.54,1,0.71 255 | 254,335,115,4,4.5,4.5,9.68,1,0.93 256 | 255,321,114,4,4,5,9.12,0,0.85 257 | 256,307,110,4,4,4.5,8.37,0,0.79 258 | 257,309,99,3,4,4,8.56,0,0.76 259 | 258,324,100,3,4,5,8.64,1,0.78 260 | 259,326,102,4,5,5,8.76,1,0.77 261 | 260,331,119,4,5,4.5,9.34,1,0.9 262 | 261,327,108,5,5,3.5,9.13,1,0.87 263 | 262,312,104,3,3.5,4,8.09,0,0.71 264 | 263,308,103,2,2.5,4,8.36,1,0.7 265 | 264,324,111,3,2.5,1.5,8.79,1,0.7 266 | 265,325,110,2,3,2.5,8.76,1,0.75 267 | 266,313,102,3,2.5,2.5,8.68,0,0.71 268 | 267,312,105,2,2,2.5,8.45,0,0.72 269 | 268,314,107,3,3,3.5,8.17,1,0.73 270 
| 269,327,113,4,4.5,5,9.14,0,0.83 271 | 270,308,108,4,4.5,5,8.34,0,0.77 272 | 271,306,105,2,2.5,3,8.22,1,0.72 273 | 272,299,96,2,1.5,2,7.86,0,0.54 274 | 273,294,95,1,1.5,1.5,7.64,0,0.49 275 | 274,312,99,1,1,1.5,8.01,1,0.52 276 | 275,315,100,1,2,2.5,7.95,0,0.58 277 | 276,322,110,3,3.5,3,8.96,1,0.78 278 | 277,329,113,5,5,4.5,9.45,1,0.89 279 | 278,320,101,2,2.5,3,8.62,0,0.7 280 | 279,308,103,2,3,3.5,8.49,0,0.66 281 | 280,304,102,2,3,4,8.73,0,0.67 282 | 281,311,102,3,4.5,4,8.64,1,0.68 283 | 282,317,110,3,4,4.5,9.11,1,0.8 284 | 283,312,106,3,4,3.5,8.79,1,0.81 285 | 284,321,111,3,2.5,3,8.9,1,0.8 286 | 285,340,112,4,5,4.5,9.66,1,0.94 287 | 286,331,116,5,4,4,9.26,1,0.93 288 | 287,336,118,5,4.5,4,9.19,1,0.92 289 | 288,324,114,5,5,4.5,9.08,1,0.89 290 | 289,314,104,4,5,5,9.02,0,0.82 291 | 290,313,109,3,4,3.5,9,0,0.79 292 | 291,307,105,2,2.5,3,7.65,0,0.58 293 | 292,300,102,2,1.5,2,7.87,0,0.56 294 | 293,302,99,2,1,2,7.97,0,0.56 295 | 294,312,98,1,3.5,3,8.18,1,0.64 296 | 295,316,101,2,2.5,2,8.32,1,0.61 297 | 296,317,100,2,3,2.5,8.57,0,0.68 298 | 297,310,107,3,3.5,3.5,8.67,0,0.76 299 | 298,320,120,3,4,4.5,9.11,0,0.86 300 | 299,330,114,3,4.5,4.5,9.24,1,0.9 301 | 300,305,112,3,3,3.5,8.65,0,0.71 302 | 301,309,106,2,2.5,2.5,8,0,0.62 303 | 302,319,108,2,2.5,3,8.76,0,0.66 304 | 303,322,105,2,3,3,8.45,1,0.65 305 | 304,323,107,3,3.5,3.5,8.55,1,0.73 306 | 305,313,106,2,2.5,2,8.43,0,0.62 307 | 306,321,109,3,3.5,3.5,8.8,1,0.74 308 | 307,323,110,3,4,3.5,9.1,1,0.79 309 | 308,325,112,4,4,4,9,1,0.8 310 | 309,312,108,3,3.5,3,8.53,0,0.69 311 | 310,308,110,4,3.5,3,8.6,0,0.7 312 | 311,320,104,3,3,3.5,8.74,1,0.76 313 | 312,328,108,4,4.5,4,9.18,1,0.84 314 | 313,311,107,4,4.5,4.5,9,1,0.78 315 | 314,301,100,3,3.5,3,8.04,0,0.67 316 | 315,305,105,2,3,4,8.13,0,0.66 317 | 316,308,104,2,2.5,3,8.07,0,0.65 318 | 317,298,101,2,1.5,2,7.86,0,0.54 319 | 318,300,99,1,1,2.5,8.01,0,0.58 320 | 319,324,111,3,2.5,2,8.8,1,0.79 321 | 320,327,113,4,3.5,3,8.69,1,0.8 322 | 321,317,106,3,4,3.5,8.5,1,0.75 323 | 
322,323,104,3,4,4,8.44,1,0.73 324 | 323,314,107,2,2.5,4,8.27,0,0.72 325 | 324,305,102,2,2,2.5,8.18,0,0.62 326 | 325,315,104,3,3,2.5,8.33,0,0.67 327 | 326,326,116,3,3.5,4,9.14,1,0.81 328 | 327,299,100,3,2,2,8.02,0,0.63 329 | 328,295,101,2,2.5,2,7.86,0,0.69 330 | 329,324,112,4,4,3.5,8.77,1,0.8 331 | 330,297,96,2,2.5,1.5,7.89,0,0.43 332 | 331,327,113,3,3.5,3,8.66,1,0.8 333 | 332,311,105,2,3,2,8.12,1,0.73 334 | 333,308,106,3,3.5,2.5,8.21,1,0.75 335 | 334,319,108,3,3,3.5,8.54,1,0.71 336 | 335,312,107,4,4.5,4,8.65,1,0.73 337 | 336,325,111,4,4,4.5,9.11,1,0.83 338 | 337,319,110,3,3,2.5,8.79,0,0.72 339 | 338,332,118,5,5,5,9.47,1,0.94 340 | 339,323,108,5,4,4,8.74,1,0.81 341 | 340,324,107,5,3.5,4,8.66,1,0.81 342 | 341,312,107,3,3,3,8.46,1,0.75 343 | 342,326,110,3,3.5,3.5,8.76,1,0.79 344 | 343,308,106,3,3,3,8.24,0,0.58 345 | 344,305,103,2,2.5,3.5,8.13,0,0.59 346 | 345,295,96,2,1.5,2,7.34,0,0.47 347 | 346,316,98,1,1.5,2,7.43,0,0.49 348 | 347,304,97,2,1.5,2,7.64,0,0.47 349 | 348,299,94,1,1,1,7.34,0,0.42 350 | 349,302,99,1,2,2,7.25,0,0.57 351 | 350,313,101,3,2.5,3,8.04,0,0.62 352 | 351,318,107,3,3,3.5,8.27,1,0.74 353 | 352,325,110,4,3.5,4,8.67,1,0.73 354 | 353,303,100,2,3,3.5,8.06,1,0.64 355 | 354,300,102,3,3.5,2.5,8.17,0,0.63 356 | 355,297,98,2,2.5,3,7.67,0,0.59 357 | 356,317,106,2,2,3.5,8.12,0,0.73 358 | 357,327,109,3,3.5,4,8.77,1,0.79 359 | 358,301,104,2,3.5,3.5,7.89,1,0.68 360 | 359,314,105,2,2.5,2,7.64,0,0.7 361 | 360,321,107,2,2,1.5,8.44,0,0.81 362 | 361,322,110,3,4,5,8.64,1,0.85 363 | 362,334,116,4,4,3.5,9.54,1,0.93 364 | 363,338,115,5,4.5,5,9.23,1,0.91 365 | 364,306,103,2,2.5,3,8.36,0,0.69 366 | 365,313,102,3,3.5,4,8.9,1,0.77 367 | 366,330,114,4,4.5,3,9.17,1,0.86 368 | 367,320,104,3,3.5,4.5,8.34,1,0.74 369 | 368,311,98,1,1,2.5,7.46,0,0.57 370 | 369,298,92,1,2,2,7.88,0,0.51 371 | 370,301,98,1,2,3,8.03,1,0.67 372 | 371,310,103,2,2.5,2.5,8.24,0,0.72 373 | 372,324,110,3,3.5,3,9.22,1,0.89 374 | 373,336,119,4,4.5,4,9.62,1,0.95 375 | 374,321,109,3,3,3,8.54,1,0.79 376 | 
375,315,105,2,2,2.5,7.65,0,0.39 377 | 376,304,101,2,2,2.5,7.66,0,0.38 378 | 377,297,96,2,2.5,2,7.43,0,0.34 379 | 378,290,100,1,1.5,2,7.56,0,0.47 380 | 379,303,98,1,2,2.5,7.65,0,0.56 381 | 380,311,99,1,2.5,3,8.43,1,0.71 382 | 381,322,104,3,3.5,4,8.84,1,0.78 383 | 382,319,105,3,3,3.5,8.67,1,0.73 384 | 383,324,110,4,4.5,4,9.15,1,0.82 385 | 384,300,100,3,3,3.5,8.26,0,0.62 386 | 385,340,113,4,5,5,9.74,1,0.96 387 | 386,335,117,5,5,5,9.82,1,0.96 388 | 387,302,101,2,2.5,3.5,7.96,0,0.46 389 | 388,307,105,2,2,3.5,8.1,0,0.53 390 | 389,296,97,2,1.5,2,7.8,0,0.49 391 | 390,320,108,3,3.5,4,8.44,1,0.76 392 | 391,314,102,2,2,2.5,8.24,0,0.64 393 | 392,318,106,3,2,3,8.65,0,0.71 394 | 393,326,112,4,4,3.5,9.12,1,0.84 395 | 394,317,104,2,3,3,8.76,0,0.77 396 | 395,329,111,4,4.5,4,9.23,1,0.89 397 | 396,324,110,3,3.5,3.5,9.04,1,0.82 398 | 397,325,107,3,3,3.5,9.11,1,0.84 399 | 398,330,116,4,5,4.5,9.45,1,0.91 400 | 399,312,103,3,3.5,4,8.78,0,0.67 401 | 400,333,117,4,5,4,9.66,1,0.95 402 | 401,304,100,2,3.5,3,8.22,0,0.63 403 | 402,315,105,2,3,3,8.34,0,0.66 404 | 403,324,109,3,3.5,3,8.94,1,0.78 405 | 404,330,116,4,4,3.5,9.23,1,0.91 406 | 405,311,101,3,2,2.5,7.64,1,0.62 407 | 406,302,99,3,2.5,3,7.45,0,0.52 408 | 407,322,103,4,3,2.5,8.02,1,0.61 409 | 408,298,100,3,2.5,4,7.95,1,0.58 410 | 409,297,101,3,2,4,7.67,1,0.57 411 | 410,300,98,1,2,2.5,8.02,0,0.61 412 | 411,301,96,1,3,4,7.56,0,0.54 413 | 412,313,94,2,2.5,1.5,8.13,0,0.56 414 | 413,314,102,4,2.5,2,7.88,1,0.59 415 | 414,317,101,3,3,2,7.94,1,0.49 416 | 415,321,110,4,3.5,4,8.35,1,0.72 417 | 416,327,106,4,4,4.5,8.75,1,0.76 418 | 417,315,104,3,4,2.5,8.1,0,0.65 419 | 418,316,103,3,3.5,2,7.68,0,0.52 420 | 419,309,111,2,2.5,4,8.03,0,0.6 421 | 420,308,102,2,2,3.5,7.98,1,0.58 422 | 421,299,100,3,2,3,7.42,0,0.42 423 | 422,321,112,3,3,4.5,8.95,1,0.77 424 | 423,322,112,4,3.5,2.5,9.02,1,0.73 425 | 424,334,119,5,4.5,5,9.54,1,0.94 426 | 425,325,114,5,4,5,9.46,1,0.91 427 | 426,323,111,5,4,5,9.86,1,0.92 428 | 427,312,106,3,3,5,8.57,0,0.71 429 | 
428,310,101,3,3.5,5,8.65,1,0.71 430 | 429,316,103,2,2,4.5,8.74,0,0.69 431 | 430,340,115,5,5,4.5,9.06,1,0.95 432 | 431,311,104,3,4,3.5,8.13,1,0.74 433 | 432,320,112,2,3.5,3.5,8.78,1,0.73 434 | 433,324,112,4,4.5,4,9.22,1,0.86 435 | 434,316,111,4,4,5,8.54,0,0.71 436 | 435,306,103,3,3.5,3,8.21,0,0.64 437 | 436,309,105,2,2.5,4,7.68,0,0.55 438 | 437,310,110,1,1.5,4,7.23,1,0.58 439 | 438,317,106,1,1.5,3.5,7.65,1,0.61 440 | 439,318,110,1,2.5,3.5,8.54,1,0.67 441 | 440,312,105,2,1.5,3,8.46,0,0.66 442 | 441,305,104,2,2.5,1.5,7.79,0,0.53 443 | 442,332,112,1,1.5,3,8.66,1,0.79 444 | 443,331,116,4,4.5,4.5,9.44,1,0.92 445 | 444,321,114,5,4.5,4.5,9.16,1,0.87 446 | 445,324,113,5,4,5,9.25,1,0.92 447 | 446,328,116,5,4.5,5,9.08,1,0.91 448 | 447,327,118,4,5,5,9.67,1,0.93 449 | 448,320,108,3,3.5,5,8.97,1,0.84 450 | 449,312,109,2,2.5,4,9.02,0,0.8 451 | 450,315,101,3,3.5,4.5,9.13,0,0.79 452 | 451,320,112,4,3,4.5,8.86,1,0.82 453 | 452,324,113,4,4.5,4.5,9.25,1,0.89 454 | 453,328,116,4,5,3.5,9.6,1,0.93 455 | 454,319,103,3,2.5,4,8.76,1,0.73 456 | 455,310,105,2,3,3.5,8.01,0,0.71 457 | 456,305,102,2,1.5,2.5,7.64,0,0.59 458 | 457,299,100,2,2,2,7.88,0,0.51 459 | 458,295,99,1,2,1.5,7.57,0,0.37 460 | 459,312,100,1,3,3,8.53,1,0.69 461 | 460,329,113,4,4,3.5,9.36,1,0.89 462 | 461,319,105,4,4,4.5,8.66,1,0.77 463 | 462,301,102,3,2.5,2,8.13,1,0.68 464 | 463,307,105,4,3,3,7.94,0,0.62 465 | 464,304,107,3,3.5,3,7.86,0,0.57 466 | 465,298,97,2,2,3,7.21,0,0.45 467 | 466,305,96,4,3,4.5,8.26,0,0.54 468 | 467,314,99,4,3.5,4.5,8.73,1,0.71 469 | 468,318,101,5,3.5,5,8.78,1,0.78 470 | 469,323,110,4,4,5,8.88,1,0.81 471 | 470,326,114,4,4,3.5,9.16,1,0.86 472 | 471,320,110,5,4,4,9.27,1,0.87 473 | 472,311,103,3,2,4,8.09,0,0.64 474 | 473,327,116,4,4,4.5,9.48,1,0.9 475 | 474,316,102,2,4,3.5,8.15,0,0.67 476 | 475,308,105,4,3,2.5,7.95,1,0.67 477 | 476,300,101,3,3.5,2.5,7.88,0,0.59 478 | 477,304,104,3,2.5,2,8.12,0,0.62 479 | 478,309,105,4,3.5,2,8.18,0,0.65 480 | 479,318,103,3,4,4.5,8.49,1,0.71 481 | 
480,325,110,4,4.5,4,8.96,1,0.79 482 | 481,321,102,3,3.5,4,9.01,1,0.8 483 | 482,323,107,4,3,2.5,8.48,1,0.78 484 | 483,328,113,4,4,2.5,8.77,1,0.83 485 | 484,304,103,5,5,3,7.92,0,0.71 486 | 485,317,106,3,3.5,3,7.89,1,0.73 487 | 486,311,101,2,2.5,3.5,8.34,1,0.7 488 | 487,319,102,3,2.5,2.5,8.37,0,0.68 489 | 488,327,115,4,3.5,4,9.14,0,0.79 490 | 489,322,112,3,3,4,8.62,1,0.76 491 | 490,302,110,3,4,4.5,8.5,0,0.65 492 | 491,307,105,2,2.5,4.5,8.12,1,0.67 493 | 492,297,99,4,3,3.5,7.81,0,0.54 494 | 493,298,101,4,2.5,4.5,7.69,1,0.53 495 | 494,300,95,2,3,1.5,8.22,1,0.62 496 | 495,301,99,3,2.5,2,8.45,1,0.68 497 | 496,332,108,5,4.5,4,9.02,1,0.87 498 | 497,337,117,5,5,5,9.87,1,0.96 499 | 498,330,120,5,4.5,5,9.56,1,0.93 500 | 499,312,103,4,4,5,8.43,0,0.73 501 | 500,327,113,4,4.5,4.5,9.04,0,0.84 -------------------------------------------------------------------------------- /datasets/vehicles_test.csv: -------------------------------------------------------------------------------- 1 | price,year,miles,doors,vtype 2 | 3000,2003,130000,4,truck 3 | 6000,2005,82500,4,car 4 | 12000,2010,60000,2,car 5 | -------------------------------------------------------------------------------- /datasets/vehicles_train.csv: -------------------------------------------------------------------------------- 1 | price,year,miles,doors,vtype 2 | 22000,2012,13000,2,car 3 | 14000,2010,30000,2,car 4 | 13000,2010,73500,4,car 5 | 9500,2009,78000,4,car 6 | 9000,2007,47000,4,car 7 | 4000,2006,124000,2,car 8 | 3000,2004,177000,4,car 9 | 2000,2004,209000,4,truck 10 | 3000,2003,138000,2,car 11 | 1900,2003,160000,4,car 12 | 2500,2003,190000,2,truck 13 | 5000,2001,62000,4,car 14 | 1800,1999,163000,2,truck 15 | 1300,1997,138000,4,car 16 | -------------------------------------------------------------------------------- /datasets/wine.data: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 
32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- /exercises/E1-DataScienceOverview.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Exercise 1 - Data Science Overview 3 | 4 | Write at least 300 words about one of these topics. 5 | 6 | - Limitations of Machine Learning Algorithms 7 | - Python vs R vs SAS for Data Science 8 | - Differences between a Data Scientist and a Data Engineer 9 | 10 | Write a plain text file named E1-[uniandes username].txt 11 | 12 | -------------------------------------------------------------------------------- /exercises/E10-RandomForestPerformanceReview.md: -------------------------------------------------------------------------------- 1 | # E10 - Random Forest Performance Review 2 | 3 | Read and comment on the paper *Do we Need Hundreds of Classifiers to Solve Real World Classification Problems?* 4 | 5 | ### Reference: 6 | http://jmlr.org/papers/volume15/delgado14a/delgado14a.pdf 7 | -------------------------------------------------------------------------------- /exercises/E11-RandomForest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 11\n", 8 | "\n", 9 | "## Car Price Prediction\n", 10 | "\n", 11 | "Predict whether the price of a car is low or high" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 9, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | "
YearMileageM_CamryM_Camry4drM_CamryBaseM_CamryLM_CamryLEM_CamrySEM_CamryXLEHighPrice
1520162924200001001
4720152646500001001
8520124673901000001
14120174172200000101
22620147766900000010
\n", 120 | "
" 121 | ], 122 | "text/plain": [ 123 | " Year Mileage M_Camry M_Camry4dr M_CamryBase M_CamryL M_CamryLE \\\n", 124 | "15 2016 29242 0 0 0 0 1 \n", 125 | "47 2015 26465 0 0 0 0 1 \n", 126 | "85 2012 46739 0 1 0 0 0 \n", 127 | "141 2017 41722 0 0 0 0 0 \n", 128 | "226 2014 77669 0 0 0 0 0 \n", 129 | "\n", 130 | " M_CamrySE M_CamryXLE HighPrice \n", 131 | "15 0 0 1 \n", 132 | "47 0 0 1 \n", 133 | "85 0 0 1 \n", 134 | "141 1 0 1 \n", 135 | "226 0 1 0 " 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "%matplotlib inline\n", 145 | "import pandas as pd\n", 146 | "\n", 147 | "data = pd.read_csv('https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTrain_carListings.zip')\n", 148 | "data = data.loc[data['Model'].str.contains('Camry')].drop(['Make', 'State'], axis=1)\n", 149 | "data = data.join(pd.get_dummies(data['Model'], prefix='M'))\n", 150 | "data['HighPrice'] = (data['Price'] > data['Price'].mean()).astype(int)\n", 151 | "data = data.drop(['Model', 'Price'], axis=1)\n", 152 | "\n", 153 | "data.head()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 12, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "(13150, 10)" 165 | ] 166 | }, 167 | "execution_count": 12, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "data.shape" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 10, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "y = data['HighPrice']\n", 183 | "X = data.drop(['HighPrice'], axis=1)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 11, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "from sklearn.model_selection import train_test_split\n", 193 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 
random_state=42)\n" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "# Exercise 11.1\n", 208 | "\n", 209 | "Estimate a Decision Tree Classifier manually, using the code created in Notebook #13\n", 210 | "\n", 211 | "Evaluate the accuracy on the testing set" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "# Exercise 11.2\n", 226 | "\n", 227 | "Estimate a Bagging of 10 Decision Tree Classifiers manually, using the code created in Notebook #13\n", 228 | "\n", 229 | "Evaluate the accuracy on the testing set" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "# Exercise 11.3\n", 244 | "\n", 245 | "Implement the parameter max_features on the Decision Tree Classifier created in 11.1.\n", 246 | "\n", 247 | "Compare the impact on the results of varying the parameter max_features\n", 248 | "\n", 249 | "Evaluate the accuracy on the testing set" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "# Exercise 11.4\n", 264 | "\n", 265 | "Estimate a Bagging of 10 Decision Tree Classifiers with `max_features = log(n_features)`\n", 266 | "\n", 267 | "Evaluate the accuracy on the testing set" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": []
276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "# Exercise 11.5\n", 282 | "\n", 283 | "Using sklearn, train a RandomForestClassifier\n", 284 | "\n", 285 | "Evaluate the accuracy on the testing set" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "# Exercise 11.6\n", 300 | "\n", 301 | "Find the best parameters of the RandomForestClassifier (max_depth, max_features, n_estimators)\n", 302 | "\n", 303 | "Evaluate the accuracy on the testing set" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "Python 3", 317 | "language": "python", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.7.0" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 1 335 | } 336 | -------------------------------------------------------------------------------- /exercises/E12-GradientBoostingRewiew.md: -------------------------------------------------------------------------------- 1 | # E12 - Gradient Boosting Review 2 | 3 | Research and comment on the main differences between the algorithms implemented in: 4 | 5 | (1) [Gradient Boosting Classifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html) 6 | 7 | (2) [XGB Classifier](https://xgboost.readthedocs.io/en/latest/python/python_intro.html) 8 | 9 | Write at least 300 words explaining the differences.
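As a starting point for the comparison (this sketch is illustrative, not part of the required write-up), the code below fits both classifiers with matching hyperparameters on a synthetic dataset; it assumes `xgboost` is installed and skips that part otherwise:

```python
# Side-by-side sketch: sklearn's GradientBoostingClassifier vs. xgboost's
# XGBClassifier, fit with matching hyperparameters on synthetic data.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=42)

# sklearn: exact greedy tree growth, trees built sequentially,
# no explicit L1/L2 penalty on the leaf weights
gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=42)
gbc.fit(X_train, y_train)
print('sklearn GBC accuracy:', gbc.score(X_test, y_test))

# xgboost: second-order gradient information, regularized objective
# (reg_lambda / reg_alpha), parallelized split finding
try:
    from xgboost import XGBClassifier
    xgb = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3,
                        reg_lambda=1.0, random_state=42)
    xgb.fit(X_train, y_train)
    print('XGBClassifier accuracy:', xgb.score(X_test, y_test))
except ImportError:
    print('xgboost not installed; skipping XGBClassifier')
```

Headline differences to develop in the write-up include XGBoost's regularized objective (`reg_lambda`, `reg_alpha`), its use of second-order gradients, and its parallel split finding, versus scikit-learn's exact, sequential implementation.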
10 | -------------------------------------------------------------------------------- /exercises/E13-CategoricalEncoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 13\n", 8 | "\n", 9 | "This particular Automobile Data Set includes a good mix of categorical values as well as continuous values and serves as a useful example that is relatively easy to understand. Since domain understanding is an important aspect when deciding how to encode various categorical values, this data set makes a good case study." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Read the data into Pandas" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [
\n", 28 | "\n", 41 | "\n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
symbolingnormalized_lossesmakefuel_typeaspirationnum_doorsbody_styledrive_wheelsengine_locationwheel_base...engine_sizefuel_systemborestrokecompression_ratiohorsepowerpeak_rpmcity_mpghighway_mpgprice
03NaNalfa-romerogasstdtwoconvertiblerwdfront88.6...130mpfi3.472.689.0111.05000.0212713495.0
13NaNalfa-romerogasstdtwoconvertiblerwdfront88.6...130mpfi3.472.689.0111.05000.0212716500.0
21NaNalfa-romerogasstdtwohatchbackrwdfront94.5...152mpfi2.683.479.0154.05000.0192616500.0
32164.0audigasstdfoursedanfwdfront99.8...109mpfi3.193.4010.0102.05500.0243013950.0
42164.0audigasstdfoursedan4wdfront99.4...136mpfi3.193.408.0115.05500.0182217450.0
\n", 191 | "

5 rows × 26 columns

\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " symboling normalized_losses make fuel_type aspiration num_doors \\\n", 196 | "0 3 NaN alfa-romero gas std two \n", 197 | "1 3 NaN alfa-romero gas std two \n", 198 | "2 1 NaN alfa-romero gas std two \n", 199 | "3 2 164.0 audi gas std four \n", 200 | "4 2 164.0 audi gas std four \n", 201 | "\n", 202 | " body_style drive_wheels engine_location wheel_base ... engine_size \\\n", 203 | "0 convertible rwd front 88.6 ... 130 \n", 204 | "1 convertible rwd front 88.6 ... 130 \n", 205 | "2 hatchback rwd front 94.5 ... 152 \n", 206 | "3 sedan fwd front 99.8 ... 109 \n", 207 | "4 sedan 4wd front 99.4 ... 136 \n", 208 | "\n", 209 | " fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg \\\n", 210 | "0 mpfi 3.47 2.68 9.0 111.0 5000.0 21 \n", 211 | "1 mpfi 3.47 2.68 9.0 111.0 5000.0 21 \n", 212 | "2 mpfi 2.68 3.47 9.0 154.0 5000.0 19 \n", 213 | "3 mpfi 3.19 3.40 10.0 102.0 5500.0 24 \n", 214 | "4 mpfi 3.19 3.40 8.0 115.0 5500.0 18 \n", 215 | "\n", 216 | " highway_mpg price \n", 217 | "0 27 13495.0 \n", 218 | "1 27 16500.0 \n", 219 | "2 26 16500.0 \n", 220 | "3 30 13950.0 \n", 221 | "4 22 17450.0 \n", 222 | "\n", 223 | "[5 rows x 26 columns]" 224 | ] 225 | }, 226 | "execution_count": 1, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "import pandas as pd\n", 233 | "\n", 234 | "# Define the headers since the data does not have any\n", 235 | "headers = [\"symboling\", \"normalized_losses\", \"make\", \"fuel_type\", \"aspiration\",\n", 236 | " \"num_doors\", \"body_style\", \"drive_wheels\", \"engine_location\",\n", 237 | " \"wheel_base\", \"length\", \"width\", \"height\", \"curb_weight\",\n", 238 | " \"engine_type\", \"num_cylinders\", \"engine_size\", \"fuel_system\",\n", 239 | " \"bore\", \"stroke\", \"compression_ratio\", \"horsepower\", \"peak_rpm\",\n", 240 | " \"city_mpg\", \"highway_mpg\", \"price\"]\n", 241 | "\n", 242 | "# Read in the CSV file and convert \"?\" to 
NaN\n", 243 | "df = pd.read_csv(\"http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data\",\n", 244 | " header=None, names=headers, na_values=\"?\" )\n", 245 | "df.head()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 4, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "(205, 26)" 257 | ] 258 | }, 259 | "execution_count": 4, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "df.shape" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 2, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "symboling int64\n", 277 | "normalized_losses float64\n", 278 | "make object\n", 279 | "fuel_type object\n", 280 | "aspiration object\n", 281 | "num_doors object\n", 282 | "body_style object\n", 283 | "drive_wheels object\n", 284 | "engine_location object\n", 285 | "wheel_base float64\n", 286 | "length float64\n", 287 | "width float64\n", 288 | "height float64\n", 289 | "curb_weight int64\n", 290 | "engine_type object\n", 291 | "num_cylinders object\n", 292 | "engine_size int64\n", 293 | "fuel_system object\n", 294 | "bore float64\n", 295 | "stroke float64\n", 296 | "compression_ratio float64\n", 297 | "horsepower float64\n", 298 | "peak_rpm float64\n", 299 | "city_mpg int64\n", 300 | "highway_mpg int64\n", 301 | "price float64\n", 302 | "dtype: object" 303 | ] 304 | }, 305 | "execution_count": 2, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "df.dtypes" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 3, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/html": [ 322 | "
\n", 323 | "\n", 336 | "\n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | "
makefuel_typeaspirationnum_doorsbody_styledrive_wheelsengine_locationengine_typenum_cylindersfuel_system
0alfa-romerogasstdtwoconvertiblerwdfrontdohcfourmpfi
1alfa-romerogasstdtwoconvertiblerwdfrontdohcfourmpfi
2alfa-romerogasstdtwohatchbackrwdfrontohcvsixmpfi
3audigasstdfoursedanfwdfrontohcfourmpfi
4audigasstdfoursedan4wdfrontohcfivempfi
\n", 420 | "
" 421 | ], 422 | "text/plain": [ 423 | " make fuel_type aspiration num_doors body_style drive_wheels \\\n", 424 | "0 alfa-romero gas std two convertible rwd \n", 425 | "1 alfa-romero gas std two convertible rwd \n", 426 | "2 alfa-romero gas std two hatchback rwd \n", 427 | "3 audi gas std four sedan fwd \n", 428 | "4 audi gas std four sedan 4wd \n", 429 | "\n", 430 | " engine_location engine_type num_cylinders fuel_system \n", 431 | "0 front dohc four mpfi \n", 432 | "1 front dohc four mpfi \n", 433 | "2 front ohcv six mpfi \n", 434 | "3 front ohc four mpfi \n", 435 | "4 front ohc five mpfi " 436 | ] 437 | }, 438 | "execution_count": 3, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "obj_df = df.select_dtypes(include=['object']).copy()\n", 445 | "obj_df.head()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "# Exercise 13.1\n", 460 | "\n", 461 | "Does the database contain missing values? 
If so, replace them using one of the methods explained in class" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "# Exercise 13.2\n", 476 | "\n", 477 | "Split the data into training and testing sets\n", 478 | "\n", 479 | "Train a Random Forest Regressor to predict the price of a car using the numeric features" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "# Exercise 13.3\n", 494 | "\n", 495 | "Create dummy variables for the categorical features\n", 496 | "\n", 497 | "Train a Random Forest Regressor and compare" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": {}, 510 | "source": [ 511 | "# Exercise 13.4\n", 512 | "\n", 513 | "Apply two other methods of categorical encoding\n", 514 | "\n", 515 | "compare the results" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [] 524 | } 525 | ], 526 | "metadata": { 527 | "kernelspec": { 528 | "display_name": "Python 3", 529 | "language": "python", 530 | "name": "python3" 531 | }, 532 | "language_info": { 533 | "codemirror_mode": { 534 | "name": "ipython", 535 | "version": 3 536 | }, 537 | "file_extension": ".py", 538 | "mimetype": "text/x-python", 539 | "name": "python", 540 | "nbconvert_exporter": "python", 541 | "pygments_lexer": "ipython3", 542 | "version": "3.7.1" 543 | } 544 | }, 545 | "nbformat": 4, 546 | "nbformat_minor": 1 547 | } 548 | 
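Exercises 13.1–13.3 above can be sketched end to end. The snippet below is a minimal illustration, not the graded solution: it uses a tiny hand-made frame standing in for the `imports-85` data loaded above, and the column subset, the median-fill strategy, and the split ratio are all assumptions.

```python
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Tiny stand-in for a few columns of the imports-85 frame (`df`) loaded above.
df = pd.DataFrame({
    "fuel_type": ["gas", "gas", "diesel", "gas", "diesel", "gas"],
    "body_style": ["sedan", "hatchback", "sedan", "wagon", "sedan", "wagon"],
    "horsepower": [111.0, None, 68.0, 102.0, 88.0, 95.0],
    "curb_weight": [2548, 2823, 2337, 2844, 2395, 2650],
    "price": [13495.0, 16500.0, None, 13950.0, 9995.0, 12170.0],
})

# 13.1 -- impute missing numeric values with the column median (one option among several).
num_cols = df.select_dtypes(include="number").columns
df[num_cols] = df[num_cols].fillna(df[num_cols].median())

# 13.3 -- one-hot (dummy) encode the categorical features.
X = pd.get_dummies(df.drop(columns="price"))
y = df["price"]

# 13.2 -- split, then fit a Random Forest regressor on the encoded features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0)
model = RandomForestRegressor(n_estimators=50, random_state=0)
model.fit(X_train, y_train)
preds = model.predict(X_test)
```

On the real data the same pipeline applies unchanged once `df` is loaded; Exercise 13.4 would swap `pd.get_dummies` for other encoders (for example ordinal or target encoding) and compare test-set error.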
-------------------------------------------------------------------------------- /exercises/E14-UnbalancedLearningOverview.md: -------------------------------------------------------------------------------- 1 | # E14 - Unbalanced Learning Overview 2 | 3 | Research the impact of unbalanced datasets on binary classification problems and the most common solutions. 4 | 5 | https://link.springer.com/content/pdf/10.1186%2Fs40537-018-0151-6.pdf 6 | 7 | Quiz at the beginning of the next class. 8 | -------------------------------------------------------------------------------- /exercises/E16-NLPOverview: -------------------------------------------------------------------------------- 1 | # E16 - Natural Language Processing Overview 2 | 3 | Research a commercial product that uses NLP at its core (e.g., Google Assistant) and describe, in general terms, which NLP techniques it uses. 4 | 5 | (If two of you have the same commercial product, both of your grades will be penalized --- Be creative!) 6 | -------------------------------------------------------------------------------- /exercises/E18-ClassHomeworksAnalysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 18\n", 8 | "\n", 9 | "## Analyze class homeworks\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 101, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
SexoT1T2T3T4T5T6
0HUn científico de datos y un ingeniero de datos...NaNLos árboles de decisión es uno de los algoritm...NaNEn el documento \"Do We Need Hundreds of Classi...Boosting builds models from individual so call...
1HAunque los dos perfiles cuentan con habilidade...Aunque el conceso no es total en el tema de ma...NaNEn Machine Learing el propósito de los métodos...El paper publicado en 2014 hace una evaluación...Gradient Boosting ClassifierGradient Boosting ...
2HPython vs Stata. Analizar información es un pr...En el marco del Machine Learning encontramos c...Árbol de clasificación Como su nombre lo indic...Este es un mecanismo que utiliza varios algori...El paper analiza 179 clasificadores de 17 dife...Gradient Boosting Classifier busca aprender de...
3MLos algoritmos de machine learning buscan pred...Los algoritmos de machine learning buscan pred...Los arboles de decisión pertenecen a los algor...Consideremos el siguiente ejemplo: Una persona...¿Se necesitan cientos de modelos para resolver...NaN
4HPython vs R para análisis de datos. A pesar de...Tipos de aprendizaje en Machine Learning. El M...Tipos de árboles de decisión y aplicaciones. U...La combinación de modelos o ensemble es un mod...En el articulo se analizan cerca de 180 clasif...XGBoosting vs GBoosting. A pesar de que el alg...
\n", 100 | "
" 101 | ], 102 | "text/plain": [ 103 | " Sexo T1 \\\n", 104 | "0 H Un científico de datos y un ingeniero de datos... \n", 105 | "1 H Aunque los dos perfiles cuentan con habilidade... \n", 106 | "2 H Python vs Stata. Analizar información es un pr... \n", 107 | "3 M Los algoritmos de machine learning buscan pred... \n", 108 | "4 H Python vs R para análisis de datos. A pesar de... \n", 109 | "\n", 110 | " T2 \\\n", 111 | "0 NaN \n", 112 | "1 Aunque el conceso no es total en el tema de ma... \n", 113 | "2 En el marco del Machine Learning encontramos c... \n", 114 | "3 Los algoritmos de machine learning buscan pred... \n", 115 | "4 Tipos de aprendizaje en Machine Learning. El M... \n", 116 | "\n", 117 | " T3 \\\n", 118 | "0 Los árboles de decisión es uno de los algoritm... \n", 119 | "1 NaN \n", 120 | "2 Árbol de clasificación Como su nombre lo indic... \n", 121 | "3 Los arboles de decisión pertenecen a los algor... \n", 122 | "4 Tipos de árboles de decisión y aplicaciones. U... \n", 123 | "\n", 124 | " T4 \\\n", 125 | "0 NaN \n", 126 | "1 En Machine Learing el propósito de los métodos... \n", 127 | "2 Este es un mecanismo que utiliza varios algori... \n", 128 | "3 Consideremos el siguiente ejemplo: Una persona... \n", 129 | "4 La combinación de modelos o ensemble es un mod... \n", 130 | "\n", 131 | " T5 \\\n", 132 | "0 En el documento \"Do We Need Hundreds of Classi... \n", 133 | "1 El paper publicado en 2014 hace una evaluación... \n", 134 | "2 El paper analiza 179 clasificadores de 17 dife... \n", 135 | "3 ¿Se necesitan cientos de modelos para resolver... \n", 136 | "4 En el articulo se analizan cerca de 180 clasif... \n", 137 | "\n", 138 | " T6 \n", 139 | "0 Boosting builds models from individual so call... \n", 140 | "1 Gradient Boosting ClassifierGradient Boosting ... \n", 141 | "2 Gradient Boosting Classifier busca aprender de... \n", 142 | "3 NaN \n", 143 | "4 XGBoosting vs GBoosting. A pesar de que el alg... 
" 144 | ] 145 | }, 146 | "execution_count": 101, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "import pandas as pd\n", 153 | "import numpy as np\n", 154 | "\n", 155 | "%matplotlib inline\n", 156 | "import matplotlib.pyplot as plt\n", 157 | "\n", 158 | "data = pd.read_excel('../datasets/E18.xlsx')\n", 159 | "data.head()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "# Exercise 18.1\n", 167 | "\n", 168 | "Analyze the writing patterns of each student" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "# Exercise 18.2\n", 183 | "\n", 184 | "Evaluate the similarities of the homeworks of the students\n", 185 | "\n", 186 | "tip: https://github.com/orsinium/textdistance" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "# Exercise 18.3\n", 201 | "\n", 202 | "Create a classifier to predict the sex of each student\n", 203 | "\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.7.1" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | 
-------------------------------------------------------------------------------- /exercises/E2- Python Text Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 2.1\n", 8 | "\n", 9 | "Complete the function that calculates the similarities of all texts in the dataframe" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "data = pd.DataFrame(columns=['Text', 'Undergrad', 'Semester', 'Age', 'Company'])" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 15, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Example dataset\n", 37 | "data.loc[0] = ['Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.',\n", 38 | " 'Ingenieria Industrial', 0, 25, 'Uniandes']\n", 39 | "data.loc[1] = ['Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. 
Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?',\n", 40 | " 'Ingenieria Electrica', 2, 32, 'Bancolombia']\n", 41 | "data.loc[2] = ['At vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio cumque nihil impedit quo minus id quod maxime placeat facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat.',\n", 42 | " 'Ingenieria Sistemas', 1, 22, 'Colpatria']\n", 43 | "data.loc[3] = ['At vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. 
Nam libero tempore, cum soluta nobis est eligendi optio cumque nihil impedit quo minus id quod maxime placeat facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat.',\n", 44 | " 'Estadistica', 1, 26, 'Colpatria']" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 17, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "def similarity(data):\n", 54 | "\n", 55 | " results = pd.DataFrame(0, index=data.index, columns=data.index)\n", 56 | " \n", 57 | " # your code in here\n", 58 | " \n", 59 | " \n", 60 | " assert results.shape == (data.shape[0], data.shape[0])\n", 61 | " \n", 62 | " return results" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 18, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
0123
00000
10000
20000
30000
\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " 0 1 2 3\n", 132 | "0 0 0 0 0\n", 133 | "1 0 0 0 0\n", 134 | "2 0 0 0 0\n", 135 | "3 0 0 0 0" 136 | ] 137 | }, 138 | "execution_count": 18, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "similarity(data)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Exercise 2.2\n", 152 | "\n", 153 | "Calculate descriptive statistics of the dataset\n", 154 | "\n", 155 | "- Pivot tables analyzing companies, universities, age and master semester." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.7" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /exercises/E3-LearningOverview.md: -------------------------------------------------------------------------------- 1 | # E3 - Types of Learning 2 | 3 | Write at least 300 words explaining the types of learning in Ml. 4 | -------------------------------------------------------------------------------- /exercises/E5-DecisionTreesOverview.md: -------------------------------------------------------------------------------- 1 | # E5 - Decision Trees Overview 2 | 3 | Write at least 300 words explaining the types of decision trees algorithms and applications. 
4 | -------------------------------------------------------------------------------- /exercises/E6-SVM&Regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 6\n", 8 | "\n", 9 | "## SVM & Regularization\n", 10 | "\n", 11 | "\n", 12 | "For this homework we consider a set of observations on a number of red and white wine varieties, involving their chemical properties and their rankings by tasters. The wine industry has shown a recent growth spurt as social drinking is on the rise. The price of wine depends on a rather abstract concept of wine appreciation by tasters, whose opinions may vary considerably, so pricing depends to some extent on this volatile factor. Another key factor in wine certification and quality assessment is physicochemical testing, which is laboratory-based and takes into account factors such as acidity, pH level, sugar content and other chemical properties. For the wine market, it would be of interest if the human quality of tasting could be related to the chemical properties of wine, so that the certification and quality-assessment process would be more controlled.\n", 13 | "\n", 14 | "Two datasets are available: one on red wine, with 1599 different varieties, and the other on white wine, with 4898 varieties. All wines are produced in a particular area of Portugal. Data are collected on 12 different properties of the wines, one of which is Quality, based on sensory data; the rest are chemical properties of the wines, including density, acidity, alcohol content, etc. All chemical properties of the wines are continuous variables. Quality is an ordinal variable with a possible ranking from 1 (worst) to 10 (best). Each variety of wine is tasted by three independent tasters, and the final rank assigned is the median of the ranks given by the tasters.\n", 15 | "\n", 16 | "A predictive model developed on this data is expected to provide guidance to vineyards regarding the quality and price expected for their produce, without heavy reliance on the volatile judgments of wine tasters." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 4, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "data_r = pd.read_csv('https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/Wine_data_red.csv')\n", 36 | "data_w = pd.read_csv('https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/Wine_data_white.csv')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 5, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholqualitytype
35946.80.190.327.60.04937.0107.00.993323.120.4410.77white
24126.30.380.178.80.08050.0212.00.998033.470.669.44white
537810.60.280.3915.50.0696.023.01.002603.120.669.25red
29057.60.310.261.70.07340.0157.00.993803.100.469.85white
64326.60.560.142.40.06413.029.00.993973.420.6211.77red
\n", 163 | "
" 164 | ], 165 | "text/plain": [ 166 | " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", 167 | "3594 6.8 0.19 0.32 7.6 0.049 \n", 168 | "2412 6.3 0.38 0.17 8.8 0.080 \n", 169 | "5378 10.6 0.28 0.39 15.5 0.069 \n", 170 | "2905 7.6 0.31 0.26 1.7 0.073 \n", 171 | "6432 6.6 0.56 0.14 2.4 0.064 \n", 172 | "\n", 173 | " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", 174 | "3594 37.0 107.0 0.99332 3.12 0.44 \n", 175 | "2412 50.0 212.0 0.99803 3.47 0.66 \n", 176 | "5378 6.0 23.0 1.00260 3.12 0.66 \n", 177 | "2905 40.0 157.0 0.99380 3.10 0.46 \n", 178 | "6432 13.0 29.0 0.99397 3.42 0.62 \n", 179 | "\n", 180 | " alcohol quality type \n", 181 | "3594 10.7 7 white \n", 182 | "2412 9.4 4 white \n", 183 | "5378 9.2 5 red \n", 184 | "2905 9.8 5 white \n", 185 | "6432 11.7 7 red " 186 | ] 187 | }, 188 | "execution_count": 5, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "data = data_w.assign(type = 'white')\n", 195 | "\n", 196 | "data = data.append(data_r.assign(type = 'red'), ignore_index=True)\n", 197 | "data.sample(5)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "# Exercise 6.1\n", 205 | "\n", 206 | "Show the frecuency table of the quality by type of wine" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "# SVM" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "# Exercise 6.2\n", 228 | "\n", 229 | "* Standarized the features (not the quality)\n", 230 | "* Create a binary target for each type of wine\n", 231 | "* Create two Linear SVM's for the white and red wines, repectively.\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 
238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "# Exercise 6.3\n", 248 | "\n", 249 | "Test the two SVM's using the different kernels (‘poly’, ‘rbf’, ‘sigmoid’)\n" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": true 257 | }, 258 | "outputs": [], 259 | "source": [] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# Exercise 6.4\n", 266 | "Using the best SVM find the parameters that gives the best performance\n", 267 | "\n", 268 | "'C': [0.1, 1, 10, 100, 1000], 'gamma': [0.01, 0.001, 0.0001]" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "# Exercise 6.5\n", 285 | "\n", 286 | "Compare the results with other methods" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "# Regularization" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "# Exercise 6.6\n", 310 | "\n", 311 | "\n", 312 | "* Train a linear regression to predict wine quality (Continous)\n", 313 | "\n", 314 | "* Analyze the coefficients\n", 315 | "\n", 316 | "* Evaluate the RMSE" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# Exercise 6.7\n", 333 | "\n", 334 | "* 
Estimate a ridge regression with alpha equal to 0.1 and 1.\n", 335 | "* Compare the coefficients with the linear regression\n", 336 | "* Evaluate the RMSE" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "# Exercise 6.8\n", 353 | "\n", 354 | "* Estimate a lasso regression with alpha equal to 0.01, 0.1 and 1.\n", 355 | "* Compare the coefficients with the linear regression\n", 356 | "* Evaluate the RMSE" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": { 363 | "collapsed": true 364 | }, 365 | "outputs": [], 366 | "source": [] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "# Exercise 6.9\n", 373 | "\n", 374 | "* Create a binary target\n", 375 | "\n", 376 | "* Train a logistic regression to predict wine quality (binary)\n", 377 | "\n", 378 | "* Analyze the coefficients\n", 379 | "\n", 380 | "* Evaluate the F1 score" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": { 387 | "collapsed": true 388 | }, 389 | "outputs": [], 390 | "source": [] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "# Exercise 6.10\n", 397 | "\n", 398 | "* Estimate a regularized logistic regression using:\n", 399 | "* C = 0.01, 0.1 & 1.0\n", 400 | "* penalty = ['l1', 'l2']\n", 401 | "* Compare the coefficients and the F1 score" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": { 408 | "collapsed": true 409 | }, 410 | "outputs": [], 411 | "source": [] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [] 419 | } 420 | ], 421 | "metadata": { 422 | "kernelspec": {
423 | "display_name": "Python 3", 424 | "language": "python", 425 | "name": "python3" 426 | }, 427 | "language_info": { 428 | "codemirror_mode": { 429 | "name": "ipython", 430 | "version": 3 431 | }, 432 | "file_extension": ".py", 433 | "mimetype": "text/x-python", 434 | "name": "python", 435 | "nbconvert_exporter": "python", 436 | "pygments_lexer": "ipython3", 437 | "version": "3.7.0" 438 | } 439 | }, 440 | "nbformat": 4, 441 | "nbformat_minor": 1 442 | } 443 | -------------------------------------------------------------------------------- /exercises/E8-EnsembleTreesOverview.md: -------------------------------------------------------------------------------- 1 | # E8 - Ensemble Trees Overview 2 | 3 | Write at least 300 words explaining why ensemble is a successful strategy in machine learning. 4 | -------------------------------------------------------------------------------- /exercises/E9-Bagging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 9\n", 8 | "\n", 9 | "## Mashable news stories analysis\n", 10 | "\n", 11 | "Predicting if a news story is going to be popular\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 27, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
urltimedeltan_tokens_titlen_tokens_contentn_unique_tokensn_non_stop_wordsn_non_stop_unique_tokensnum_hrefsnum_self_hrefsnum_imgs...min_positive_polaritymax_positive_polarityavg_negative_polaritymin_negative_polaritymax_negative_polaritytitle_subjectivitytitle_sentiment_polarityabs_title_subjectivityabs_title_sentiment_polarityPopular
0http://mashable.com/2014/12/10/cia-torture-rep...28.09.0188.00.7326201.00.8442625.01.01.0...0.2000000.80-0.487500-0.60-0.2500000.90.80.40.81
1http://mashable.com/2013/10/18/bitlock-kicksta...447.07.0297.00.6531991.00.8157899.04.01.0...0.1600000.50-0.135340-0.40-0.0500000.1-0.10.40.10
2http://mashable.com/2013/07/24/google-glass-po...533.011.0181.00.6603771.00.7757014.03.01.0...0.1363641.000.0000000.000.0000000.31.00.21.00
3http://mashable.com/2013/11/21/these-are-the-m...413.012.0781.00.4974091.00.67735010.03.01.0...0.1000001.00-0.195701-0.40-0.0714290.00.00.50.00
4http://mashable.com/2014/02/11/parking-ticket-...331.08.0177.00.6857141.00.8303573.02.01.0...0.1000000.55-0.175000-0.25-0.1000000.00.00.50.00
\n", 186 | "

5 rows × 61 columns

\n", 187 | "
" 188 | ], 189 | "text/plain": [ 190 | " url timedelta \\\n", 191 | "0 http://mashable.com/2014/12/10/cia-torture-rep... 28.0 \n", 192 | "1 http://mashable.com/2013/10/18/bitlock-kicksta... 447.0 \n", 193 | "2 http://mashable.com/2013/07/24/google-glass-po... 533.0 \n", 194 | "3 http://mashable.com/2013/11/21/these-are-the-m... 413.0 \n", 195 | "4 http://mashable.com/2014/02/11/parking-ticket-... 331.0 \n", 196 | "\n", 197 | " n_tokens_title n_tokens_content n_unique_tokens n_non_stop_words \\\n", 198 | "0 9.0 188.0 0.732620 1.0 \n", 199 | "1 7.0 297.0 0.653199 1.0 \n", 200 | "2 11.0 181.0 0.660377 1.0 \n", 201 | "3 12.0 781.0 0.497409 1.0 \n", 202 | "4 8.0 177.0 0.685714 1.0 \n", 203 | "\n", 204 | " n_non_stop_unique_tokens num_hrefs num_self_hrefs num_imgs ... \\\n", 205 | "0 0.844262 5.0 1.0 1.0 ... \n", 206 | "1 0.815789 9.0 4.0 1.0 ... \n", 207 | "2 0.775701 4.0 3.0 1.0 ... \n", 208 | "3 0.677350 10.0 3.0 1.0 ... \n", 209 | "4 0.830357 3.0 2.0 1.0 ... \n", 210 | "\n", 211 | " min_positive_polarity max_positive_polarity avg_negative_polarity \\\n", 212 | "0 0.200000 0.80 -0.487500 \n", 213 | "1 0.160000 0.50 -0.135340 \n", 214 | "2 0.136364 1.00 0.000000 \n", 215 | "3 0.100000 1.00 -0.195701 \n", 216 | "4 0.100000 0.55 -0.175000 \n", 217 | "\n", 218 | " min_negative_polarity max_negative_polarity title_subjectivity \\\n", 219 | "0 -0.60 -0.250000 0.9 \n", 220 | "1 -0.40 -0.050000 0.1 \n", 221 | "2 0.00 0.000000 0.3 \n", 222 | "3 -0.40 -0.071429 0.0 \n", 223 | "4 -0.25 -0.100000 0.0 \n", 224 | "\n", 225 | " title_sentiment_polarity abs_title_subjectivity \\\n", 226 | "0 0.8 0.4 \n", 227 | "1 -0.1 0.4 \n", 228 | "2 1.0 0.2 \n", 229 | "3 0.0 0.5 \n", 230 | "4 0.0 0.5 \n", 231 | "\n", 232 | " abs_title_sentiment_polarity Popular \n", 233 | "0 0.8 1 \n", 234 | "1 0.1 0 \n", 235 | "2 1.0 0 \n", 236 | "3 0.0 0 \n", 237 | "4 0.0 0 \n", 238 | "\n", 239 | "[5 rows x 61 columns]" 240 | ] 241 | }, 242 | "execution_count": 27, 243 | "metadata": {}, 244 | "output_type": 
"execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "import pandas as pd\n", 249 | "\n", 250 | "url = 'https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/datasets/mashable.csv'\n", 251 | "df = pd.read_csv(url, index_col=0)\n", 252 | "df.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 28, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "(6000, 61)" 264 | ] 265 | }, 266 | "execution_count": 28, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "train_df.shape" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 29, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "X = train_df.drop(['url', 'Popular'], axis=1)\n", 282 | "y = train_df['Popular']" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 30, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "0.5" 294 | ] 295 | }, 296 | "execution_count": 30, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "y.mean()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 32, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# train/test split\n", 312 | "from sklearn.model_selection import train_test_split\n", 313 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "# Exercise 9.1\n", 328 | "\n", 329 | "Estimate a Decision Tree Classifier and a Logistic Regression\n", 330 | "\n", 331 | "Evaluate using the following metrics:\n", 332 | "* Accuracy\n", 333 | "* F1-Score" 334 | ] 335 | }, 336 | { 337 | 
"cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "# Exercise 9.2\n", 348 | "\n", 349 | "Estimate 300 bagged samples\n", 350 | "\n", 351 | "Estimate the following set of classifiers:\n", 352 | "\n", 353 | "* 100 Decision Trees where max_depth=None\n", 354 | "* 100 Decision Trees where max_depth=2\n", 355 | "* 100 Logistic Regressions" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": true 363 | }, 364 | "outputs": [], 365 | "source": [] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "# Exercise 9.3\n", 372 | "\n", 373 | "Ensemble using majority voting\n", 374 | "\n", 375 | "Evaluate using the following metrics:\n", 376 | "* Accuracy\n", 377 | "* F1-Score" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": { 384 | "collapsed": true 385 | }, 386 | "outputs": [], 387 | "source": [] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "# Exercise 9.4\n", 394 | "\n", 395 | "Estimate te probability as %models that predict positive\n", 396 | "\n", 397 | "Modify the probability threshold and select the one that maximizes the F1-Score" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "# Exercise 9.5\n", 414 | "\n", 415 | "Ensemble using weighted voting using the oob_error\n", 416 | "\n", 417 | "Evaluate using the following metrics:\n", 418 | "* Accuracy\n", 419 | "* F1-Score" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": 
[], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "# Exercise 9.6\n", 434 | "\n", 435 | "Estimate the probability of the weighted voting\n", 436 | "\n", 437 | "Modify the probability threshold and select the one that maximizes the F1-Score" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "metadata": { 444 | "collapsed": true 445 | }, 446 | "outputs": [], 447 | "source": [] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "# Exercise 9.7\n", 454 | "\n", 455 | "Estimate a logistic regression using the estimated classifiers' predictions as input\n", 456 | "\n", 457 | "Modify the probability threshold such that it maximizes the F1-Score" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": { 464 | "collapsed": true 465 | }, 466 | "outputs": [], 467 | "source": [] 468 | } 469 | ], 470 | "metadata": { 471 | "kernelspec": { 472 | "display_name": "Python 3", 473 | "language": "python", 474 | "name": "python3" 475 | }, 476 | "language_info": { 477 | "codemirror_mode": { 478 | "name": "ipython", 479 | "version": 3 480 | }, 481 | "file_extension": ".py", 482 | "mimetype": "text/x-python", 483 | "name": "python", 484 | "nbconvert_exporter": "python", 485 | "pygments_lexer": "ipython3", 486 | "version": "3.7.0" 487 | } 488 | }, 489 | "nbformat": 4, 490 | "nbformat_minor": 1 491 | } 492 | -------------------------------------------------------------------------------- /exercises/P3-ConversationsToxicityDetection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Project 3\n", 8 | "\n", 9 | "\n", 10 | "# Conversations Toxicity Detection\n", 11 | "\n", 12 | "Jigsaw Unintended Bias in Toxicity Classification \n", 13 | "\n", 14 | "Detect toxicity across a diverse range
of conversations\n", 15 | "\n", 16 | "\n", 17 | "https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/data#\n", 18 | "\n", 19 | "\n", 20 | "Try Colab\n", 21 | "https://albahnsen.com/2018/07/22/how-to-download-kaggle-data-into-google-colab/\n", 22 | "\n", 23 | "\n", 24 | "## Description\n", 25 | "\n", 26 | "## Background\n", 27 | "At the end of 2017 the Civil Comments platform shut down and chose make their ~2m public comments from their platform available in a lasting open archive so that researchers could understand and improve civility in online conversations for years to come. Jigsaw sponsored this effort and extended annotation of this data by human raters for various toxic conversational attributes.\n", 28 | "\n", 29 | "In the data supplied for this competition, the text of the individual comment is found in the comment_text column. Each comment in Train has a toxicity label (target), and models should predict the target toxicity for the Test data. This attribute (and all others) are fractional values which represent the fraction of human raters who believed the attribute applied to the given comment. For evaluation, test set examples with target >= 0.5 will be considered to be in the positive class (toxic).\n", 30 | "\n", 31 | "The data also has several additional toxicity subtype attributes. Models do not need to predict these attributes for the competition, they are included as an additional avenue for research. Subtype attributes are:\n", 32 | "\n", 33 | "- severe_toxicity\n", 34 | "- obscene\n", 35 | "- threat\n", 36 | "- insult\n", 37 | "- identity_attack\n", 38 | "- sexual_explicit\n", 39 | "\n", 40 | "Additionally, a subset of comments have been labelled with a variety of identity attributes, representing the identities that are mentioned in the comment. The columns corresponding to identity attributes are listed below. 
Only identities with more than 500 examples in the test set (combined public and private) will be included in the evaluation calculation. These identities are shown in bold.\n", 41 | "\n", 42 | "- male\n", 43 | "- female\n", 44 | "- transgender\n", 45 | "- other_gender\n", 46 | "- heterosexual\n", 47 | "- homosexual_gay_or_lesbian\n", 48 | "- bisexual\n", 49 | "- other_sexual_orientation\n", 50 | "- christian\n", 51 | "- jewish\n", 52 | "- muslim\n", 53 | "- hindu\n", 54 | "- buddhist\n", 55 | "- atheist\n", 56 | "- other_religion\n", 57 | "- black\n", 58 | "- white\n", 59 | "- asian\n", 60 | "- latino\n", 61 | "- other_race_or_ethnicity\n", 62 | "- physical_disability\n", 63 | "- intellectual_or_learning_disability\n", 64 | "- psychiatric_or_mental_illness\n", 65 | "- other_disability\n", 66 | "\n", 67 | "Note that the data contains different comments that can have the exact same text. Different comments that have the same text may have been labeled with different targets or subgroups.\n", 68 | "\n", 69 | "## Examples\n", 70 | "Here are a few examples of comments and their associated toxicity and identity labels. Label values range from 0.0 to 1.0, representing the fraction of raters who believed the label fit the comment.\n", 71 | "\n", 72 | "- Comment: i'm a white woman in my late 60's and believe me, they are not too crazy about me either!!\n", 73 | "\n", 74 | "Toxicity Labels: All 0.0\n", 75 | "Identity Mention Labels: female: 1.0, white: 1.0 (all others 0.0)\n", 76 | "- Comment: Why would you assume that the nurses in this story were women?\n", 77 | "\n", 78 | "Toxicity Labels: All 0.0\n", 79 | "Identity Mention Labels: female: 0.8 (all others 0.0)\n", 80 | "- Comment: Continue to stand strong LGBT community. 
Yes, indeed, you'll overcome and you have.\n", 81 | "\n", 82 | "Toxicity Labels: All 0.0\n", 83 | "Identity Mention Labels: homosexual_gay_or_lesbian: 0.8, bisexual: 0.6, transgender: 0.3 (all others 0.0)\n", 84 | "\n", 85 | "In addition to the labels described above, the dataset also provides metadata from Jigsaw's annotation: toxicity_annotator_count and identity_annotator_count, and metadata from Civil Comments: created_date, publication_id, parent_id, article_id, rating, funny, wow, sad, likes, disagree. Civil Comments' label rating is the civility rating Civil Comments users gave the comment.\n", 86 | "\n", 87 | "## Labelling Schema\n", 88 | "To obtain the toxicity labels, each comment was shown to up to 10 annotators*. Annotators were asked to: \"Rate the toxicity of this comment\"\n", 89 | "\n", 90 | "- Very Toxic (a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective)\n", 91 | "- Toxic (a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective)\n", 92 | "- Hard to Say\n", 93 | "- Not Toxic\n", 94 | "\n", 95 | "These ratings were then aggregated with the target value representing the fraction of annotations that fell within the former two categories.\n", 96 | "\n", 97 | "To collect the identity labels, annotators were asked to indicate all identities that were mentioned in the comment. 
An example question that was asked as part of this annotation effort was: \"What genders are mentioned in the comment?\"\n", 98 | "\n", 99 | "- Male\n", 100 | "- Female\n", 101 | "- Transgender\n", 102 | "- Other gender\n", 103 | "- No gender mentioned\n", 104 | "\n", 105 | "Again, these were aggregated into fractional values representing the fraction of raters who said the identity was mentioned in the comment.\n", 106 | "\n", 107 | "The distributions of labels and subgroup between Train and Test can be assumed to be similar, but not exact.\n", 108 | "\n", 109 | "*Note: Some comments were seen by many more than 10 annotators (up to thousands), due to sampling and strategies used to enforce rater accuracy.\n", 110 | "\n", 111 | "## File descriptions\n", 112 | "- train.csv - the training set, which includes subgroups\n", 113 | "- test.csv - the test set, which does not include subgroups\n", 114 | "- sample_submission.csv - a sample submission file in the correct format\n", 115 | "\n", 116 | "\n", 117 | "# Evaluation\n", 118 | "\n", 119 | "- 20% API\n", 120 | "- 40% Create a solution using a Machine Learning algorithm - Presentation - Only show what you did differently or what other teams can learn from your solution\n", 121 | "- 40% Performance in the Kaggle competition (Normalized according to class performance in the private leaderboard)" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.7.1" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 1 146 | } 147 | -------------------------------------------------------------------------------- 
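The workflow the project statement above describes — binarizing the fractional `target` at 0.5 (the competition's evaluation rule) and fitting a simple text-classification baseline — can be sketched as follows. This is a minimal illustration only: the tiny in-memory `DataFrame` is made up, and in the project you would read `train.csv` from Kaggle instead.

```python
# Sketch of the P3 baseline: binarize the fractional `target` at 0.5
# and fit a TF-IDF + logistic-regression classifier on `comment_text`.
# The toy comments below are invented; replace with pd.read_csv('train.csv').
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

train = pd.DataFrame({
    'comment_text': ['have a nice day', 'you are an idiot',
                     'thanks for sharing', 'shut up, moron',
                     'great article', 'nobody likes you'],
    'target': [0.0, 0.9, 0.1, 0.8, 0.0, 0.7],
})
# The evaluation treats target >= 0.5 as the positive (toxic) class
train['toxic'] = (train['target'] >= 0.5).astype(int)

model = make_pipeline(TfidfVectorizer(), LogisticRegression())
model.fit(train['comment_text'], train['toxic'])
# Toxicity scores in [0, 1], one per comment
probas = model.predict_proba(train['comment_text'])[:, 1]
```

The same fitted pipeline can then score the Kaggle test comments with `model.predict_proba(test['comment_text'])[:, 1]` to build a submission.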
/exercises/images/classification_kiank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/images/classification_kiank.png -------------------------------------------------------------------------------- /exercises/images/grad_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/images/grad_summary.png -------------------------------------------------------------------------------- /exercises/images/hidden_tunning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/images/hidden_tunning.png -------------------------------------------------------------------------------- /exercises/images/sgd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/images/sgd.gif -------------------------------------------------------------------------------- /exercises/images/sgd_bad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/images/sgd_bad.gif -------------------------------------------------------------------------------- /exercises/moviegenre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/exercises/moviegenre.png 
-------------------------------------------------------------------------------- /notebooks/10-CreatingAPIinAWS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# 10 - Creating APIs in AWS\n", 12 | "\n", 13 | "by [Alejandro Correa Bahnsen](albahnsen.com/) and [Jesus Solano](https://github.com/jesugome)\n", 14 | "\n", 15 | "version 1.4, February 2019\n", 16 | "\n", 17 | "## Part of the class [Practical Machine Learning](https://github.com/albahnsen/PracticalMachineLearningClass)\n", 18 | "\n", 19 | "\n", 20 | "\n", 21 | "This notebook is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License](http://creativecommons.org/licenses/by-sa/3.0/deed.en_US). \n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "slideshow": { 28 | "slide_type": "slide" 29 | } 30 | }, 31 | "source": [ 32 | "# AWS" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "slideshow": { 39 | "slide_type": "slide" 40 | } 41 | }, 42 | "source": [ 43 | "# Create an account\n", 44 | "https://portal.aws.amazon.com/billing/signup\n", 45 | "\n", 46 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img001.PNG)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "subslide" 54 | } 55 | }, 56 | "source": [ 57 | "## It is going to take you time to register ..." 
58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "subslide" 65 | } 66 | }, 67 | "source": [ 68 | "# Log in to the AWS Management Console\n", 69 | "\n", 70 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img002.GIF)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "subslide" 78 | } 79 | }, 80 | "source": [ 81 | "# Wait until the account is activated\n", 82 | "\n", 83 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img003.PNG)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "slideshow": { 90 | "slide_type": "slide" 91 | } 92 | }, 93 | "source": [ 94 | "# Creating an EC2 instance\n", 95 | "\n", 96 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img004.PNG)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "slideshow": { 103 | "slide_type": "subslide" 104 | } 105 | }, 106 | "source": [ 107 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img005.PNG)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "slideshow": { 114 | "slide_type": "subslide" 115 | } 116 | }, 117 | "source": [ 118 | "## Configure Security group\n", 119 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img006.PNG)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "slideshow": { 126 | "slide_type": "subslide" 127 | } 128 | }, 129 | "source": [ 130 | "### Add TCP port 5000 (used by Flask)\n", 131 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img007.PNG)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | 
"metadata": { 137 | "slideshow": { 138 | "slide_type": "subslide" 139 | } 140 | }, 141 | "source": [ 142 | "## Launch\n", 143 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img008.PNG)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": { 149 | "slideshow": { 150 | "slide_type": "subslide" 151 | } 152 | }, 153 | "source": [ 154 | "## Instance Security\n", 155 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img009.PNG)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "slideshow": { 162 | "slide_type": "subslide" 163 | } 164 | }, 165 | "source": [ 166 | "## Done\n", 167 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img010.PNG)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "slideshow": { 174 | "slide_type": "slide" 175 | } 176 | }, 177 | "source": [ 178 | "# Conecting to the AWS Instance\n", 179 | "\n", 180 | "Follow the tutorial on https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstances.html\n", 181 | "\n", 182 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img011.PNG)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "slideshow": { 189 | "slide_type": "subslide" 190 | } 191 | }, 192 | "source": [ 193 | "### Test connection\n", 194 | "\n", 195 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img012.PNG)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": { 201 | "slideshow": { 202 | "slide_type": "slide" 203 | } 204 | }, 205 | "source": [ 206 | "# Deploy Flask API" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "slideshow": { 213 | "slide_type": "subslide" 214 | } 215 | }, 216 | "source": 
[ 217 | "### Install anaconda\n", 218 | "\n", 219 | "`wget https://repo.anaconda.com/archive/Anaconda3-2018.12-Linux-x86_64.sh`\n", 220 | "\n", 221 | "`bash Anaconda3-2018.12-Linux-x86_64.sh`\n", 222 | "\n", 223 | "Test the instalation\n", 224 | "\n", 225 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img013.PNG)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "slideshow": { 232 | "slide_type": "subslide" 233 | } 234 | }, 235 | "source": [ 236 | "### If not Anaconda do:\n", 237 | "\n", 238 | "`export PATH=\"/home/ubuntu/anaconda3/bin:$PATH\"`\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "slideshow": { 245 | "slide_type": "subslide" 246 | } 247 | }, 248 | "source": [ 249 | "### Copy files\n", 250 | "\n", 251 | "`git clone https://github.com/albahnsen/PracticalMachineLearningClass.git`\n", 252 | "\n", 253 | "#### Go to the model_deployment folder\n", 254 | "\n", 255 | "![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img014.PNG)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "slideshow": { 262 | "slide_type": "subslide" 263 | } 264 | }, 265 | "source": [ 266 | "### Install the additional libraries\n", 267 | "\n", 268 | "`pip install flask-restplus`" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "slideshow": { 275 | "slide_type": "subslide" 276 | } 277 | }, 278 | "source": [ 279 | "## Deploy the Flask API\n", 280 | "\n", 281 | "`nohup python api.py &`" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": { 287 | "slideshow": { 288 | "slide_type": "subslide" 289 | } 290 | }, 291 | "source": [ 292 | "## Check the Flask API\n", 293 | "\n", 294 | "Go to -ec2-machine-ip-:5000\n", 295 | "\n", 296 | "In my example http://54.175.140.184:5000/\n", 297 | "\n", 298 | 
"![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img015.PNG)", 299 | "\n", 300 | "## Command to get process number and finish it\n", 301 | "**ps -ef |grep jupyter** \n", 302 | "Next use the command **kill -9 xxxx** \n", 303 | "xxxx is the process number \n" 304 | 305 | 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [] 314 | } 315 | ], 316 | "metadata": { 317 | "celltoolbar": "Slideshow", 318 | "kernelspec": { 319 | "display_name": "Python 3", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.7.0" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 1 338 | } 339 | 340 | -------------------------------------------------------------------------------- /notebooks/12-Ensembles_Boosting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 12 - Ensemble Methods - Boosting\n", 8 | "\n", 9 | "\n", 10 | "by [Alejandro Correa Bahnsen](albahnsen.com/) and [Jesus Solano](https://github.com/jesugome)\n", 11 | "\n", 12 | "version 1.5, February 2019\n", 13 | "\n", 14 | "## Part of the class [Practical Machine Learning](https://github.com/albahnsen/PracticalMachineLearningClass)\n", 15 | "\n", 16 | "\n", 17 | "\n", 18 | "This notebook is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License](http://creativecommons.org/licenses/by-sa/3.0/deed.en_US). 
Special thanks goes to [Kevin Markham](https://github.com/justmarkham))" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Why are we learning about ensembling?\n", 26 | "\n", 27 | "- Very popular method for improving the predictive performance of machine learning models\n", 28 | "- Provides a foundation for understanding more sophisticated models" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Part 5: Boosting\n", 36 | "\n", 37 | "While boosting is not algorithmically constrained, most boosting algorithms consist of iteratively learning weak classifiers with respect to a distribution and adding them to a final strong classifier. When they are added, they are typically weighted in some way that is usually related to the weak learners' accuracy. After a weak learner is added, the data is reweighted: examples that are misclassified gain weight and examples that are classified correctly lose weight (some boosting algorithms actually decrease the weight of repeatedly misclassified examples, e.g., boost by majority and BrownBoost). Thus, future weak learners focus more on the examples that previous weak learners misclassified. (Wikipedia)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 32, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "" 49 | ], 50 | "text/plain": [ 51 | "" 52 | ] 53 | }, 54 | "execution_count": 32, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "from IPython.display import Image\n", 61 | "Image(url= \"http://vision.cs.chubu.ac.jp/wp/wp-content/uploads/2013/07/OurMethodv81.png\", width=900)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Adaboost\n", 69 | "\n", 70 | "AdaBoost (adaptive boosting) is an ensemble learning algorithm that can be used for classification or regression. 
Although AdaBoost is more resistant to overfitting than many machine learning algorithms, it is often sensitive to noisy data and outliers.\n", 71 | "\n", 72 | "AdaBoost is called adaptive because it uses multiple iterations to generate a single composite strong learner. AdaBoost creates the strong learner (a classifier that is well-correlated to the true classifier) by iteratively adding weak learners (a classifier that is only slightly correlated to the true classifier). During each round of training, a new weak learner is added to the ensemble and a weighting vector is adjusted to focus on examples that were misclassified in previous rounds. The result is a classifier that has higher accuracy than the weak learners’ classifiers." 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Algorithm:\n", 80 | "\n", 81 | "* Initialize all weights ($w_i$) to 1 / n_samples\n", 82 | "* Train a classifier $h_t$ using weights\n", 83 | "* Estimate training error $e_t$\n", 84 | "* set $alpha_t = log\\left(\\frac{1-e_t}{e_t}\\right)$\n", 85 | "* Update weights \n", 86 | "$$w_i^{t+1} = w_i^{t}e^{\\left(\\alpha_t \\mathbf{I}\\left(y_i \\ne h_t(x_t)\\right)\\right)}$$\n", 87 | "* Repeat while $e_t<0.5$ and $t= 1).astype(np.int)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 47, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "(0.5105105105105104, 0.8518181818181818)" 368 | ] 369 | }, 370 | "execution_count": 47, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "metrics.f1_score(y_pred, y_test.values), metrics.accuracy_score(y_pred, y_test.values)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "### Using sklearn" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 48, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | 
"from sklearn.ensemble import AdaBoostClassifier" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 49, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n", 404 | " learning_rate=1.0, n_estimators=50, random_state=None)" 405 | ] 406 | }, 407 | "execution_count": 49, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "clf = AdaBoostClassifier()\n", 414 | "clf" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 50, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "(0.29107981220657275, 0.8627272727272727)" 426 | ] 427 | }, 428 | "execution_count": 50, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "clf.fit(X_train, y_train)\n", 435 | "y_pred = clf.predict(X_test)\n", 436 | "metrics.f1_score(y_pred, y_test.values), metrics.accuracy_score(y_pred, y_test.values)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "### Gradient Boosting" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 51, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 455 | " learning_rate=0.1, loss='deviance', max_depth=3,\n", 456 | " max_features=None, max_leaf_nodes=None,\n", 457 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 458 | " min_samples_leaf=1, min_samples_split=2,\n", 459 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n", 460 | " n_iter_no_change=None, presort='auto', random_state=None,\n", 461 | " subsample=1.0, tol=0.0001, validation_fraction=0.1,\n", 462 | " verbose=0, warm_start=False)" 463 | ] 464 | }, 465 | "execution_count": 51, 466 | "metadata": {}, 467 | "output_type": 
"execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "from sklearn.ensemble import GradientBoostingClassifier\n", 472 | "\n", 473 | "clf = GradientBoostingClassifier()\n", 474 | "clf" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 52, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "(0.5289256198347108, 0.8963636363636364)" 486 | ] 487 | }, 488 | "execution_count": 52, 489 | "metadata": {}, 490 | "output_type": "execute_result" 491 | } 492 | ], 493 | "source": [ 494 | "clf.fit(X_train, y_train)\n", 495 | "y_pred = clf.predict(X_test)\n", 496 | "metrics.f1_score(y_test.values, y_pred), metrics.accuracy_score(y_test.values, y_pred)" 497 | ] 498 | } 499 | ], 500 | "metadata": { 501 | "kernelspec": { 502 | "display_name": "Python 3", 503 | "language": "python", 504 | "name": "python3" 505 | }, 506 | "language_info": { 507 | "codemirror_mode": { 508 | "name": "ipython", 509 | "version": 3 510 | }, 511 | "file_extension": ".py", 512 | "mimetype": "text/x-python", 513 | "name": "python", 514 | "nbconvert_exporter": "python", 515 | "pygments_lexer": "ipython3", 516 | "version": "3.7.0" 517 | } 518 | }, 519 | "nbformat": 4, 520 | "nbformat_minor": 1 521 | } 522 | -------------------------------------------------------------------------------- /notebooks/16-IntroNLP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/16-IntroNLP.pdf -------------------------------------------------------------------------------- /notebooks/18-IntroNLP_II.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/18-IntroNLP_II.pdf
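The AdaBoost recipe listed at the top of notebook 12 (initialize $w_i = 1/n$, fit a weak learner, compute the weighted error $e_t$, set $\alpha_t = \log((1-e_t)/e_t)$, then up-weight the misclassified examples) can also be sketched from scratch. This is a minimal illustration, not the course's reference implementation: the names `adaboost_fit`, `adaboost_predict`, and `n_rounds` are made up for the example, and decision stumps stand in for the generic weak learner.

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier

def adaboost_fit(X, y, n_rounds=20):
    """Discrete AdaBoost over decision stumps; y must hold labels in {0, 1}."""
    n = len(y)
    w = np.full(n, 1.0 / n)            # initialize all weights to 1/n_samples
    stumps, alphas = [], []
    for _ in range(n_rounds):
        stump = DecisionTreeClassifier(max_depth=1)
        stump.fit(X, y, sample_weight=w)
        miss = stump.predict(X) != y
        e = np.dot(w, miss) / w.sum()  # weighted training error e_t
        if e == 0:                     # perfect weak learner: keep it and stop
            stumps.append(stump)
            alphas.append(10.0)        # stand-in for an "infinite" vote
            break
        if e >= 0.5:                   # stop condition from the notebook
            break
        alpha = np.log((1 - e) / e)    # alpha_t
        w = w * np.exp(alpha * miss)   # up-weight misclassified examples
        w = w / w.sum()
        stumps.append(stump)
        alphas.append(alpha)
    return stumps, alphas

def adaboost_predict(X, stumps, alphas):
    # Weighted vote: map each stump's {0,1} output to {-1,+1}, then combine.
    votes = sum(a * (2 * s.predict(X) - 1) for s, a in zip(stumps, alphas))
    return (votes >= 0).astype(int)
```

On a toy dataset this hand-rolled loop typically reaches a higher training accuracy than a single stump, which is exactly what the re-weighting is for; `AdaBoostClassifier` implements the same idea with many refinements.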
-------------------------------------------------------------------------------- /notebooks/images/MLP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/MLP.png -------------------------------------------------------------------------------- /notebooks/images/Perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/Perceptron.png -------------------------------------------------------------------------------- /notebooks/images/backprop (1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/backprop (1).png -------------------------------------------------------------------------------- /notebooks/images/backprop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/backprop.png -------------------------------------------------------------------------------- /notebooks/images/bkwd_step_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/bkwd_step_net.png -------------------------------------------------------------------------------- /notebooks/images/classification_kiank.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/classification_kiank.png -------------------------------------------------------------------------------- /notebooks/images/dl_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/dl_overview.png -------------------------------------------------------------------------------- /notebooks/images/fwd_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/fwd_step.png -------------------------------------------------------------------------------- /notebooks/images/fwd_step_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/fwd_step_net.png -------------------------------------------------------------------------------- /notebooks/images/grad_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/grad_summary.png -------------------------------------------------------------------------------- /notebooks/images/img001.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img001.PNG -------------------------------------------------------------------------------- /notebooks/images/img002.GIF: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img002.GIF -------------------------------------------------------------------------------- /notebooks/images/img003.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img003.PNG -------------------------------------------------------------------------------- /notebooks/images/img004.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img004.PNG -------------------------------------------------------------------------------- /notebooks/images/img005.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img005.PNG -------------------------------------------------------------------------------- /notebooks/images/img006.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img006.PNG -------------------------------------------------------------------------------- /notebooks/images/img007.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img007.PNG -------------------------------------------------------------------------------- 
/notebooks/images/img008.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img008.PNG -------------------------------------------------------------------------------- /notebooks/images/img009.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img009.PNG -------------------------------------------------------------------------------- /notebooks/images/img010.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img010.PNG -------------------------------------------------------------------------------- /notebooks/images/img011.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img011.PNG -------------------------------------------------------------------------------- /notebooks/images/img012.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img012.PNG -------------------------------------------------------------------------------- /notebooks/images/img013.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img013.PNG 
-------------------------------------------------------------------------------- /notebooks/images/img014.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img014.PNG -------------------------------------------------------------------------------- /notebooks/images/img015.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img015.PNG -------------------------------------------------------------------------------- /notebooks/images/img016.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/img016.PNG -------------------------------------------------------------------------------- /notebooks/images/keras-logo-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/keras-logo-small.jpg -------------------------------------------------------------------------------- /notebooks/images/logistic_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/logistic_function.png -------------------------------------------------------------------------------- /notebooks/images/multi-layers-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/multi-layers-1.png -------------------------------------------------------------------------------- /notebooks/images/multi-layers-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/multi-layers-2.png -------------------------------------------------------------------------------- /notebooks/images/sgd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/sgd.gif -------------------------------------------------------------------------------- /notebooks/images/sgd_bad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/sgd_bad.gif -------------------------------------------------------------------------------- /notebooks/images/single_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/single_layer.png -------------------------------------------------------------------------------- /notebooks/images/updateParameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/images/updateParameters.png -------------------------------------------------------------------------------- /notebooks/model_deployment/api.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from flask import Flask 3 | from flask_restplus import Api, Resource, fields 4 | import joblib 5 | from m09_model_deployment import predict_proba 6 | 7 | app = Flask(__name__) 8 | 9 | api = Api( 10 | app, 11 | version='1.0', 12 | title='Phishing Prediction API', 13 | description='Phishing Prediction API') 14 | 15 | ns = api.namespace('predict', 16 | description='Phishing Classifier') 17 | 18 | parser = api.parser() 19 | 20 | parser.add_argument( 21 | 'URL', 22 | type=str, 23 | required=True, 24 | help='URL to be analyzed', 25 | location='args') 26 | 27 | resource_fields = api.model('Resource', { 28 | 'result': fields.String, 29 | }) 30 | 31 | @ns.route('/') 32 | class PhishingApi(Resource): 33 | 34 | @api.doc(parser=parser) 35 | @api.marshal_with(resource_fields) 36 | def get(self): 37 | args = parser.parse_args() 38 | 39 | return { 40 | "result": predict_proba(args['URL']) 41 | }, 200 42 | 43 | 44 | if __name__ == '__main__': 45 | app.run(debug=True, use_reloader=False, host='0.0.0.0', port=8888) 46 | -------------------------------------------------------------------------------- /notebooks/model_deployment/m09_model_deployment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import pandas as pd 4 | import joblib 5 | import sys 6 | import os 7 | 8 | def predict_proba(url): 9 | 10 | clf = joblib.load(os.path.dirname(__file__) + '/phishing_clf.pkl') 11 | 12 | url_ = pd.DataFrame([url], columns=['url']) 13 | 14 | # Create features 15 | keywords = ['https', 'login', '.php', '.html', '@', 'sign'] 16 | for keyword in keywords: 17 | url_['keyword_' + keyword] = url_.url.str.contains(keyword).astype(int) 18 | 19 | url_['lenght'] = url_.url.str.len() - 2 20 | domain = url_.url.str.split('/', expand=True).iloc[:, 2] 21 | url_['lenght_domain'] =
domain.str.len() 22 | url_['isIP'] = url_.url.str.replace('.', '').str.isnumeric().astype(int) 23 | url_['count_com'] = url_.url.str.count('com') 24 | 25 | # Make prediction 26 | p1 = clf.predict_proba(url_.drop('url', axis=1))[0,1] 27 | 28 | return p1 29 | 30 | 31 | if __name__ == "__main__": 32 | 33 | if len(sys.argv) == 1: 34 | print('Please add a URL') 35 | 36 | else: 37 | 38 | url = sys.argv[1] 39 | 40 | p1 = predict_proba(url) 41 | 42 | print(url) 43 | print('Probability of Phishing: ', p1) 44 | -------------------------------------------------------------------------------- /notebooks/model_deployment/phishing_clf.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/3679820d8782a5787305c5779c0bc4a10cbf2da0/notebooks/model_deployment/phishing_clf.pkl --------------------------------------------------------------------------------
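For reference, the feature engineering inside `predict_proba` can be exercised on its own, without the pickled classifier. The sketch below mirrors those features (keyword flags, the `lenght` columns — the typo is kept because it is the column name the model was trained on — the IP check, and the `'com'` count). `extract_features` is an illustrative name, and unlike the original it passes `regex=False` so that keywords like `'.php'` match literally; the original relies on pandas' regex defaults, so its outputs can differ slightly.

```python
import pandas as pd

# Keyword list copied from m09_model_deployment.py.
KEYWORDS = ['https', 'login', '.php', '.html', '@', 'sign']

def extract_features(url):
    """Build the feature row for one scheme-prefixed URL (e.g. 'http://...')."""
    url_ = pd.DataFrame([url], columns=['url'])
    for keyword in KEYWORDS:
        # regex=False: match the keyword literally ('.php', '@', ...)
        url_['keyword_' + keyword] = url_.url.str.contains(keyword, regex=False).astype(int)
    url_['lenght'] = url_.url.str.len() - 2   # mirrors the original's len - 2
    # The domain is the third '/'-separated token, so a scheme prefix is required.
    domain = url_.url.str.split('/', expand=True).iloc[:, 2]
    url_['lenght_domain'] = domain.str.len()
    # isIP: flag URLs that are all digits once the dots are stripped out.
    url_['isIP'] = url_.url.str.replace('.', '', regex=False).str.isnumeric().astype(int)
    url_['count_com'] = url_.url.str.count('com')
    return url_.drop('url', axis=1)
```

Calling `extract_features('http://192.168.0.1/login.php')` yields one row with ten columns, in the layout `clf.predict_proba` expects.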