├── .gitignore
├── README.md
└── projects
    ├── boston_housing
        ├── README.md
        ├── boston_housing.ipynb
        ├── housing.csv
        ├── project_description.md
        └── visuals.py
    ├── capstone
        ├── README.md
        ├── capstone_proposal_template.md
        ├── capstone_report_template.md
        ├── proposal_project_description.md
        ├── report-example-1.pdf
        └── report-example-3.pdf
    ├── customer_segments
        ├── README.md
        ├── customer_segments.ipynb
        ├── customers.csv
        ├── project_description.md
        └── visuals.py
    ├── digit_recognition
        ├── README.md
        ├── digit_recognition.ipynb
        └── project_description.md
    ├── finding_donors
        ├── README.md
        ├── census.csv
        ├── finding_donors.ipynb
        ├── project_description.md
        └── visuals.py
    ├── image-classification
        ├── ReadMe.md
        ├── helper.py
        ├── image_classification.ipynb
        └── problem_unittests.py
    ├── intro-to-tensorflow
        ├── environment.yml
        ├── environment_win.yml
        ├── image
        │   ├── Learn Rate Tune - Image.png
        │   ├── Mean Variance - Image.png
        │   ├── network_diagram.png
        │   └── notmnist.png
        ├── intro_to_tensorflow.ipynb
        └── intro_to_tensorflow_solution.ipynb
    ├── practice_projects
        ├── cnn
        │   ├── .gitignore
        │   ├── README.md
        │   ├── cifar10-augmentation
        │   │   ├── aug_model.weights.best.hdf5
        │   │   └── cifar10_augmentation.ipynb
        │   ├── cifar10-classification
        │   │   ├── MLP.weights.best.hdf5
        │   │   ├── cifar10_cnn.ipynb
        │   │   ├── cifar10_mlp.ipynb
        │   │   └── model.weights.best.hdf5
        │   ├── conv-visualization
        │   │   ├── conv_visualization.ipynb
        │   │   └── images
        │   │   │   └── udacity_sdc.png
        │   ├── mnist-mlp
        │   │   ├── mnist.model.best.hdf5
        │   │   └── mnist_mlp.ipynb
        │   ├── requirements
        │   │   ├── aind-dog-linux.yml
        │   │   ├── aind-dog-mac.yml
        │   │   ├── aind-dog-windows.yml
        │   │   └── requirements.txt
        │   └── transfer-learning
        │   │   ├── bottleneck_features.ipynb
        │   │   ├── bottleneck_features
        │   │       └── .gitignore
        │   │   ├── dogvgg16.weights.best.hdf5
        │   │   ├── figures
        │   │       ├── vgg16.png
        │   │       └── vgg16_transfer.png
        │   │   ├── images
        │   │       ├── American_water_spaniel_00648.jpg
        │   │       ├── Brittany_02625.jpg
        │   │       ├── Curly-coated_retriever_03896.jpg
        │   │       ├── Labrador_retriever_06449.jpg
        │   │       ├── Labrador_retriever_06455.jpg
        │   │       ├── Labrador_retriever_06457.jpg
        │   │       ├── Welsh_springer_spaniel_08203.jpg
        │   │       └── sopa.jpg
        │   │   └── transfer_learning.ipynb
        ├── imdb
        │   ├── .gitignore
        │   ├── IMDB_In_Keras.ipynb
        │   ├── IMDB_In_Keras_Solutions.ipynb
        │   ├── README.md
        │   ├── Student_Admissions.ipynb
        │   ├── requirements
        │   │   ├── aind-dl-mac-linux.yml
        │   │   ├── aind-dl-windows.yml
        │   │   └── requirements.txt
        │   └── student_data.csv
        └── naive_bayes_tutorial
        │   ├── Bayesian_Inference.ipynb
        │   ├── Bayesian_Inference_solution.ipynb
        │   ├── ReadMe.md
        │   ├── images
        │       ├── bayes_formula.png
        │       ├── countvectorizer.png
        │       ├── dqnb.png
        │       ├── naivebayes.png
        │       └── tfidf.png
        │   └── smsspamcollection
        │       ├── SMSSpamCollection
        │       └── readme
    ├── smartcab
        ├── README.md
        ├── project_description.md
        ├── smartcab.ipynb
        ├── smartcab
        │   ├── __init__.py
        │   ├── agent.py
        │   ├── environment.py
        │   ├── images
        │   │   ├── car-black.png
        │   │   ├── car-blue.png
        │   │   ├── car-cyan.png
        │   │   ├── car-green.png
        │   │   ├── car-magenta.png
        │   │   ├── car-orange.png
        │   │   ├── car-red.png
        │   │   ├── car-white.png
        │   │   ├── car-yellow.png
        │   │   ├── east-west.png
        │   │   ├── logo.png
        │   │   └── north-south.png
        │   ├── planner.py
        │   └── simulator.py
        └── visuals.py
    ├── student_intervention
        ├── README.md
        ├── project_description.md
        ├── student-data.csv
        └── student_intervention.ipynb
    └── titanic_survival_exploration
        ├── README.md
        ├── project_description.md
        ├── titanic_data.csv
        ├── titanic_survival_exploration.ipynb
        └── visuals.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Mac OS
 2 | .DS_Store
 3 | 
 4 | # Byte-compiled / optimized / DLL files
 5 | __pycache__/
 6 | *.py[cod]
 7 | *$py.class
 8 | 
 9 | # C extensions
10 | *.so
11 | 
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | 
30 | # PyInstaller
31 | #  Usually these files are written by a python script from a template
32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *,cover
49 | .hypothesis/
50 | 
51 | # Translations
52 | *.mo
53 | *.pot
54 | 
55 | # Django stuff:
56 | *.log
57 | 
58 | # Sphinx documentation
59 | docs/_build/
60 | 
61 | # PyBuilder
62 | target/
63 | 
64 | #Ipython Notebook
65 | .ipynb_checkpoints
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # machine-learning
2 | Content for Udacity's Machine Learning curriculum, which includes projects and their descriptions.
3 | 
4 | <a rel="license" href="http://creativecommons.org/licenses/by-nc-nd/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-nd/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-nd/4.0/">Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License</a>. Please refer to [Udacity Terms of Service](https://www.udacity.com/legal) for further information.
5 | 


--------------------------------------------------------------------------------
/projects/boston_housing/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Model Evaluation and Validation
 3 | ## Project: Predicting Boston Housing Prices
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python** and the following Python libraries installed:
 8 | 
 9 | - [NumPy](http://www.numpy.org/)
10 | - [Pandas](http://pandas.pydata.org/)
11 | - [matplotlib](http://matplotlib.org/)
12 | - [scikit-learn](http://scikit-learn.org/stable/)
13 | 
14 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
15 | 
16 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. 
17 | 
18 | ### Code
19 | 
20 | Template code is provided in the `boston_housing.ipynb` notebook file. You will also be required to use the included `visuals.py` Python file and the `housing.csv` dataset file to complete your work. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. Note that the code included in `visuals.py` is meant to be used out-of-the-box and not intended for students to manipulate. If you are interested in how the visualizations are created in the notebook, please feel free to explore this Python file.
21 | 
22 | ### Run
23 | 
24 | In a terminal or command window, navigate to the top-level project directory `boston_housing/` (that contains this README) and run one of the following commands:
25 | 
26 | ```bash
27 | ipython notebook boston_housing.ipynb
28 | ```  
29 | or
30 | ```bash
31 | jupyter notebook boston_housing.ipynb
32 | ```
33 | 
34 | This will open the Jupyter Notebook software and project file in your browser.
35 | 
36 | ### Data
37 | 
38 | The modified Boston housing dataset consists of 489 data points, with each datapoint having 3 features. This dataset is a modified version of the Boston Housing dataset found on the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Housing).
39 | 
40 | **Features**
41 | 1.  `RM`: average number of rooms per dwelling
42 | 2. `LSTAT`: percentage of population considered lower status
43 | 3. `PTRATIO`: pupil-teacher ratio by town
44 | 
45 | **Target Variable**
46 | 4. `MEDV`: median value of owner-occupied homes


--------------------------------------------------------------------------------
/projects/boston_housing/project_description.md:
--------------------------------------------------------------------------------
 1 | # Content: Model Evaluation and Validation
 2 | ## Project: Predicting Boston Housing Prices
 3 | 
 4 | ## Project Overview
 5 | In this project, you will apply basic machine learning concepts on data collected for housing prices in the Boston, Massachusetts area to predict the selling price of a new home. You will first explore the data to obtain important features and descriptive statistics about the dataset. Next, you will properly split the data into testing and training subsets, and determine a suitable performance metric for this problem. You will then analyze performance graphs for a learning algorithm with varying parameters and training set sizes. This will enable you to pick the optimal model that best generalizes for unseen data. Finally, you will test this optimal model on a new sample and compare the predicted selling price to your statistics.
 6 | 
 7 | ## Project Highlights
 8 | This project is designed to get you acquainted with working with datasets in Python and applying basic machine learning techniques using NumPy and Scikit-Learn. Before being expected to use many of the available algorithms in the sklearn library, it will be helpful to first practice analyzing and interpreting the performance of your model.
 9 | 
10 | Things you will learn by completing this project:
11 | 
12 | - How to use NumPy to investigate the latent features of a dataset.
13 | - How to analyze various learning performance plots for variance and bias.
14 | - How to determine the best-guess model for predictions from unseen data.
15 | - How to evaluate a model's performance on unseen data using previous data.
16 | 
17 | ## Description
18 | The Boston housing market is highly competitive, and you want to be the best real estate agent in the area. To compete with your peers, you decide to leverage a few basic machine learning concepts to assist you and a client with finding the best selling price for their home. Luckily, you've come across the Boston Housing dataset which contains aggregated data on various features for houses in Greater Boston communities, including the median value of homes for each of those areas. Your task is to build an optimal model based on a statistical analysis with the tools available. This model will then be used to estimate the best selling price for your clients' homes.
19 | 
20 | ## Software and Libraries
21 | This project uses the following software and Python libraries:
22 | 
23 | - [Python](https://www.python.org/download/releases/3.0/)
24 | - [NumPy](http://www.numpy.org/)
25 | - [pandas](http://pandas.pydata.org/)
26 | - [scikit-learn](http://scikit-learn.org/stable/)
27 | - [matplotlib](http://matplotlib.org/)
28 | 
29 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
30 | 
31 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. 
32 | 
33 | ## Starting the Project
34 | 
35 | For this assignment, you can find the `boston_housing` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
36 | 
37 | This project contains three files:
38 | 
39 | - `boston_housing.ipynb`: This is the main file where you will be performing your work on the project.
40 | - `housing.csv`: The project dataset. You'll load this data in the notebook.
41 | - `visuals.py`: This Python script provides supplementary visualizations for the project. Do not modify.
42 | 
43 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook boston_housing.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
44 | 
45 | ## Submitting the Project
46 | 
47 | ### Evaluation
48 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/103/view" target="_blank">Predicting Boston Housing Prices project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
49 | 
50 | ### Submission Files
51 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `boston_housing` for ease of access:
52 |  - The `boston_housing.ipynb` notebook file with all questions answered and all code cells executed and displaying output.
53 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
54 | 
55 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
56 | 
57 | ### I'm Ready!
58 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
59 | 
60 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
61 | 
62 | ### What's Next?
63 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
64 | 


--------------------------------------------------------------------------------
/projects/boston_housing/visuals.py:
--------------------------------------------------------------------------------
  1 | ###########################################
  2 | # Suppress matplotlib user warnings
  3 | # Necessary for newer version of matplotlib
  4 | import warnings
  5 | warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
  6 | #
  7 | # Display inline matplotlib plots with IPython
  8 | from IPython import get_ipython
  9 | get_ipython().run_line_magic('matplotlib', 'inline')
 10 | ###########################################
 11 | 
 12 | import matplotlib.pyplot as pl
 13 | import numpy as np
 14 | from sklearn.model_selection import learning_curve
 15 | from sklearn.model_selection import validation_curve
 16 | from sklearn.tree import DecisionTreeRegressor
 17 | from sklearn.model_selection import ShuffleSplit, train_test_split
 18 | 
 19 | def ModelLearning(X, y):
 20 |     """ Calculates the performance of several models with varying sizes of training data.
 21 |         The learning and testing scores for each model are then plotted. """
 22 | 
 23 |     # Create 10 cross-validation sets for training and testing
 24 |     cv = ShuffleSplit(n_splits = 10, test_size = 0.2, random_state = 0)
 25 | 
 26 |     # Generate the training set sizes increasing by 50
 27 |     train_sizes = np.rint(np.linspace(1, X.shape[0]*0.8 - 1, 9)).astype(int)
 28 | 
 29 |     # Create the figure window
 30 |     fig = pl.figure(figsize=(10,7))
 31 | 
 32 |     # Create three different models based on max_depth
 33 |     for k, depth in enumerate([1,3,6,10]):
 34 | 
 35 |         # Create a Decision tree regressor at max_depth = depth
 36 |         regressor = DecisionTreeRegressor(max_depth = depth)
 37 | 
 38 |         # Calculate the training and testing scores
 39 |         sizes, train_scores, test_scores = learning_curve(regressor, X, y, \
 40 |             cv = cv, train_sizes = train_sizes, scoring = 'r2')
 41 | 
 42 |         # Find the mean and standard deviation for smoothing
 43 |         train_std = np.std(train_scores, axis = 1)
 44 |         train_mean = np.mean(train_scores, axis = 1)
 45 |         test_std = np.std(test_scores, axis = 1)
 46 |         test_mean = np.mean(test_scores, axis = 1)
 47 | 
 48 |         # Subplot the learning curve
 49 |         ax = fig.add_subplot(2, 2, k+1)
 50 |         ax.plot(sizes, train_mean, 'o-', color = 'r', label = 'Training Score')
 51 |         ax.plot(sizes, test_mean, 'o-', color = 'g', label = 'Testing Score')
 52 |         ax.fill_between(sizes, train_mean - train_std, \
 53 |             train_mean + train_std, alpha = 0.15, color = 'r')
 54 |         ax.fill_between(sizes, test_mean - test_std, \
 55 |             test_mean + test_std, alpha = 0.15, color = 'g')
 56 | 
 57 |         # Labels
 58 |         ax.set_title('max_depth = %s'%(depth))
 59 |         ax.set_xlabel('Number of Training Points')
 60 |         ax.set_ylabel('Score')
 61 |         ax.set_xlim([0, X.shape[0]*0.8])
 62 |         ax.set_ylim([-0.05, 1.05])
 63 | 
 64 |     # Visual aesthetics
 65 |     ax.legend(bbox_to_anchor=(1.05, 2.05), loc='lower left', borderaxespad = 0.)
 66 |     fig.suptitle('Decision Tree Regressor Learning Performances', fontsize = 16, y = 1.03)
 67 |     fig.tight_layout()
 68 |     fig.show()
 69 | 
 70 | 
 71 | def ModelComplexity(X, y):
 72 |     """ Calculates the performance of the model as model complexity increases.
 73 |         The learning and testing errors rates are then plotted. """
 74 | 
 75 |     # Create 10 cross-validation sets for training and testing
 76 |     cv = ShuffleSplit(n_splits = 10, test_size = 0.2, random_state = 0)
 77 | 
 78 |     # Vary the max_depth parameter from 1 to 10
 79 |     max_depth = np.arange(1,11)
 80 | 
 81 |     # Calculate the training and testing scores
 82 |     train_scores, test_scores = validation_curve(DecisionTreeRegressor(), X, y, \
 83 |         param_name = "max_depth", param_range = max_depth, cv = cv, scoring = 'r2')
 84 | 
 85 |     # Find the mean and standard deviation for smoothing
 86 |     train_mean = np.mean(train_scores, axis=1)
 87 |     train_std = np.std(train_scores, axis=1)
 88 |     test_mean = np.mean(test_scores, axis=1)
 89 |     test_std = np.std(test_scores, axis=1)
 90 | 
 91 |     # Plot the validation curve
 92 |     pl.figure(figsize=(7, 5))
 93 |     pl.title('Decision Tree Regressor Complexity Performance')
 94 |     pl.plot(max_depth, train_mean, 'o-', color = 'r', label = 'Training Score')
 95 |     pl.plot(max_depth, test_mean, 'o-', color = 'g', label = 'Validation Score')
 96 |     pl.fill_between(max_depth, train_mean - train_std, \
 97 |         train_mean + train_std, alpha = 0.15, color = 'r')
 98 |     pl.fill_between(max_depth, test_mean - test_std, \
 99 |         test_mean + test_std, alpha = 0.15, color = 'g')
100 | 
101 |     # Visual aesthetics
102 |     pl.legend(loc = 'lower right')
103 |     pl.xlabel('Maximum Depth')
104 |     pl.ylabel('Score')
105 |     pl.ylim([-0.05,1.05])
106 |     pl.show()
107 | 
108 | 
def PredictTrials(X, y, fitter, data):
    """ Fits a model on ten different train/test splits and prints the
        prediction each one makes for the first entry of `data`, followed
        by the spread between the highest and lowest prediction. """

    # Predicted price from each trial
    predictions = []

    for trial in range(1, 11):
        # Seed the split with the zero-based trial index; the test portion is unused
        X_train, _, y_train, _ = train_test_split(
            X, y, test_size = 0.2, random_state = trial - 1)

        # `fitter` is called with the training data and must return a fitted model
        model = fitter(X_train, y_train)

        # Predict for the first entry of `data` only
        estimate = model.predict([data[0]])[0]
        predictions.append(estimate)

        # Report this trial
        print("Trial {}: ${:,.2f}".format(trial, estimate))

    # Report the range of predicted prices
    spread = max(predictions) - min(predictions)
    print("\nRange in prices: ${:,.2f}".format(spread))
132 | 


--------------------------------------------------------------------------------
/projects/capstone/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | ## Specializations
 3 | ## Project: Capstone Proposal and Capstone Project
 4 | 
 5 | **Note**
 6 | 
 7 | The Capstone is a two-stage project. The first stage is the proposal component, where you can receive valuable feedback about your project idea, design, and proposed solution. This must be completed prior to implementing and submitting your capstone project.
 8 | 
 9 | You can find the [capstone proposal rubric here](https://review.udacity.com/#!/rubrics/410/view), and the [capstone project rubric here](https://review.udacity.com/#!/rubrics/108/view). Please ensure that you are following directions correctly before submitting these two stages which encapsulate your capstone.
10 | 
11 | Please email [machine-support@udacity.com](mailto:machine-support@udacity.com) if you have any questions.
12 | 


--------------------------------------------------------------------------------
/projects/capstone/capstone_proposal_template.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | ## Capstone Proposal
 3 | Joe Udacity  
 4 | December 31st, 2050
 5 | 
 6 | ## Proposal
 7 | _(approx. 2-3 pages)_
 8 | 
 9 | ### Domain Background
10 | _(approx. 1-2 paragraphs)_
11 | 
12 | In this section, provide brief details on the background information of the domain from which the project is proposed. Historical information relevant to the project should be included. It should be clear how or why a problem in the domain can or should be solved. Related academic research should be appropriately cited in this section, including why that research is relevant. Additionally, a discussion of your personal motivation for investigating a particular problem in the domain is encouraged but not required.
13 | 
14 | ### Problem Statement
15 | _(approx. 1 paragraph)_
16 | 
17 | In this section, clearly describe the problem that is to be solved. The problem described should be well defined and should have at least one relevant potential solution. Additionally, describe the problem thoroughly such that it is clear that the problem is quantifiable (the problem can be expressed in mathematical or logical terms), measurable (the problem can be measured by some metric and clearly observed), and replicable (the problem can be reproduced and occurs more than once).
18 | 
19 | ### Datasets and Inputs
20 | _(approx. 2-3 paragraphs)_
21 | 
22 | In this section, the dataset(s) and/or input(s) being considered for the project should be thoroughly described, such as how they relate to the problem and why they should be used. Information such as how the dataset or input is (was) obtained, and the characteristics of the dataset or input, should be included with relevant references and citations as necessary. It should be clear how the dataset(s) or input(s) will be used in the project and whether their use is appropriate given the context of the problem.
23 | 
24 | ### Solution Statement
25 | _(approx. 1 paragraph)_
26 | 
27 | In this section, clearly describe a solution to the problem. The solution should be applicable to the project domain and appropriate for the dataset(s) or input(s) given. Additionally, describe the solution thoroughly such that it is clear that the solution is quantifiable (the solution can be expressed in mathematical or logical terms), measurable (the solution can be measured by some metric and clearly observed), and replicable (the solution can be reproduced and occurs more than once).
28 | 
29 | ### Benchmark Model
30 | _(approximately 1-2 paragraphs)_
31 | 
32 | In this section, provide the details for a benchmark model or result that relates to the domain, problem statement, and intended solution. Ideally, the benchmark model or result contextualizes existing methods or known information in the domain and problem given, which could then be objectively compared to the solution. Describe how the benchmark model or result is measurable (can be measured by some metric and clearly observed) with thorough detail.
33 | 
34 | ### Evaluation Metrics
35 | _(approx. 1-2 paragraphs)_
36 | 
37 | In this section, propose at least one evaluation metric that can be used to quantify the performance of both the benchmark model and the solution model. The evaluation metric(s) you propose should be appropriate given the context of the data, the problem statement, and the intended solution. Describe how the evaluation metric(s) are derived and provide an example of their mathematical representations (if applicable). Complex evaluation metrics should be clearly defined and quantifiable (can be expressed in mathematical or logical terms).
38 | 
39 | ### Project Design
40 | _(approx. 1 page)_
41 | 
42 | In this final section, summarize a theoretical workflow for approaching a solution given the problem. Provide thorough discussion for what strategies you may consider employing, what analysis of the data might be required before being used, or which algorithms will be considered for your implementation. The workflow and discussion that you provide should align with the qualities of the previous sections. Additionally, you are encouraged to include small visualizations, pseudocode, or diagrams to aid in describing the project design, but it is not required. The discussion should clearly outline your intended workflow of the capstone project.
43 | 
44 | -----------
45 | 
46 | **Before submitting your proposal, ask yourself. . .**
47 | 
48 | - Does the proposal you have written follow a well-organized structure similar to that of the project template?
49 | - Is each section (particularly **Solution Statement** and **Project Design**) written in a clear, concise and specific fashion? Are there any ambiguous terms or phrases that need clarification?
50 | - Would the intended audience of your project be able to understand your proposal?
51 | - Have you properly proofread your proposal to assure there are minimal grammatical and spelling mistakes?
52 | - Are all the resources used for this project correctly cited and referenced?
53 | 


--------------------------------------------------------------------------------
/projects/capstone/capstone_report_template.md:
--------------------------------------------------------------------------------
  1 | # Machine Learning Engineer Nanodegree
  2 | ## Capstone Project
  3 | Joe Udacity  
  4 | December 31st, 2050
  5 | 
  6 | ## I. Definition
  7 | _(approx. 1-2 pages)_
  8 | 
  9 | ### Project Overview
 10 | In this section, look to provide a high-level overview of the project in layman’s terms. Questions to ask yourself when writing this section:
 11 | - _Has an overview of the project been provided, such as the problem domain, project origin, and related datasets or input data?_
 12 | - _Has enough background information been given so that an uninformed reader would understand the problem domain and following problem statement?_
 13 | 
 14 | ### Problem Statement
 15 | In this section, you will want to clearly define the problem that you are trying to solve, including the strategy (outline of tasks) you will use to achieve the desired solution. You should also thoroughly discuss what the intended solution will be for this problem. Questions to ask yourself when writing this section:
 16 | - _Is the problem statement clearly defined? Will the reader understand what you are expecting to solve?_
 17 | - _Have you thoroughly discussed how you will attempt to solve the problem?_
 18 | - _Is an anticipated solution clearly defined? Will the reader understand what results you are looking for?_
 19 | 
 20 | ### Metrics
 21 | In this section, you will need to clearly define the metrics or calculations you will use to measure performance of a model or result in your project. These calculations and metrics should be justified based on the characteristics of the problem and problem domain. Questions to ask yourself when writing this section:
 22 | - _Are the metrics you’ve chosen to measure the performance of your models clearly discussed and defined?_
 23 | - _Have you provided reasonable justification for the metrics chosen based on the problem and solution?_
 24 | 
 25 | 
 26 | ## II. Analysis
 27 | _(approx. 2-4 pages)_
 28 | 
 29 | ### Data Exploration
 30 | In this section, you will be expected to analyze the data you are using for the problem. This data can either be in the form of a dataset (or datasets), input data (or input files), or even an environment. The type of data should be thoroughly described and, if possible, have basic statistics and information presented (such as discussion of input features or defining characteristics about the input or environment). Any abnormalities or interesting qualities about the data that may need to be addressed should be identified (such as features that need to be transformed or the possibility of outliers). Questions to ask yourself when writing this section:
 31 | - _If a dataset is present for this problem, have you thoroughly discussed certain features about the dataset? Has a data sample been provided to the reader?_
 32 | - _If a dataset is present for this problem, are statistics about the dataset calculated and reported? Have any relevant results from this calculation been discussed?_
 33 | - _If a dataset is **not** present for this problem, has discussion been made about the input space or input data for your problem?_
 34 | - _Are there any abnormalities or characteristics about the input space or dataset that need to be addressed? (categorical variables, missing values, outliers, etc.)_
 35 | 
 36 | ### Exploratory Visualization
 37 | In this section, you will need to provide some form of visualization that summarizes or extracts a relevant characteristic or feature about the data. The visualization should adequately support the data being used. Discuss why this visualization was chosen and how it is relevant. Questions to ask yourself when writing this section:
 38 | - _Have you visualized a relevant characteristic or feature about the dataset or input data?_
 39 | - _Is the visualization thoroughly analyzed and discussed?_
 40 | - _If a plot is provided, are the axes, title, and datum clearly defined?_
 41 | 
 42 | ### Algorithms and Techniques
 43 | In this section, you will need to discuss the algorithms and techniques you intend to use for solving the problem. You should justify the use of each one based on the characteristics of the problem and the problem domain. Questions to ask yourself when writing this section:
 44 | - _Are the algorithms you will use, including any default variables/parameters in the project, clearly defined?_
 45 | - _Are the techniques to be used thoroughly discussed and justified?_
 46 | - _Is it made clear how the input data or datasets will be handled by the algorithms and techniques chosen?_
 47 | 
 48 | ### Benchmark
 49 | In this section, you will need to provide a clearly defined benchmark result or threshold for comparing across performances obtained by your solution. The reasoning behind the benchmark (in the case where it is not an established result) should be discussed. Questions to ask yourself when writing this section:
 50 | - _Has some result or value been provided that acts as a benchmark for measuring performance?_
 51 | - _Is it clear how this result or value was obtained (whether by data or by hypothesis)?_
 52 | 
 53 | 
 54 | ## III. Methodology
 55 | _(approx. 3-5 pages)_
 56 | 
 57 | ### Data Preprocessing
 58 | In this section, all of your preprocessing steps will need to be clearly documented, if any were necessary. From the previous section, any of the abnormalities or characteristics that you identified about the dataset will be addressed and corrected here. Questions to ask yourself when writing this section:
 59 | - _If the algorithms chosen require preprocessing steps like feature selection or feature transformations, have they been properly documented?_
 60 | - _Based on the **Data Exploration** section, if there were abnormalities or characteristics that needed to be addressed, have they been properly corrected?_
 61 | - _If no preprocessing is needed, has it been made clear why?_
 62 | 
 63 | ### Implementation
 64 | In this section, the process by which you implemented the metrics, algorithms, and techniques for the given data will need to be clearly documented. It should be abundantly clear how the implementation was carried out, and discussion should be made regarding any complications that occurred during this process. Questions to ask yourself when writing this section:
 65 | - _Is it made clear how the algorithms and techniques were implemented with the given datasets or input data?_
 66 | - _Were there any complications with the original metrics or techniques that required changing prior to acquiring a solution?_
 67 | - _Was there any part of the coding process (e.g., writing complicated functions) that should be documented?_
 68 | 
 69 | ### Refinement
 70 | In this section, you will need to discuss the process of improvement you made upon the algorithms and techniques you used in your implementation. For example, adjusting parameters for certain models to acquire improved solutions would fall under the refinement category. Your initial and final solutions should be reported, as well as any significant intermediate results as necessary. Questions to ask yourself when writing this section:
 71 | - _Has an initial solution been found and clearly reported?_
 72 | - _Is the process of improvement clearly documented, such as what techniques were used?_
 73 | - _Are intermediate and final solutions clearly reported as the process is improved?_
 74 | 
 75 | 
 76 | ## IV. Results
 77 | _(approx. 2-3 pages)_
 78 | 
 79 | ### Model Evaluation and Validation
 80 | In this section, the final model and any supporting qualities should be evaluated in detail. It should be clear how the final model was derived and why this model was chosen. In addition, some type of analysis should be used to validate the robustness of this model and its solution, such as manipulating the input data or environment to see how the model’s solution is affected (this is called sensitivity analysis). Questions to ask yourself when writing this section:
 81 | - _Is the final model reasonable and aligning with solution expectations? Are the final parameters of the model appropriate?_
 82 | - _Has the final model been tested with various inputs to evaluate whether the model generalizes well to unseen data?_
 83 | - _Is the model robust enough for the problem? Do small perturbations (changes) in training data or the input space greatly affect the results?_
 84 | - _Can results found from the model be trusted?_
 85 | 
 86 | ### Justification
 87 | In this section, your model’s final solution and its results should be compared to the benchmark you established earlier in the project using some type of statistical analysis. You should also justify whether these results and the solution are significant enough to have solved the problem posed in the project. Questions to ask yourself when writing this section:
 88 | - _Are the final results found stronger than the benchmark result reported earlier?_
 89 | - _Have you thoroughly analyzed and discussed the final solution?_
 90 | - _Is the final solution significant enough to have solved the problem?_
 91 | 
 92 | 
 93 | ## V. Conclusion
 94 | _(approx. 1-2 pages)_
 95 | 
 96 | ### Free-Form Visualization
 97 | In this section, you will need to provide some form of visualization that emphasizes an important quality about the project. It is much more free-form, but should reasonably support a significant result or characteristic about the problem that you want to discuss. Questions to ask yourself when writing this section:
 98 | - _Have you visualized a relevant or important quality about the problem, dataset, input data, or results?_
 99 | - _Is the visualization thoroughly analyzed and discussed?_
100 | - _If a plot is provided, are the axes, title, and datum clearly defined?_
101 | 
102 | ### Reflection
103 | In this section, you will summarize the entire end-to-end problem solution and discuss one or two particular aspects of the project you found interesting or difficult. You are expected to reflect on the project as a whole to show that you have a firm understanding of the entire process employed in your work. Questions to ask yourself when writing this section:
104 | - _Have you thoroughly summarized the entire process you used for this project?_
105 | - _Were there any interesting aspects of the project?_
106 | - _Were there any difficult aspects of the project?_
107 | - _Does the final model and solution fit your expectations for the problem, and should it be used in a general setting to solve these types of problems?_
108 | 
109 | ### Improvement
110 | In this section, you will need to provide discussion as to how one aspect of the implementation you designed could be improved. As an example, consider ways your implementation can be made more general, and what would need to be modified. You do not need to make this improvement, but the potential solutions resulting from these changes are considered and compared/contrasted to your current solution. Questions to ask yourself when writing this section:
111 | - _Are there further improvements that could be made on the algorithms or techniques you used in this project?_
112 | - _Were there algorithms or techniques you researched that you did not know how to implement, but would consider using if you knew how?_
113 | - _If you used your final solution as the new benchmark, do you think an even better solution exists?_
114 | 
115 | -----------
116 | 
117 | **Before submitting, ask yourself. . .**
118 | 
119 | - Does the project report you’ve written follow a well-organized structure similar to that of the project template?
120 | - Is each section (particularly **Analysis** and **Methodology**) written in a clear, concise and specific fashion? Are there any ambiguous terms or phrases that need clarification?
121 | - Would the intended audience of your project be able to understand your analysis, methods, and results?
122 | - Have you properly proof-read your project report to assure there are minimal grammatical and spelling mistakes?
123 | - Are all the resources used for this project correctly cited and referenced?
124 | - Is the code that implements your solution easily readable and properly commented?
125 | - Does the code execute without error and produce results similar to those reported?
126 | 


--------------------------------------------------------------------------------
/projects/capstone/proposal_project_description.md:
--------------------------------------------------------------------------------
 1 | # Content: Specializations
 2 | ## Project: Capstone Proposal and Capstone Project
 3 | 
 4 | ## Capstone Proposal Overview
 5 | In this capstone project proposal, prior to completing the following **Capstone Project**, you will leverage what you've learned throughout the Nanodegree program to author a proposal for solving a problem of your choice by applying machine learning algorithms and techniques. A project proposal encompasses seven key points: 
 6 | - The project's **domain background** : the field of research where the project is derived;
 7 | - A **problem statement** : a problem being investigated for which a solution will be defined;
 8 | - The **datasets and inputs** : data or inputs being used for the problem;
 9 | - A **solution statement** : the solution proposed for the problem given;
10 | - A **benchmark model** : some simple or historical model or result to compare the defined solution to;
11 | - A set of **evaluation metrics** : functional representations for how the solution can be measured;
12 | - An outline of the **project design** : how the solution will be developed and results obtained.
13 | 
14 | ## Capstone Proposal Highlights
15 | The capstone project proposal is designed to introduce you to writing proposals for major projects. Typically, before you begin working on a solution to a problem, a proposal is written to your peers, advisor, manager, etc., to outline the details of the problem, your research, and your approach to a solution.
16 | 
17 | Things you will learn by completing this project proposal:
18 | - How to research a real-world problem of interest.
19 | - How to author a technical proposal document.
20 | - How to organize a proposed workflow for designing a solution.
21 | 
22 | ## Capstone Proposal Description
23 | 
24 | Think about a technical field or domain that you are passionate about, such as robotics, virtual reality, finance, natural language processing, or even artificial intelligence (the possibilities are endless!). Then, choose an existing problem within that domain that you are interested in which you could solve by applying machine learning algorithms and techniques. Be sure that you have collected all of the resources needed (such as datasets, inputs, and research) to complete this project, and make the appropriate citations wherever necessary in your proposal. Below are a few suggested problem areas you could explore if you are unsure what your passion is:
25 | 
26 | - [Robot Motion Planning](https://docs.google.com/document/d/1ZFCH6jS3A5At7_v5IUM5OpAXJYiutFuSIjTzV_E-vdE/pub)
27 | - [Healthcare](https://docs.google.com/document/d/1WzurKKa9AX2DnOH7KiB38mvozdOSemfkGpex8hdTy8c/pub)
28 | - [Computer Vision](https://docs.google.com/document/d/1y-XfjkPFgUQxFIQ9bBncUSjs4HOf5E-45FrLYNBsZb4/pub)
29 | - [Education](https://docs.google.com/document/d/1vjerjRQnWs1kLbZagDYT6rNqiwAG23Yj45oUY88IAxI/pub)
30 | - [Investment and Trading](https://docs.google.com/document/d/1ycGeb1QYKATG6jvz74SAMqxrlek9Ed4RYrzWNhWS-0Q/pub)
31 | 
32 | In addition, you may find a technical domain (along with the problem and dataset) as *competitions* on platforms such as [Kaggle](http://kaggle.com), or [Devpost](http://devpost.com). This can be helpful for discovering a particular problem you may be interested in solving as an alternative to the suggested problem areas above. In many cases, some of the requirements for the capstone proposal are already defined for you when choosing from these platforms. 
33 | 
34 | To determine whether your project and the problem you want to solve fit Udacity's vision of a Machine Learning Capstone Project, please refer to the [capstone proposal rubric](https://review.udacity.com/#!/rubrics/410/view) and the [capstone project rubric](https://review.udacity.com/#!/rubrics/108/view) and make a note of each rubric criterion you will be evaluated on. A satisfactory project will have a proposal that clearly satisfies these requirements.
35 | 
36 | ## Software Requirements
37 | **Your proposed project must be written in Python 2.7.** Given the free-form nature of the machine learning capstone, the software and libraries you will need to successfully complete your work will vary depending on the chosen application area and problem definition. Because of this, it is imperative that all necessary software and libraries you consider using in your capstone project are accessible and clearly documented. Please note that proprietary software, software that requires private licenses, or software behind a paywall or login account should be avoided.
38 | 
39 | ## Data Requirements
40 | Every machine learning capstone project will most certainly require some form of dataset or input data structure (input text files, images, etc.). Similar to the software requirements above, the data you are considering must either be publicly accessible or provided by you during the submission process, and private or proprietary data should not be used without express permission. Please take into consideration the file size of your data — while there is no strict upper limit, input files that are excessively large may require reviewers longer than an acceptable amount of time to acquire all of your project files. This can take away from the reviewer's time that could be put towards evaluating your proposal. If the data you are considering fits the criteria of being too large, consider whether you could work with a subset of the data instead, or provide a representative sample of the data.
41 | 
42 | ## Ethics
43 | Udacity's A/B Testing course, as part of the Data Analyst Nanodegree, has a segment that discusses [the sensitivity of data](https://classroom.udacity.com/nanodegrees/nd002/parts/00213454013/modules/411033896375460/lessons/3998098714/concepts/39997087540923#) and the expectation of privacy from those whose information has been collected. While most data you find available to the public will not have any ethical complications, it is extremely important that you are considering where the data you are using came from, and whether that data contains any sensitive information. For example, if you worked for a bank and wanted to use customers' bank statements as part of your project, this would most likely be an unethical choice of data and should be avoided.
44 | 
45 | ## Proposal Guidelines
46 | Your project submission will be evaluated on the written proposal that is submitted. Additionally, depending on the project you are proposing, other materials such as the data being used will be evaluated. It is expected that the proposal contains enough detail, documentation, analysis, and discussion to adequately reflect the work you intend to complete for the project. Because of this, it is extremely important that the proposal is written in a professional, standardized way, so those who review your project's proposal are able to clearly identify each component of your project in the report. Without a properly written proposal, your project cannot be sufficiently evaluated. A [project proposal template](https://github.com/udacity/machine-learning/blob/master/projects/capstone/capstone_proposal_template.md) is provided for you to understand how a project proposal should be structured. We strongly encourage students to have a proposal that is approximately **two to three pages in length**.
47 | 
48 | The Machine Learning Capstone Project proposal should be treated no different than a written research paper for academics. Your goal is to ultimately present the research you've discovered into the respective problem domain you've chosen, and then clearly articulate your intended project to your peers. The narrative found in the [project proposal template](https://github.com/udacity/machine-learning/blob/master/projects/capstone/capstone_proposal_template.md) provides for a *"proposal checklist"* that will aid you in fully completing a documented proposal. Please make use of this resource!
49 | 
50 | ## Submitting the Project
51 | 
52 | ### Evaluation
53 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/410/view" target="_blank">Capstone Project Proposal rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
54 | 
55 | ### Submission Files
56 | At minimum, your submission will be required to have the following files listed below. If your submission method of choice is uploading an archive (`*.zip`), please take into consideration the total file size. You will need to include
57 | - A project proposal, *in PDF format only*, with the name **proposal.pdf**, addressing each of the seven key points of a proposal. The recommended page length for a proposal is approximately *two to three pages*.
58 | - Any additional supporting material such as datasets, images, or input files that are necessary for your project and proposal. If these files are too large and you are uploading your submission, instead provide appropriate means of acquiring the necessary files in an included `README.md` file.
59 | 
60 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
61 | 
62 | ### I'm Ready!
63 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
64 | 
65 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
66 | 
67 | ### What's Next?
68 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
69 | 


--------------------------------------------------------------------------------
/projects/capstone/report-example-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/capstone/report-example-1.pdf


--------------------------------------------------------------------------------
/projects/capstone/report-example-3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/capstone/report-example-3.pdf


--------------------------------------------------------------------------------
/projects/customer_segments/README.md:
--------------------------------------------------------------------------------
 1 | # Content: Unsupervised Learning
 2 | ## Project: Creating Customer Segments
 3 | 
 4 | ### Install
 5 | 
 6 | This project requires **Python 2.7** and the following Python libraries installed:
 7 | 
 8 | - [NumPy](http://www.numpy.org/)
 9 | - [Pandas](http://pandas.pydata.org)
10 | - [matplotlib](http://matplotlib.org/)
11 | - [scikit-learn](http://scikit-learn.org/stable/)
12 | 
13 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html)
14 | 
15 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer. 
16 | 
17 | ### Code
18 | 
19 | Template code is provided in the `customer_segments.ipynb` notebook file. You will also be required to use the included `visuals.py` Python file and the `customers.csv` dataset file to complete your work. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. Note that the code included in `visuals.py` is meant to be used out-of-the-box and not intended for students to manipulate. If you are interested in how the visualizations are created in the notebook, please feel free to explore this Python file.
20 | 
21 | ### Run
22 | 
23 | In a terminal or command window, navigate to the top-level project directory `customer_segments/` (that contains this README) and run one of the following commands:
24 | 
25 | ```bash
26 | ipython notebook customer_segments.ipynb
27 | ```  
28 | or
29 | ```bash
30 | jupyter notebook customer_segments.ipynb
31 | ```
32 | 
33 | This will open the Jupyter Notebook software and project file in your browser.
34 | 
35 | ## Data
36 | 
37 | The customer segments data is included as a selection of 440 data points collected on data found from clients of a wholesale distributor in Lisbon, Portugal. More information can be found on the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Wholesale+customers).
38 | 
39 | Note (m.u.) is shorthand for *monetary units*.
40 | 
41 | **Features**
42 | 1) `Fresh`: annual spending (m.u.) on fresh products (Continuous); 
43 | 2) `Milk`: annual spending (m.u.) on milk products (Continuous); 
44 | 3) `Grocery`: annual spending (m.u.) on grocery products (Continuous); 
45 | 4) `Frozen`: annual spending (m.u.) on frozen products (Continuous);
46 | 5) `Detergents_Paper`: annual spending (m.u.) on detergents and paper products (Continuous);
47 | 6) `Delicatessen`: annual spending (m.u.) on delicatessen products (Continuous); 
48 | 7) `Channel`: {Hotel/Restaurant/Cafe - 1, Retail - 2} (Nominal)
49 | 8) `Region`: {Lisbon - 1, Oporto - 2, or Other - 3} (Nominal) 


--------------------------------------------------------------------------------
/projects/customer_segments/project_description.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Unsupervised Learning
 3 | ## Project: Creating Customer Segments
 4 | 
 5 | ## Project Overview
 6 | In this project you will apply unsupervised learning techniques on product spending data collected for customers of a wholesale distributor in Lisbon, Portugal to identify customer segments hidden in the data. You will first explore the data by selecting a small subset to sample and determine if any product categories highly correlate with one another. Afterwards, you will preprocess the data by scaling each product category and then identifying (and removing) unwanted outliers. With the good, clean customer spending data, you will apply PCA transformations to the data and implement clustering algorithms to segment the transformed customer data. Finally, you will compare the segmentation found with an additional labeling and consider ways this information could assist the wholesale distributor with future service changes.
 7 | 
 8 | ## Project Highlights
 9 | This project is designed to give you a hands-on experience with unsupervised learning and work towards developing conclusions for a potential client on a real-world dataset. Many companies today collect vast amounts of data on customers and clientele, and have a strong desire to understand the meaningful relationships hidden in their customer base. Being equipped with this information can assist a company in engineering future products and services that best satisfy the demands or needs of their customers.
10 | 
11 | Things you will learn by completing this project:
12 | 
13 | - How to apply preprocessing techniques such as feature scaling and outlier detection.
14 | - How to interpret data points that have been scaled, transformed, or reduced from PCA.
15 | - How to analyze PCA dimensions and construct a new feature space.
16 | - How to optimally cluster a set of data to find hidden patterns in a dataset.
17 | - How to assess information given by cluster data and use it in a meaningful way.
18 | 
19 | ## Description
20 | A wholesale distributor recently tested a change to their delivery method for some customers, by moving from a morning delivery service five days a week to a cheaper evening delivery service three days a week. Initial testing did not discover any significant unsatisfactory results, so they implemented the cheaper option for all customers. Almost immediately, the distributor began getting complaints about the delivery service change and customers were canceling deliveries, losing the distributor more money than what was being saved. You've been hired by the wholesale distributor to find what types of customers they have to help them make better, more informed business decisions in the future. Your task is to use unsupervised learning techniques to see if any similarities exist between customers, and how to best segment customers into distinct categories.
21 | 
22 | ## Software and Libraries
23 | This project uses the following software and Python libraries:
24 | 
25 | - [Python 2.7](https://www.python.org/download/releases/2.7/)
26 | - [NumPy](http://www.numpy.org/)
27 | - [pandas](http://pandas.pydata.org/)
28 | - [scikit-learn](http://scikit-learn.org/stable/)
29 | - [matplotlib](http://matplotlib.org/)
30 | 
31 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
32 | 
33 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer.
34 | 
35 | ## Starting the Project
36 | 
37 | For this assignment, you can find the `customer_segments` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
38 | 
39 | This project contains three files:
40 | 
41 | - `customer_segments.ipynb`: This is the main file where you will be performing your work on the project.
42 | - `customers.csv`: The project dataset. You'll load this data in the notebook.
43 | - `visuals.py`: This Python script provides supplementary visualizations for the project. Do not modify.
44 | 
45 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook customer_segments.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
46 | 
47 | ## Submitting the Project
48 | 
49 | ### Evaluation
50 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/105/view" target="_blank">Creating Customer Segments project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
51 | 
52 | ### Submission Files
53 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `customer_segments` for ease of access:
54 |  - The `customer_segments.ipynb` notebook file with all questions answered and all code cells executed and displaying output.
55 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
56 | 
57 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
58 | 
59 | ### I'm Ready!
60 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
61 | 
62 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
63 | 
64 | ### What's Next?
65 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
66 | 


--------------------------------------------------------------------------------
/projects/customer_segments/visuals.py:
--------------------------------------------------------------------------------
  1 | ###########################################
  2 | # Suppress matplotlib user warnings
  3 | # Necessary for newer version of matplotlib
  4 | import warnings
  5 | warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
  6 | #
  7 | # Display inline matplotlib plots with IPython
  8 | from IPython import get_ipython
  9 | get_ipython().run_line_magic('matplotlib', 'inline')
 10 | ###########################################
 11 | 
 12 | import matplotlib.pyplot as plt
 13 | import matplotlib.cm as cm
 14 | import pandas as pd
 15 | import numpy as np
 16 | 
 17 | def pca_results(good_data, pca):
 18 | 	'''
 19 | 	Create a DataFrame of the PCA results
 20 | 	Includes dimension feature weights and explained variance
 21 | 	Visualizes the PCA results
 22 | 	'''
 23 | 
 24 | 	# Dimension indexing
 25 | 	dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)]
 26 | 
 27 | 	# PCA components
 28 | 	components = pd.DataFrame(np.round(pca.components_, 4), columns = list(good_data.keys()))
 29 | 	components.index = dimensions
 30 | 
 31 | 	# PCA explained variance
 32 | 	ratios = pca.explained_variance_ratio_.reshape(len(pca.components_), 1)
 33 | 	variance_ratios = pd.DataFrame(np.round(ratios, 4), columns = ['Explained Variance'])
 34 | 	variance_ratios.index = dimensions
 35 | 
 36 | 	# Create a bar plot visualization
 37 | 	fig, ax = plt.subplots(figsize = (14,8))
 38 | 
 39 | 	# Plot the feature weights as a function of the components
 40 | 	components.plot(ax = ax, kind = 'bar');
 41 | 	ax.set_ylabel("Feature Weights")
 42 | 	ax.set_xticklabels(dimensions, rotation=0)
 43 | 
 44 | 
 45 | 	# Display the explained variance ratios
 46 | 	for i, ev in enumerate(pca.explained_variance_ratio_):
 47 | 		ax.text(i-0.40, ax.get_ylim()[1] + 0.05, "Explained Variance\n          %.4f"%(ev))
 48 | 
 49 | 	# Return a concatenated DataFrame
 50 | 	return pd.concat([variance_ratios, components], axis = 1)
 51 | 
 52 | def cluster_results(reduced_data, preds, centers, pca_samples):
 53 | 	'''
 54 | 	Visualizes the PCA-reduced cluster data in two dimensions
 55 | 	Adds cues for cluster centers and student-selected sample data
 56 | 	'''
 57 | 
 58 | 	predictions = pd.DataFrame(preds, columns = ['Cluster'])
 59 | 	plot_data = pd.concat([predictions, reduced_data], axis = 1)
 60 | 
 61 | 	# Generate the cluster plot
 62 | 	fig, ax = plt.subplots(figsize = (14,8))
 63 | 
 64 | 	# Color map
 65 | 	cmap = cm.get_cmap('gist_rainbow')
 66 | 
 67 | 	# Color the points based on assigned cluster
 68 | 	for i, cluster in plot_data.groupby('Cluster'):   
 69 | 	    cluster.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 'Dimension 2', \
 70 | 	                 color = cmap((i)*1.0/(len(centers)-1)), label = 'Cluster %i'%(i), s=30);
 71 | 
 72 | 	# Plot centers with indicators
 73 | 	for i, c in enumerate(centers):
 74 | 	    ax.scatter(x = c[0], y = c[1], color = 'white', edgecolors = 'black', \
 75 | 	               alpha = 1, linewidth = 2, marker = 'o', s=200);
 76 | 	    ax.scatter(x = c[0], y = c[1], marker='$%d$'%(i), alpha = 1, s=100);
 77 | 
 78 | 	# Plot transformed sample points 
 79 | 	ax.scatter(x = pca_samples[:,0], y = pca_samples[:,1], \
 80 | 	           s = 150, linewidth = 4, color = 'black', marker = 'x');
 81 | 
 82 | 	# Set plot title
 83 | 	ax.set_title("Cluster Learning on PCA-Reduced Data - Centroids Marked by Number\nTransformed Sample Data Marked by Black Cross");
 84 | 
 85 | 
 86 | def biplot(good_data, reduced_data, pca):
 87 |     '''
 88 |     Produce a biplot that shows a scatterplot of the reduced
 89 |     data and the projections of the original features.
 90 |     
 91 |     good_data: original data, before transformation.
 92 |                Needs to be a pandas dataframe with valid column names
 93 |     reduced_data: the reduced data (the first two dimensions are plotted)
 94 |     pca: pca object that contains the components_ attribute
 95 | 
 96 |     return: a matplotlib AxesSubplot object (for any additional customization)
 97 |     
 98 |     This procedure is inspired by the script:
 99 |     https://github.com/teddyroland/python-biplot
100 |     '''
101 | 
102 |     fig, ax = plt.subplots(figsize = (14,8))
103 |     # scatterplot of the reduced data    
104 |     ax.scatter(x=reduced_data.loc[:, 'Dimension 1'], y=reduced_data.loc[:, 'Dimension 2'], 
105 |         facecolors='b', edgecolors='b', s=70, alpha=0.5)
106 |     
107 |     feature_vectors = pca.components_.T
108 | 
109 |     # we use scaling factors to make the arrows easier to see
110 |     arrow_size, text_pos = 7.0, 8.0,
111 | 
112 |     # projections of the original features
113 |     for i, v in enumerate(feature_vectors):
114 |         ax.arrow(0, 0, arrow_size*v[0], arrow_size*v[1], 
115 |                   head_width=0.2, head_length=0.2, linewidth=2, color='red')
116 |         ax.text(v[0]*text_pos, v[1]*text_pos, good_data.columns[i], color='black', 
117 |                  ha='center', va='center', fontsize=18)
118 | 
119 |     ax.set_xlabel("Dimension 1", fontsize=14)
120 |     ax.set_ylabel("Dimension 2", fontsize=14)
121 |     ax.set_title("PC plane with original feature projections.", fontsize=16);
122 |     return ax
123 |     
124 | 
def channel_results(reduced_data, outliers, pca_samples):
    '''
    Visualizes the PCA-reduced cluster data in two dimensions using the full dataset
    Data is labeled by "Channel" and cues added for student-selected sample data

    reduced_data: DataFrame with 'Dimension 1' and 'Dimension 2' columns.
    outliers: positions of the rows that were removed from the full dataset;
              the same rows are dropped from the 'Channel' column so it lines
              up with reduced_data.
    pca_samples: iterable of transformed sample points; the first two
                 coordinates of each are circled and numbered.

    return: False if "customers.csv" cannot be loaded, otherwise None.
    '''

    # Check that the dataset is loadable. Only ordinary errors are treated as
    # "file missing/unreadable"; KeyboardInterrupt and SystemExit propagate.
    try:
        full_data = pd.read_csv("customers.csv")
    except Exception:
        print("Dataset could not be loaded. Is the file missing?")
        return False

    # Create the Channel DataFrame, aligned row-for-row with reduced_data
    channel = pd.DataFrame(full_data['Channel'], columns = ['Channel'])
    channel = channel.drop(channel.index[outliers]).reset_index(drop = True)
    labeled = pd.concat([reduced_data, channel], axis = 1)

    # Generate the cluster plot
    fig, ax = plt.subplots(figsize = (14,8))

    # Color map. pyplot.get_cmap is used because matplotlib.cm.get_cmap was
    # removed in matplotlib 3.9.
    cmap = plt.get_cmap('gist_rainbow')

    # Color the points based on assigned Channel
    # (assumes Channel takes the values 1 and 2, matching the two labels)
    labels = ['Hotel/Restaurant/Cafe', 'Retailer']
    grouped = labeled.groupby('Channel')
    for i, group in grouped:
        group.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 'Dimension 2',
                   color = cmap((i-1)*1.0/2), label = labels[i-1], s=30)

    # Plot transformed sample points: a hollow circle plus an offset index
    for i, sample in enumerate(pca_samples):
        ax.scatter(x = sample[0], y = sample[1],
                   s = 200, linewidth = 3, color = 'black', marker = 'o', facecolors = 'none')
        ax.scatter(x = sample[0]+0.25, y = sample[1]+0.3, marker='$%d$'%(i), alpha = 1, s=125)

    # Set plot title
    ax.set_title("PCA-Reduced Data Labeled by 'Channel'\nTransformed Sample Data Circled")


--------------------------------------------------------------------------------
/projects/digit_recognition/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Deep Learning
 3 | ## Project: Build a Digit Recognition Program
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python 2.x or Python 3.x** and the following Python libraries installed:
 8 | 
 9 | - [NumPy](http://www.numpy.org/)
10 | - [SciPy](https://www.scipy.org/)
11 | - [scikit-learn](http://scikit-learn.org/0.17/install.html) (v0.17)
12 | - [TensorFlow](http://tensorflow.org)
13 | 
14 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
15 | 
16 | In addition to the above, for those optionally seeking to use image processing software, you may need one of the following:
17 | - [PyGame](http://pygame.org/)
18 |    - Helpful links for installing PyGame:
19 |    - [Getting Started](https://www.pygame.org/wiki/GettingStarted)
20 |    - [PyGame Information](http://www.pygame.org/wiki/info)
21 |    - [Google Group](https://groups.google.com/forum/#!forum/pygame-mirror-on-google-groups)
22 |    - [PyGame subreddit](https://www.reddit.com/r/pygame/)
23 | - [OpenCV](http://opencv.org/)
24 | 
25 | For those optionally seeking to deploy an Android application:
26 | - Android SDK & NDK (see this [README](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/README.md))
27 | 
28 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer. `pygame` and `OpenCV` can then be installed using one of the following commands:
29 | 
30 | Mac:  
31 | ```bash
32 | conda install -c https://conda.anaconda.org/quasiben pygame
33 | conda install -c menpo opencv=2.4.11
34 | ```
35 | 
36 | Windows & Linux:  
37 | ```bash
38 | conda install -c https://conda.anaconda.org/tlatorre pygame
39 | conda install -c menpo opencv=2.4.11
40 | ```
41 | 
42 | ### Code
43 | 
44 | A template notebook is provided as `digit_recognition.ipynb`. While no code is included in the notebook, you will be required to use the notebook to implement the basic functionality of your project and answer questions about your implementation and results. 
45 | 
46 | ### Run
47 | 
48 | In a terminal or command window, navigate to the top-level project directory `digit_recognition/` (that contains this README) and run one of the following commands:
49 | 
50 | ```bash
51 | ipython notebook digit_recognition.ipynb
52 | ```  
53 | or
54 | ```bash
55 | jupyter notebook digit_recognition.ipynb
56 | ```
57 | 
58 | This will open the Jupyter Notebook software and notebook file in your browser.
59 | 
60 | 
61 | ### Data
62 | 
 63 | While no data is directly provided with the project, you will be required to download and use the [Street View House Numbers (SVHN) dataset](http://ufldl.stanford.edu/housenumbers/), along with either the [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) or [MNIST](http://yann.lecun.com/exdb/mnist/) datasets. If you've completed the course material, the **notMNIST** dataset should already be available.
64 | 


--------------------------------------------------------------------------------
/projects/digit_recognition/project_description.md:
--------------------------------------------------------------------------------
  1 | # Content: Deep Learning
  2 | ## Project: Build a Digit Recognition Program
  3 | 
  4 | ## Project Overview
  5 | 
  6 | In this project, you will use what you've learned about deep neural networks and convolutional neural networks to create a live camera application or program that prints numbers it observes in real time from images it is given. First, you will design and test a model architecture that can identify sequences of digits in an image. Next, you will train that model so it can decode sequences of digits from natural images by using the [Street View House Numbers (SVHN) dataset](http://ufldl.stanford.edu/housenumbers/). After the model is properly trained, you will then test your model using a live camera application (optional) or program on newly-captured images. Finally, once you obtain meaningful results, you will refine your implementation to also *localize where numbers are on the image*, and test this localization on newly-captured images.
  7 | 
  8 | ## Software Requirements
  9 | This project uses the following software and Python libraries:
 10 | 
 11 | - [Python 2.7](https://www.python.org/download/releases/2.7/)
 12 | - [NumPy](http://www.numpy.org/)
 13 | - [SciPy](https://www.scipy.org/)
 14 | - [scikit-learn](http://scikit-learn.org/0.17/install.html) (v0.17)
 15 | - [TensorFlow](http://tensorflow.org)
 16 | 
 17 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
 18 | 
 19 | In addition to the above, for those optionally seeking to use image processing software, you may need one of the following:
 20 | - [PyGame](http://pygame.org/)
 21 |    - Helpful links for installing PyGame:
 22 |    - [Getting Started](https://www.pygame.org/wiki/GettingStarted)
 23 |    - [PyGame Information](http://www.pygame.org/wiki/info)
 24 |    - [Google Group](https://groups.google.com/forum/#!forum/pygame-mirror-on-google-groups)
 25 |    - [PyGame subreddit](https://www.reddit.com/r/pygame/)
 26 | - [OpenCV](http://opencv.org/)
 27 | 
 28 | For those optionally seeking to deploy an Android application:
 29 | - Android SDK & NDK (see this [README](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/README.md))
 30 | 
 31 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer. `pygame` and `OpenCV` can then be installed using one of the following commands:
 32 | 
 33 | **opencv**  
 34 | `conda install -c menpo opencv=2.4.11`
 35 | 
 36 | **PyGame:**  
 37 | Mac:  `conda install -c https://conda.anaconda.org/quasiben pygame`  
 38 | Windows: `conda install -c https://conda.anaconda.org/tlatorre pygame`  
 39 | Linux:  `conda install -c https://conda.anaconda.org/prkrekel pygame`  
 40 | 
 41 | ## Starting the Project
 42 | 
 43 | For this assignment, you can find the `digit_recognition` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
 44 | 
 45 | This project contains one file:
 46 | 
 47 | - `digit_recognition.ipynb`: This is the main file where you will be performing your work on the project.
 48 | 
 49 | In addition, you will need to download the [Street View House Numbers (SVHN) dataset](http://ufldl.stanford.edu/housenumbers/), along with either the [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) or [MNIST](http://yann.lecun.com/exdb/mnist/) datasets. If you've completed the course material, the **notMNIST** dataset should already be available.
 50 | 
 51 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook digit_recognition.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
 52 | 
 53 | ## Tasks
 54 | 
 55 | ### Project Report
 56 | You will be required to answer questions about your implementation as part of your submission in the provided `digit_recognition.ipynb`. As you complete the tasks below, include thorough, detailed answers to each question *provided in italics*.
 57 | 
 58 | ### Step 1: Design and Test a Model Architecture
 59 | Design and implement a deep learning model that learns to recognize sequences of digits. Train the model using synthetic data generated by concatenating character images from [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) or [MNIST](http://yann.lecun.com/exdb/mnist/). To produce a synthetic sequence of digits for testing, you can for example limit yourself to sequences up to five digits, and use five classifiers on top of your deep network. You would have to incorporate an additional "blank" character to account for shorter number sequences.
 60 | 
 61 | There are various aspects to consider when thinking about this problem:
 62 | - Your model can be derived from a deep neural net or a convolutional network.
 63 | - You could experiment with sharing (or not sharing) the weights between the softmax classifiers.
 64 | - You can also use a recurrent network in your deep neural net to replace the classification layers and directly emit the sequence of digits one-at-a-time.
 65 | 
 66 | Here is an example of a [published baseline model on this problem](http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/42241.pdf). ([video](https://www.youtube.com/watch?v=vGPI_JvLoN0))
 67 | 
 68 | ***QUESTION:*** _What approach did you take in coming up with a solution to this problem?_
 69 | 
 70 | ***QUESTION:*** _What does your final architecture look like? (Type of model, layers, sizes, connectivity, etc.)_
 71 | 
 72 | ***QUESTION:*** _How did you train your model? How did you generate your synthetic dataset?_
 73 | 
 74 | ### Step 2: Train a Model on a Realistic Dataset
 75 | Once you have settled on a good architecture, you can train your model on real data. In particular, the [Street View House Numbers (SVHN)](http://ufldl.stanford.edu/housenumbers/) dataset is a good large-scale dataset collected from house numbers in Google Street View. Training on this more challenging dataset, where the digits are not neatly lined-up and have various skews, fonts and colors, likely means you have to do some hyperparameter exploration to perform well.
 76 | 
 77 | ***QUESTION:*** _Describe how you set up the training and testing data for your model. How does the model perform on a realistic dataset?_
 78 | 
 79 | ***QUESTION:*** _What changes did you have to make, if any, to achieve "good" results? Were there any options you explored that made the results worse?_
 80 | 
 81 | ***QUESTION:*** _What were your initial and final results with testing on a realistic dataset? Do you believe your model is doing a good enough job at classifying numbers correctly?_
 82 | 
 83 | ### Step 3: Test a Model on Newly-Captured Images
 84 | 
 85 | Take several pictures of numbers that you find around you (at least five), and run them through your classifier on your computer to produce example results. Alternatively (optionally), you can try using OpenCV / SimpleCV / Pygame to capture live images from a webcam and run those through your classifier.
 86 | 
 87 | ***QUESTION:*** _Choose five candidate images of numbers you took from around you and provide them in the report. Are there any particular qualities of the image(s) that might make classification difficult?_
 88 | 
 89 | ***QUESTION:*** _Is your model able to perform equally well on captured pictures or a live camera stream when compared to testing on the realistic dataset?_
 90 | 
 91 | ***QUESTION:*** _If necessary, provide documentation for how an interface was built for your model to load and classify newly-acquired images._
 92 | 
 93 | ### Step 4: Explore an Improvement for a Model
 94 | 
 95 | There are many things you can do once you have the basic classifier in place. One example would be to also localize where the numbers are on the image. The SVHN dataset provides bounding boxes that you can tune to train a localizer. Train a regression loss to the coordinates of the bounding box, and then test it. 
 96 | 
 97 | ***QUESTION:*** _How well does your model localize numbers on the testing set from the realistic dataset? Do your classification results change at all with localization included?_
 98 | 
 99 | ***QUESTION:*** _Test the localization function on the images you captured in **Step 3**. Does the model accurately calculate a bounding box for the numbers in the images you found? If you did not use a graphical interface, you may need to investigate the bounding boxes by hand._
100 | 
101 | ### Step 5: Build an Application or Program for a Model (Optional)
102 | Take your project one step further. If you're interested, look to build an Android application or even a more robust Python program that can interface with input images and display the classified numbers and even the bounding boxes. You can for example try to build an augmented reality app by overlaying your answer on the image like the [Word Lens](https://en.wikipedia.org/wiki/Word_Lens) app does.
103 | 
104 | Loading a TensorFlow model into a camera app on Android is demonstrated in the [TensorFlow Android demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android), which you can simply modify.
105 | 
106 | If you decide to explore this optional route, be sure to document your interface and implementation, along with significant results you find. You can see the additional rubric items that you could be evaluated on by [following this link](https://review.udacity.com/#!/rubrics/413/view).
107 | 
108 | ## Submitting the Project
109 | 
110 | ### Evaluation
111 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/413/view" target="_blank">Build a Digit Recognition Program project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
112 | 
113 | ### Submission Files
114 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `digit_recognition` for ease of access:
115 |  - The `digit_recognition.ipynb` notebook file with all questions answered and all code cells executed and displaying output.
116 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
117 |  - Any additional datasets or images used for the project that are not from the SVHN, notMNIST, or MNIST datasets.
118 |  - For the optional image recognition software component, any additional Python files necessary to run the code.
119 |  - For the optional Android application component, documentation for accessing the application. This should be a PDF report with the name **documentation.pdf**
120 | 
121 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
122 | 
123 | ### I'm Ready!
124 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
125 | 
126 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
127 | 
128 | ### What's Next?
129 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
130 | 


--------------------------------------------------------------------------------
/projects/finding_donors/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Supervised Learning
 3 | ## Project: Finding Donors for CharityML
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python 2.7** and the following Python libraries installed:
 8 | 
 9 | - [NumPy](http://www.numpy.org/)
10 | - [Pandas](http://pandas.pydata.org)
11 | - [matplotlib](http://matplotlib.org/)
12 | - [scikit-learn](http://scikit-learn.org/stable/)
13 | 
14 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html)
15 | 
16 | We recommend students install [Anaconda](https://www.continuum.io/downloads), a pre-packaged Python distribution that contains all of the necessary libraries and software for this project. 
17 | 
18 | ### Code
19 | 
20 | Template code is provided in the `finding_donors.ipynb` notebook file. You will also be required to use the included `visuals.py` Python file and the `census.csv` dataset file to complete your work. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. Note that the code included in `visuals.py` is meant to be used out-of-the-box and not intended for students to manipulate. If you are interested in how the visualizations are created in the notebook, please feel free to explore this Python file.
21 | 
22 | ### Run
23 | 
24 | In a terminal or command window, navigate to the top-level project directory `finding_donors/` (that contains this README) and run one of the following commands:
25 | 
26 | ```bash
27 | ipython notebook finding_donors.ipynb
28 | ```  
29 | or
30 | ```bash
31 | jupyter notebook finding_donors.ipynb
32 | ```
33 | 
34 | This will open the iPython Notebook software and project file in your browser.
35 | 
36 | ### Data
37 | 
38 | The modified census dataset consists of approximately 32,000 data points, with each datapoint having 13 features. This dataset is a modified version of the dataset published in the paper *"Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid",* by Ron Kohavi. You may find this paper [online](https://www.aaai.org/Papers/KDD/1996/KDD96-033.pdf), with the original dataset hosted on [UCI](https://archive.ics.uci.edu/ml/datasets/Census+Income).
39 | 
40 | **Features**
41 | - `age`: Age
42 | - `workclass`: Working Class (Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked)
43 | - `education_level`: Level of Education (Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool)
44 | - `education-num`: Number of educational years completed
45 | - `marital-status`: Marital status (Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse)
46 | - `occupation`: Work Occupation (Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces)
47 | - `relationship`: Relationship Status (Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried)
48 | - `race`: Race (White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black)
49 | - `sex`: Sex (Female, Male)
50 | - `capital-gain`: Monetary Capital Gains
51 | - `capital-loss`: Monetary Capital Losses
52 | - `hours-per-week`: Average Hours Per Week Worked
53 | - `native-country`: Native Country (United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands)
54 | 
55 | **Target Variable**
56 | - `income`: Income Class (<=50K, >50K)
57 | 


--------------------------------------------------------------------------------
/projects/finding_donors/project_description.md:
--------------------------------------------------------------------------------
 1 | # Content: Supervised Learning
 2 | ## Project: Finding Donors for CharityML
 3 | 
 4 | ## Project Overview
 5 | In this project, you will apply supervised learning techniques and an analytical mind on data collected for the U.S. census to help CharityML (a fictitious charity organization) identify people most likely to donate to their cause. You will first explore the data to learn how the census data is recorded. Next, you will apply a series of transformations and preprocessing techniques to manipulate the data into a workable format. You will then evaluate several supervised learners of your choice on the data, and consider which is best suited for the solution. Afterwards, you will optimize the model you've selected and present it as your solution to CharityML. Finally, you will explore the chosen model and its predictions under the hood, to see just how well it's performing when considering the data it's given.
 6 | 
 7 | 
 8 | ## Project Highlights
 9 | This project is designed to get you acquainted with the many supervised learning algorithms available in sklearn, and to also provide for a method of evaluating just how each model works and performs on a certain type of data. It is important in machine learning to understand exactly when and where a certain algorithm should be used, and when one should be avoided.
10 | 
11 | Things you will learn by completing this project:
12 | - How to identify when preprocessing is needed, and how to apply it.
13 | - How to establish a benchmark for a solution to the problem.
14 | - What each of several supervised learning algorithms accomplishes given a specific dataset.
15 | - How to investigate whether a candidate solution model is adequate for the problem.
16 | 
17 | ## Software Requirements
18 | 
19 | This project uses the following software and Python libraries:
20 | 
21 | - [Python 2.7](https://www.python.org/download/releases/2.7/)
22 | - [NumPy](http://www.numpy.org/)
23 | - [Pandas](http://pandas.pydata.org/)
24 | - [scikit-learn](http://scikit-learn.org/stable/)
25 | - [matplotlib](http://matplotlib.org/)
26 | 
27 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html)
28 | 
29 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer.
30 | 
31 | ## Starting the Project
32 | 
33 | For this assignment, you can find the `finding_donors` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
34 | 
35 | This project contains three files:
36 | 
37 | - `finding_donors.ipynb`: This is the main file where you will be performing your work on the project.
38 | - `census.csv`: The project dataset. You'll load this data in the notebook.
39 | - `visuals.py`: A Python file containing visualization code that is run behind-the-scenes. Do not modify this file.
40 | 
41 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook finding_donors.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
42 | 
43 | ## Submitting the Project
44 | 
45 | ### Evaluation
46 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/406/view" target="_blank">Finding Donors for CharityML project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
47 | 
48 | ### Submission Files
49 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `finding_donors` for ease of access:
50 |  - The `finding_donors.ipynb` notebook file with all questions answered and all code cells executed and displaying output.
51 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
52 | 
53 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
54 | 
55 | ### I'm Ready!
56 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
57 | 
58 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
59 | 
60 | ### What's Next?
61 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
62 | 


--------------------------------------------------------------------------------
/projects/finding_donors/visuals.py:
--------------------------------------------------------------------------------
###########################################
# Silence UserWarning messages that originate from modules matching
# "matplotlib"; newer matplotlib releases emit them during plotting.
import warnings
warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
#
# Switch matplotlib to the inline backend so figures render in the notebook.
# NOTE(review): this runs at import time and calls a method on whatever
# get_ipython() returns, so it presumably requires an active IPython/Jupyter
# session -- confirm before importing this module from a plain script.
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
###########################################
 11 | 
 12 | import matplotlib.pyplot as pl
 13 | import matplotlib.patches as mpatches
 14 | import numpy as np
 15 | import pandas as pd
 16 | from time import time
 17 | from sklearn.metrics import f1_score, accuracy_score
 18 | 
 19 | 
 20 | def distribution(data, transformed = False):
 21 |     """
 22 |     Visualization code for displaying skewed distributions of features
 23 |     """
 24 |     
 25 |     # Create figure
 26 |     fig = pl.figure(figsize = (11,5));
 27 | 
 28 |     # Skewed feature plotting
 29 |     for i, feature in enumerate(['capital-gain','capital-loss']):
 30 |         ax = fig.add_subplot(1, 2, i+1)
 31 |         ax.hist(data[feature], bins = 25, color = '#00A0A0')
 32 |         ax.set_title("'%s' Feature Distribution"%(feature), fontsize = 14)
 33 |         ax.set_xlabel("Value")
 34 |         ax.set_ylabel("Number of Records")
 35 |         ax.set_ylim((0, 2000))
 36 |         ax.set_yticks([0, 500, 1000, 1500, 2000])
 37 |         ax.set_yticklabels([0, 500, 1000, 1500, ">2000"])
 38 | 
 39 |     # Plot aesthetics
 40 |     if transformed:
 41 |         fig.suptitle("Log-transformed Distributions of Continuous Census Data Features", \
 42 |             fontsize = 16, y = 1.03)
 43 |     else:
 44 |         fig.suptitle("Skewed Distributions of Continuous Census Data Features", \
 45 |             fontsize = 16, y = 1.03)
 46 | 
 47 |     fig.tight_layout()
 48 |     fig.show()
 49 | 
 50 | 
 51 | def evaluate(results, accuracy, f1):
 52 |     """
 53 |     Visualization code to display results of various learners.
 54 |     
 55 |     inputs:
 56 |       - learners: a list of supervised learners
 57 |       - stats: a list of dictionaries of the statistic results from 'train_predict()'
 58 |       - accuracy: The score for the naive predictor
 59 |       - f1: The score for the naive predictor
 60 |     """
 61 |   
 62 |     # Create figure
 63 |     fig, ax = pl.subplots(2, 4, figsize = (11,7))
 64 | 
 65 |     # Constants
 66 |     bar_width = 0.3
 67 |     colors = ['#A00000','#00A0A0','#00A000']
 68 |     
 69 |     # Super loop to plot four panels of data
 70 |     for k, learner in enumerate(results.keys()):
 71 |         for j, metric in enumerate(['train_time', 'acc_train', 'f_train', 'pred_time', 'acc_test', 'f_test']):
 72 |             for i in np.arange(3):
 73 |                 
 74 |                 # Creative plot code
 75 |                 ax[j//3, j%3].bar(i+k*bar_width, results[learner][i][metric], width = bar_width, color = colors[k])
 76 |                 ax[j//3, j%3].set_xticks([0.45, 1.45, 2.45])
 77 |                 ax[j//3, j%3].set_xticklabels(["1%", "10%", "100%"])
 78 |                 ax[j//3, j%3].set_xlabel("Training Set Size")
 79 |                 ax[j//3, j%3].set_xlim((-0.1, 3.0))
 80 |     
 81 |     # Add unique y-labels
 82 |     ax[0, 0].set_ylabel("Time (in seconds)")
 83 |     ax[0, 1].set_ylabel("Accuracy Score")
 84 |     ax[0, 2].set_ylabel("F-score")
 85 |     ax[1, 0].set_ylabel("Time (in seconds)")
 86 |     ax[1, 1].set_ylabel("Accuracy Score")
 87 |     ax[1, 2].set_ylabel("F-score")
 88 |     
 89 |     # Add titles
 90 |     ax[0, 0].set_title("Model Training")
 91 |     ax[0, 1].set_title("Accuracy Score on Training Subset")
 92 |     ax[0, 2].set_title("F-score on Training Subset")
 93 |     ax[1, 0].set_title("Model Predicting")
 94 |     ax[1, 1].set_title("Accuracy Score on Testing Set")
 95 |     ax[1, 2].set_title("F-score on Testing Set")
 96 |     
 97 |     # Add horizontal lines for naive predictors
 98 |     ax[0, 1].axhline(y = accuracy, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
 99 |     ax[1, 1].axhline(y = accuracy, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
100 |     ax[0, 2].axhline(y = f1, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
101 |     ax[1, 2].axhline(y = f1, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
102 |     
103 |     # Set y-limits for score panels
104 |     ax[0, 1].set_ylim((0, 1))
105 |     ax[0, 2].set_ylim((0, 1))
106 |     ax[1, 1].set_ylim((0, 1))
107 |     ax[1, 2].set_ylim((0, 1))
108 | 
109 |     # Set additional plots invisibles
110 |     ax[0, 3].set_visible(False)
111 |     ax[1, 3].axis('off')
112 | 
113 |     # Create legend
114 |     for i, learner in enumerate(results.keys()):
115 |         pl.bar(0, 0, color=colors[i], label=learner)
116 |     pl.legend()
117 |     
118 |     # Aesthetics
119 |     pl.suptitle("Performance Metrics for Three Supervised Learning Models", fontsize = 16, y = 1.10)
120 |     pl.tight_layout()
121 |     pl.show()
122 |     
123 | 
def feature_plot(importances, X_train, y_train):
    """
    Bar chart of the largest feature weights and their running total.

    inputs:
      - importances: array of feature weights that supports numpy index-array
        selection (e.g. a numpy array), in the same order as X_train's columns
      - X_train: object exposing `.columns.values` with one name per weight
      - y_train: not used by this function; kept so existing calls still work

    At most five features are shown; if fewer weights are supplied, all of
    them are plotted.
    """
    
    # Display the five most important features (or all, if fewer than five)
    top_n = min(5, len(importances))
    indices = np.argsort(importances)[::-1]
    columns = X_train.columns.values[indices[:top_n]]
    values = importances[indices][:top_n]
    positions = np.arange(top_n)

    # Create the plot
    pl.figure(figsize = (9,5))
    pl.title("Normalized Weights for First Five Most Predictive Features", fontsize = 16)
    pl.bar(positions, values, width = 0.6, align="center", color = '#00A000', \
          label = "Feature Weight")
    # Narrow cumulative-sum bars are drawn just left of each feature's bar
    pl.bar(positions - 0.3, np.cumsum(values), width = 0.2, align = "center", color = '#00A0A0', \
          label = "Cumulative Feature Weight")
    pl.xticks(positions, columns)
    pl.xlim((-0.5, top_n - 0.5))
    pl.ylabel("Weight", fontsize = 12)
    pl.xlabel("Feature", fontsize = 12)
    
    pl.legend(loc = 'upper center')
    pl.tight_layout()
    pl.show()  
146 | 


--------------------------------------------------------------------------------
/projects/image-classification/ReadMe.md:
--------------------------------------------------------------------------------
1 | This project has been written in Python 3.x.


--------------------------------------------------------------------------------
/projects/image-classification/helper.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | from sklearn.preprocessing import LabelBinarizer
  5 | 
  6 | 
  7 | def _load_label_names():
  8 |     """
  9 |     Load the label names from file
 10 |     """
 11 |     return ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
 12 | 
 13 | 
 14 | def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
 15 |     """
 16 |     Load a batch of the dataset
 17 |     """
 18 |     with open(cifar10_dataset_folder_path + '/data_batch_' + str(batch_id), mode='rb') as file:
 19 |         batch = pickle.load(file, encoding='latin1')
 20 | 
 21 |     features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
 22 |     labels = batch['labels']
 23 | 
 24 |     return features, labels
 25 | 
 26 | 
 27 | def display_stats(cifar10_dataset_folder_path, batch_id, sample_id):
 28 |     """
 29 |     Display Stats of the the dataset
 30 |     """
 31 |     batch_ids = list(range(1, 6))
 32 | 
 33 |     if batch_id not in batch_ids:
 34 |         print('Batch Id out of Range. Possible Batch Ids: {}'.format(batch_ids))
 35 |         return None
 36 | 
 37 |     features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_id)
 38 | 
 39 |     if not (0 <= sample_id < len(features)):
 40 |         print('{} samples in batch {}.  {} is out of range.'.format(len(features), batch_id, sample_id))
 41 |         return None
 42 | 
 43 |     print('\nStats of batch {}:'.format(batch_id))
 44 |     print('Samples: {}'.format(len(features)))
 45 |     print('Label Counts: {}'.format(dict(zip(*np.unique(labels, return_counts=True)))))
 46 |     print('First 20 Labels: {}'.format(labels[:20]))
 47 | 
 48 |     sample_image = features[sample_id]
 49 |     sample_label = labels[sample_id]
 50 |     label_names = _load_label_names()
 51 | 
 52 |     print('\nExample of Image {}:'.format(sample_id))
 53 |     print('Image - Min Value: {} Max Value: {}'.format(sample_image.min(), sample_image.max()))
 54 |     print('Image - Shape: {}'.format(sample_image.shape))
 55 |     print('Label - Label Id: {} Name: {}'.format(sample_label, label_names[sample_label]))
 56 |     plt.axis('off')
 57 |     plt.imshow(sample_image)
 58 | 
 59 | 
 60 | def _preprocess_and_save(normalize, one_hot_encode, features, labels, filename):
 61 |     """
 62 |     Preprocess data and save it to file
 63 |     """
 64 |     features = normalize(features)
 65 |     labels = one_hot_encode(labels)
 66 | 
 67 |     pickle.dump((features, labels), open(filename, 'wb'))
 68 | 
 69 | 
 70 | def preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode):
 71 |     """
 72 |     Preprocess Training and Validation Data
 73 |     """
 74 |     n_batches = 5
 75 |     valid_features = []
 76 |     valid_labels = []
 77 | 
 78 |     for batch_i in range(1, n_batches + 1):
 79 |         features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_i)
 80 |         validation_count = int(len(features) * 0.1)
 81 | 
 82 |         # Prprocess and save a batch of training data
 83 |         _preprocess_and_save(
 84 |             normalize,
 85 |             one_hot_encode,
 86 |             features[:-validation_count],
 87 |             labels[:-validation_count],
 88 |             'preprocess_batch_' + str(batch_i) + '.p')
 89 | 
 90 |         # Use a portion of training batch for validation
 91 |         valid_features.extend(features[-validation_count:])
 92 |         valid_labels.extend(labels[-validation_count:])
 93 | 
 94 |     # Preprocess and Save all validation data
 95 |     _preprocess_and_save(
 96 |         normalize,
 97 |         one_hot_encode,
 98 |         np.array(valid_features),
 99 |         np.array(valid_labels),
100 |         'preprocess_validation.p')
101 | 
102 |     with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
103 |         batch = pickle.load(file, encoding='latin1')
104 | 
105 |     # load the training data
106 |     test_features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
107 |     test_labels = batch['labels']
108 | 
109 |     # Preprocess and Save all training data
110 |     _preprocess_and_save(
111 |         normalize,
112 |         one_hot_encode,
113 |         np.array(test_features),
114 |         np.array(test_labels),
115 |         'preprocess_training.p')
116 | 
117 | 
def batch_features_labels(features, labels, batch_size):
    """
    Yield consecutive `(features, labels)` slices of at most `batch_size` items
    """
    sample_count = len(features)
    for lower in range(0, sample_count, batch_size):
        # The last window is clipped to the end of the data
        upper = lower + batch_size
        if upper > sample_count:
            upper = sample_count
        window = slice(lower, upper)
        yield features[window], labels[window]
125 | 
126 | 
def load_preprocess_training_batch(batch_id, batch_size):
    """
    Load the Preprocessed Training data and return them in batches of <batch_size> or less

    Reads `preprocess_batch_<batch_id>.p` from the current working directory
    and returns the generator produced by `batch_features_labels`.
    """
    filename = 'preprocess_batch_' + str(batch_id) + '.p'

    # Close the file as soon as it has been read instead of leaking the handle.
    # NOTE: pickle can run arbitrary code while loading; only use trusted files
    with open(filename, mode='rb') as in_file:
        features, labels = pickle.load(in_file)

    # Return the training data in batches of size <batch_size> or less
    return batch_features_labels(features, labels, batch_size)
136 | 
137 | 
def display_image_predictions(features, labels, predictions):
    """
    Show each image next to a horizontal bar chart of its predicted classes.

    `labels` are converted back to class ids with a LabelBinarizer fitted on
    0..9. `predictions` must expose `.indices` and `.values`, iterated row by
    row alongside `features`. The figure has a fixed 4x2 grid of axes and each
    chart is drawn with 3 bars - presumably one row per image with three
    predictions each; confirm against the caller.
    """
    n_classes = 10
    class_names = _load_label_names()

    # Recover integer class ids from the encoded labels
    binarizer = LabelBinarizer()
    binarizer.fit(range(n_classes))
    true_ids = binarizer.inverse_transform(np.array(labels))

    figure, grid = plt.subplots(nrows=4, ncols=2)
    figure.tight_layout()
    figure.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    bar_positions = np.arange(n_predictions) + margin
    bar_height = (1. - 2. * margin) / n_predictions

    rows = zip(features, true_ids, predictions.indices, predictions.values)
    for row, (image, true_id, top_ids, top_values) in enumerate(rows):
        top_names = [class_names[class_id] for class_id in top_ids]
        image_ax, chart_ax = grid[row]

        # Left column: the image, titled with its true class
        image_ax.imshow(image)
        image_ax.set_title(class_names[true_id])
        image_ax.set_axis_off()

        # Right column: predictions, reversed so the first entry is on top
        chart_ax.barh(bar_positions, top_values[::-1], bar_height)
        chart_ax.set_yticks(bar_positions)
        chart_ax.set_yticklabels(top_names[::-1])
        chart_ax.set_xticks([0, 0.5, 1.0])
166 | 


--------------------------------------------------------------------------------
/projects/image-classification/problem_unittests.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import tensorflow as tf
  4 | import random
  5 | from unittest.mock import MagicMock
  6 | 
  7 | 
  8 | def _print_success_message():
  9 |     print('Tests Passed')
 10 | 
 11 | 
 12 | def test_folder_path(cifar10_dataset_folder_path):
 13 |     assert cifar10_dataset_folder_path is not None,\
 14 |         'Cifar-10 data folder not set.'
 15 |     assert cifar10_dataset_folder_path[-1] != '/',\
 16 |         'The "/" shouldn\'t be added to the end of the path.'
 17 |     assert os.path.exists(cifar10_dataset_folder_path),\
 18 |         'Path not found.'
 19 |     assert os.path.isdir(cifar10_dataset_folder_path),\
 20 |         '{} is not a folder.'.format(os.path.basename(cifar10_dataset_folder_path))
 21 | 
 22 |     train_files = [cifar10_dataset_folder_path + '/data_batch_' + str(batch_id) for batch_id in range(1, 6)]
 23 |     other_files = [cifar10_dataset_folder_path + '/batches.meta', cifar10_dataset_folder_path + '/test_batch']
 24 |     missing_files = [path for path in train_files + other_files if not os.path.exists(path)]
 25 | 
 26 |     assert not missing_files,\
 27 |         'Missing files in directory: {}'.format(missing_files)
 28 | 
 29 |     print('All files found!')
 30 | 
 31 | 
 32 | def test_normalize(normalize):
 33 |     test_shape = (np.random.choice(range(1000)), 32, 32, 3)
 34 |     test_numbers = np.random.choice(range(256), test_shape)
 35 |     normalize_out = normalize(test_numbers)
 36 | 
 37 |     assert type(normalize_out).__module__ == np.__name__,\
 38 |         'Not Numpy Object'
 39 | 
 40 |     assert normalize_out.shape == test_shape,\
 41 |         'Incorrect Shape. {} shape found'.format(normalize_out.shape)
 42 | 
 43 |     assert normalize_out.max() <= 1 and normalize_out.min() >= 0,\
 44 |         'Incorect Range. {} to {} found'.format(normalize_out.min(), normalize_out.max())
 45 | 
 46 |     _print_success_message()
 47 | 
 48 | 
 49 | def test_one_hot_encode(one_hot_encode):
 50 |     test_shape = np.random.choice(range(1000))
 51 |     test_numbers = np.random.choice(range(10), test_shape)
 52 |     one_hot_out = one_hot_encode(test_numbers)
 53 | 
 54 |     assert type(one_hot_out).__module__ == np.__name__,\
 55 |         'Not Numpy Object'
 56 | 
 57 |     assert one_hot_out.shape == (test_shape, 10),\
 58 |         'Incorrect Shape. {} shape found'.format(one_hot_out.shape)
 59 | 
 60 |     n_encode_tests = 5
 61 |     test_pairs = list(zip(test_numbers, one_hot_out))
 62 |     test_indices = np.random.choice(len(test_numbers), n_encode_tests)
 63 |     labels = [test_pairs[test_i][0] for test_i in test_indices]
 64 |     enc_labels = np.array([test_pairs[test_i][1] for test_i in test_indices])
 65 |     new_enc_labels = one_hot_encode(labels)
 66 | 
 67 |     assert np.array_equal(enc_labels, new_enc_labels),\
 68 |         'Encodings returned different results for the same numbers.\n' \
 69 |         'For the first call it returned:\n' \
 70 |         '{}\n' \
 71 |         'For the second call it returned\n' \
 72 |         '{}\n' \
 73 |         'Make sure you save the map of labels to encodings outside of the function.'.format(enc_labels, new_enc_labels)
 74 | 
 75 |     _print_success_message()
 76 | 
 77 | 
 78 | def test_nn_image_inputs(neural_net_image_input):
 79 |     image_shape = (32, 32, 3)
 80 |     nn_inputs_out_x = neural_net_image_input(image_shape)
 81 | 
 82 |     assert nn_inputs_out_x.get_shape().as_list() == [None, image_shape[0], image_shape[1], image_shape[2]],\
 83 |         'Incorrect Image Shape.  Found {} shape'.format(nn_inputs_out_x.get_shape().as_list())
 84 | 
 85 |     assert nn_inputs_out_x.op.type == 'Placeholder',\
 86 |         'Incorrect Image Type.  Found {} type'.format(nn_inputs_out_x.op.type)
 87 | 
 88 |     assert nn_inputs_out_x.name == 'x:0', \
 89 |         'Incorrect Name.  Found {}'.format(nn_inputs_out_x.name)
 90 | 
 91 |     print('Image Input Tests Passed.')
 92 | 
 93 | 
 94 | def test_nn_label_inputs(neural_net_label_input):
 95 |     n_classes = 10
 96 |     nn_inputs_out_y = neural_net_label_input(n_classes)
 97 | 
 98 |     assert nn_inputs_out_y.get_shape().as_list() == [None, n_classes],\
 99 |         'Incorrect Label Shape.  Found {} shape'.format(nn_inputs_out_y.get_shape().as_list())
100 | 
101 |     assert nn_inputs_out_y.op.type == 'Placeholder',\
102 |         'Incorrect Label Type.  Found {} type'.format(nn_inputs_out_y.op.type)
103 | 
104 |     assert nn_inputs_out_y.name == 'y:0', \
105 |         'Incorrect Name.  Found {}'.format(nn_inputs_out_y.name)
106 | 
107 |     print('Label Input Tests Passed.')
108 | 
109 | 
110 | def test_nn_keep_prob_inputs(neural_net_keep_prob_input):
111 |     nn_inputs_out_k = neural_net_keep_prob_input()
112 | 
113 |     assert nn_inputs_out_k.get_shape().ndims is None,\
114 |         'Too many dimensions found for keep prob.  Found {} dimensions.  It should be a scalar (0-Dimension Tensor).'.format(nn_inputs_out_k.get_shape().ndims)
115 | 
116 |     assert nn_inputs_out_k.op.type == 'Placeholder',\
117 |         'Incorrect keep prob Type.  Found {} type'.format(nn_inputs_out_k.op.type)
118 | 
119 |     assert nn_inputs_out_k.name == 'keep_prob:0', \
120 |         'Incorrect Name.  Found {}'.format(nn_inputs_out_k.name)
121 | 
122 |     print('Keep Prob Tests Passed.')
123 | 
124 | 
def test_con_pool(conv2d_maxpool):
    """
    Check the output shape of `conv2d_maxpool` on a [None, 32, 32, 5] input.

    With 10 outputs, a (2, 2) convolution kernel at stride (4, 4) and a (2, 2)
    pooling kernel at stride (2, 2), the result must have shape
    [None, 4, 4, 10]; otherwise an AssertionError is raised.
    """
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 5])
    n_outputs = 10
    conv_kernel, conv_stride = (2, 2), (4, 4)
    pool_kernel, pool_stride = (2, 2), (2, 2)

    pooled = conv2d_maxpool(inputs, n_outputs, conv_kernel, conv_stride, pool_kernel, pool_stride)

    found_shape = pooled.get_shape().as_list()
    assert found_shape == [None, 4, 4, 10],\
        'Incorrect Shape.  Found {} shape'.format(found_shape)

    _print_success_message()
139 | 
140 | 
def test_flatten(flatten):
    """
    Check that `flatten` turns a [None, 10, 30, 6] input into shape
    [None, 1800]; raises AssertionError otherwise.
    """
    inputs = tf.placeholder(tf.float32, [None, 10, 30, 6])
    flattened = flatten(inputs)

    found_shape = flattened.get_shape().as_list()
    assert found_shape == [None, 10*30*6],\
        'Incorrect Shape.  Found {} shape'.format(found_shape)

    _print_success_message()
149 | 
150 | 
def test_fully_conn(fully_conn):
    """
    Check that `fully_conn` maps a [None, 128] input to shape [None, 40]
    when asked for 40 outputs; raises AssertionError otherwise.
    """
    inputs = tf.placeholder(tf.float32, [None, 128])
    n_outputs = 40

    layer = fully_conn(inputs, n_outputs)

    found_shape = layer.get_shape().as_list()
    assert found_shape == [None, 40],\
        'Incorrect Shape.  Found {} shape'.format(found_shape)

    _print_success_message()
161 | 
162 | 
def test_output(output):
    """
    Check that `output` maps a [None, 128] input to shape [None, 40]
    when asked for 40 outputs; raises AssertionError otherwise.
    """
    inputs = tf.placeholder(tf.float32, [None, 128])
    n_outputs = 40

    layer = output(inputs, n_outputs)

    found_shape = layer.get_shape().as_list()
    assert found_shape == [None, 40],\
        'Incorrect Shape.  Found {} shape'.format(found_shape)

    _print_success_message()
173 | 
174 | 
def test_conv_net(conv_net):
    """
    Check that `conv_net`, given a [None, 32, 32, 3] float placeholder and a
    second float placeholder, returns something of shape [None, 10];
    raises AssertionError otherwise.
    """
    images = tf.placeholder(tf.float32, [None, 32, 32, 3])
    keep_prob = tf.placeholder(tf.float32)

    logits = conv_net(images, keep_prob)

    found_shape = logits.get_shape().as_list()
    assert found_shape == [None, 10],\
        'Incorrect Model Output.  Found {}'.format(found_shape)

    print('Neural Network Built!')
185 | 
186 | 
def test_train_nn(train_neural_network):
    """
    Check that `train_neural_network` calls the session's `run` method.

    The session's `run` is replaced by a MagicMock, so nothing is executed;
    the function is handed random feature/label arrays and only the fact that
    `run` was called is asserted.
    """
    session = tf.Session()

    # Random stand-in inputs, drawn in this order: features, labels, keep prob
    features = np.random.rand(128, 32, 32, 3)
    labels = np.random.rand(128, 10)
    keep_prob = np.random.rand(1)
    optimizer = tf.train.AdamOptimizer()

    # Swap out run() so the call can be detected without running a graph
    session.run = MagicMock()
    train_neural_network(session, optimizer, keep_prob, features, labels)

    assert session.run.called, 'Session not used'

    _print_success_message()
200 | 


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/environment.yml:
--------------------------------------------------------------------------------
 1 | name: dlnd-tf-lab
 2 | dependencies:
 3 | - openssl=1.0.2j
 4 | - pip>=8.1.2
 5 | - psutil=4.4.1
 6 | - python>=3.4.0
 7 | - readline=6.2
 8 | - setuptools=27.2.0
 9 | - sqlite=3.13.0
10 | - tk=8.5.18
11 | - wheel=0.29.0
12 | - xz=5.2.2
13 | - zlib=1.2.8
14 | - pip:
15 |   - appnope==0.1.0
16 |   - cycler==0.10.0
17 |   - decorator==4.0.10
18 |   - entrypoints==0.2.2
19 |   - ipykernel==4.5.0
20 |   - ipython==5.1.0
21 |   - ipython-genutils==0.1.0
22 |   - ipywidgets==5.2.2
23 |   - jinja2==2.8
24 |   - jsonschema==2.5.1
25 |   - jupyter==1.0.0
26 |   - jupyter-client==4.4.0
27 |   - jupyter-console==5.0.0
28 |   - jupyter-core==4.2.0
29 |   - markupsafe==0.23
30 |   - matplotlib==1.5.3
31 |   - mistune==0.7.3
32 |   - nbconvert==4.2.0
33 |   - nbformat==4.1.0
34 |   - notebook==4.2.3
35 |   - numpy==1.11.2
36 |   - pexpect==4.2.1
37 |   - pickleshare==0.7.4
38 |   - pillow==3.4.2
39 |   - prompt-toolkit==1.0.8
40 |   - protobuf==3.1.0.post1
41 |   - ptyprocess==0.5.1
42 |   - pygments==2.1.3
43 |   - pyparsing==2.1.10
44 |   - python-dateutil==2.5.3
45 |   - pytz==2016.7
46 |   - pyzmq==16.0.0
47 |   - qtconsole==4.2.1
48 |   - scikit-learn==0.18
49 |   - scipy==0.18.1
50 |   - simplegeneric==0.8.1
51 |   - six==1.10.0
52 |   - sklearn==0.0
53 |   - tensorflow>=0.12.1
54 |   - terminado==0.6
55 |   - tornado==4.4.2
56 |   - tqdm==4.8.4
57 |   - traitlets==4.3.1
58 |   - wcwidth==0.1.7
59 |   - widgetsnbextension==1.2.6
60 | 


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/environment_win.yml:
--------------------------------------------------------------------------------
 1 | name: dlnd-tf-lab
 2 | channels: !!python/tuple
 3 | - defaults
 4 | dependencies:
 5 | - bleach=1.5.0=py35_0
 6 | - bzip2=1.0.6=vc14_3
 7 | - colorama=0.3.7=py35_0
 8 | - cycler=0.10.0=py35_0
 9 | - decorator=4.0.11=py35_0
10 | - entrypoints=0.2.2=py35_1
11 | - freetype=2.5.5=vc14_2
12 | - html5lib=0.999=py35_0
13 | - icu=57.1=vc14_0
14 | - ipykernel=4.5.2=py35_0
15 | - ipython=5.2.2=py35_0
16 | - ipython_genutils=0.1.0=py35_0
17 | - ipywidgets=5.2.2=py35_1
18 | - jinja2=2.9.4=py35_0
19 | - jpeg=9b=vc14_0
20 | - jsonschema=2.5.1=py35_0
21 | - jupyter=1.0.0=py35_3
22 | - jupyter_client=4.4.0=py35_0
23 | - jupyter_console=5.0.0=py35_0
24 | - jupyter_core=4.3.0=py35_0
25 | - libpng=1.6.27=vc14_0
26 | - libtiff=4.0.6=vc14_3
27 | - markupsafe=0.23=py35_2
28 | - matplotlib=2.0.0=np112py35_0
29 | - mistune=0.7.3=py35_0
30 | - mkl=2017.0.1=0
31 | - nbconvert=5.1.1=py35_0
32 | - nbformat=4.2.0=py35_0
33 | - notebook=4.3.1=py35_1
34 | - numpy=1.12.0=py35_0
35 | - olefile=0.44=py35_0
36 | - openssl=1.0.2k=vc14_0
37 | - pandas=0.19.2=np112py35_1
38 | - pandocfilters=1.4.1=py35_0
39 | - path.py=10.1=py35_0
40 | - pickleshare=0.7.4=py35_0
41 | - pillow=4.0.0=py35_1
42 | - pip=9.0.1=py35_1
43 | - prompt_toolkit=1.0.9=py35_0
44 | - pygments=2.1.3=py35_0
45 | - pyparsing=2.1.4=py35_0
46 | - pyqt=5.6.0=py35_2
47 | - python=3.5.2=0
48 | - python-dateutil=2.6.0=py35_0
49 | - pytz=2016.10=py35_0
50 | - pyzmq=16.0.2=py35_0
51 | - qt=5.6.2=vc14_3
52 | - qtconsole=4.2.1=py35_2
53 | - scikit-learn=0.18.1=np112py35_1
54 | - scipy=0.18.1=np112py35_1
55 | - setuptools=27.2.0=py35_1
56 | - simplegeneric=0.8.1=py35_1
57 | - sip=4.18=py35_0
58 | - six=1.10.0=py35_0
59 | - testpath=0.3=py35_0
60 | - tk=8.5.18=vc14_0
61 | - tornado=4.4.2=py35_0
62 | - traitlets=4.3.1=py35_0
63 | - vs2015_runtime=14.0.25123=0
64 | - wcwidth=0.1.7=py35_0
65 | - wheel=0.29.0=py35_0
66 | - widgetsnbextension=1.2.6=py35_0
67 | - win_unicode_console=0.5=py35_0
68 | - zlib=1.2.8=vc14_3
69 | - pip:
70 |   - ipython-genutils==0.1.0
71 |   - jupyter-client==4.4.0
72 |   - jupyter-console==5.0.0
73 |   - jupyter-core==4.3.0
74 |   - prompt-toolkit==1.0.9
75 |   - protobuf==3.2.0
76 |   - tensorflow==1.0.0
77 |   - tqdm==4.11.2
78 |   - win-unicode-console==0.5
79 | prefix: C:\Users\Mat\Anaconda3\envs\dlnd-tf-lab
80 | 
81 | 


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/image/Learn Rate Tune - Image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/intro-to-tensorflow/image/Learn Rate Tune - Image.png


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/image/Mean Variance - Image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/intro-to-tensorflow/image/Mean Variance - Image.png


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/image/network_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/intro-to-tensorflow/image/network_diagram.png


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/image/notmnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/intro-to-tensorflow/image/notmnist.png


--------------------------------------------------------------------------------
/projects/intro-to-tensorflow/intro_to_tensorflow_solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true
  7 |    },
  8 |    "source": [
  9 |     "# Solutions\n",
 10 |     "## Problem 1\n",
 11 |     "Implement the Min-Max scaling function ($X'=a+{\\frac {\\left(X-X_{\\min }\\right)\\left(b-a\\right)}{X_{\\max }-X_{\\min }}}$) with the parameters:\n",
 12 |     "\n",
 13 |     "$X_{\\min }=0$\n",
 14 |     "\n",
 15 |     "$X_{\\max }=255$\n",
 16 |     "\n",
 17 |     "$a=0.1$\n",
 18 |     "\n",
 19 |     "$b=0.9$"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": null,
 25 |    "metadata": {
 26 |     "collapsed": true
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# Problem 1 - Implement Min-Max scaling for grayscale image data\n",
 31 |     "def normalize_grayscale(image_data):\n",
 32 |     "    \"\"\"\n",
 33 |     "    Normalize the image data with Min-Max scaling to a range of [0.1, 0.9]\n",
 34 |     "    :param image_data: The image data to be normalized\n",
 35 |     "    :return: Normalized image data\n",
 36 |     "    \"\"\"\n",
 37 |     "    a = 0.1\n",
 38 |     "    b = 0.9\n",
 39 |     "    grayscale_min = 0\n",
 40 |     "    grayscale_max = 255\n",
 41 |     "    return a + ( ( (image_data - grayscale_min)*(b - a) )/( grayscale_max - grayscale_min ) )"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "## Problem 2\n",
 49 |     "- Use [tf.placeholder()](https://www.tensorflow.org/api_docs/python/io_ops.html#placeholder) for `features` and `labels` since they are the inputs to the model.\n",
 50 |     "- Any math operations must have the same type on both sides of the operator.  The weights are float32, so the `features` and `labels` must also be float32.\n",
 51 |     "- Use [tf.Variable()](https://www.tensorflow.org/api_docs/python/state_ops.html#Variable) to allow `weights` and `biases` to be modified.\n",
 52 |     "- The `weights` must be the dimensions of features by labels.  The number of features is the size of the image, 28*28=784.  The size of labels is 10.\n",
 53 |     "- The `biases` must be the dimensions of the labels, which is 10."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": null,
 59 |    "metadata": {
 60 |     "collapsed": true
 61 |    },
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "features_count = 784\n",
 65 |     "labels_count = 10\n",
 66 |     "\n",
 67 |     "# Problem 2 - Set the features and labels tensors\n",
 68 |     "features = tf.placeholder(tf.float32)\n",
 69 |     "labels = tf.placeholder(tf.float32)\n",
 70 |     "\n",
 71 |     "# Problem 2 - Set the weights and biases tensors\n",
 72 |     "weights = tf.Variable(tf.truncated_normal((features_count, labels_count)))\n",
 73 |     "biases = tf.Variable(tf.zeros(labels_count))"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "## Problem 3\n",
 81 |     "Configuration 1\n",
 82 |     "* **Epochs:** 1\n",
 83 |     "* **Learning Rate:** 0.1\n",
 84 |     "\n",
 85 |     "Configuration 2\n",
 86 |     "* **Epochs:** 4 or 5\n",
 87 |     "* **Learning Rate:** 0.2"
 88 |    ]
 89 |   }
 90 |  ],
 91 |  "metadata": {
 92 |   "kernelspec": {
 93 |    "display_name": "Python 3",
 94 |    "language": "python",
 95 |    "name": "python3"
 96 |   },
 97 |   "language_info": {
 98 |    "codemirror_mode": {
 99 |     "name": "ipython",
100 |     "version": 3
101 |    },
102 |    "file_extension": ".py",
103 |    "mimetype": "text/x-python",
104 |    "name": "python",
105 |    "nbconvert_exporter": "python",
106 |    "pygments_lexer": "ipython3",
107 |    "version": "3.5.2"
108 |   }
109 |  },
110 |  "nbformat": 4,
111 |  "nbformat_minor": 0
112 | }
113 | 


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | mnist-mlp/.ipynb_checkpoints/
 3 | mnist-mlp/.DS_Store
 4 | conv-visualization/.ipynb_checkpoints/
 5 | conv-visualization/.DS_Store
 6 | cifar10-classification/.ipynb_checkpoints/
 7 | cifar10-classification/.DS_Store
 8 | cifar10-augmentation/.ipynb_checkpoints/
 9 | cifar10-augmentation/.DS_Store
10 | transfer-learning/dogImages
11 | transfer-learning/bottleneck_features/DogVGG16Data.npz
12 | transfer-learning/.ipynb_checkpoints/
13 | transfer-learning/.DS_Store
14 | 


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/README.md:
--------------------------------------------------------------------------------
1 | # CNN practice projects


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/cifar10-augmentation/aug_model.weights.best.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/cifar10-augmentation/aug_model.weights.best.hdf5


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/cifar10-classification/MLP.weights.best.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/cifar10-classification/MLP.weights.best.hdf5


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/cifar10-classification/model.weights.best.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/cifar10-classification/model.weights.best.hdf5


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/conv-visualization/images/udacity_sdc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/conv-visualization/images/udacity_sdc.png


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/mnist-mlp/mnist.model.best.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/mnist-mlp/mnist.model.best.hdf5


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/requirements/aind-dog-linux.yml:
--------------------------------------------------------------------------------
 1 | name: aind-dog
 2 | channels:
 3 | - defaults
 4 | dependencies:
 5 | - openssl=1.0.2l=0
 6 | - pip=9.0.1=py36_1
 7 | - python=3.6.1=2
 8 | - readline=6.2=2
 9 | - setuptools=27.2.0=py36_0
10 | - sqlite=3.13.0=0
11 | - tk=8.5.18=0
12 | - wheel=0.29.0=py36_0
13 | - xz=5.2.2=1
14 | - zlib=1.2.8=3
15 | - pip:
16 |   - bleach==2.0.0
17 |   - cycler==0.10.0
18 |   - decorator==4.0.11
19 |   - entrypoints==0.2.3
20 |   - h5py==2.6.0
21 |   - html5lib==0.999999999
22 |   - ipykernel==4.6.1
23 |   - ipython==6.1.0
24 |   - ipython-genutils==0.2.0
25 |   - ipywidgets==6.0.0
26 |   - jedi==0.10.2
27 |   - jinja2==2.9.6
28 |   - jsonschema==2.6.0
29 |   - jupyter==1.0.0
30 |   - jupyter-client==5.0.1
31 |   - jupyter-console==5.1.0
32 |   - jupyter-core==4.3.0
33 |   - keras==2.0.2
34 |   - markupsafe==1.0
35 |   - matplotlib==2.0.0
36 |   - mistune==0.7.4
37 |   - nbconvert==5.2.1
38 |   - nbformat==4.3.0
39 |   - notebook==5.0.0
40 |   - numpy==1.12.0
41 |   - olefile==0.44
42 |   - opencv-python==3.2.0.6
43 |   - pandocfilters==1.4.1
44 |   - pexpect==4.2.1
45 |   - pickleshare==0.7.4
46 |   - pillow==4.0.0
47 |   - prompt-toolkit==1.0.14
48 |   - protobuf==3.3.0
49 |   - ptyprocess==0.5.1
50 |   - pygments==2.2.0
51 |   - pyparsing==2.2.0
52 |   - python-dateutil==2.6.0
53 |   - pytz==2017.2
54 |   - pyyaml==3.12
55 |   - pyzmq==16.0.2
56 |   - qtconsole==4.3.0
57 |   - scikit-learn==0.18.1
58 |   - scipy==0.18.1
59 |   - simplegeneric==0.8.1
60 |   - six==1.10.0
61 |   - tensorflow==1.0.0
62 |   - terminado==0.6
63 |   - testpath==0.3.1
64 |   - theano==0.9.0
65 |   - tornado==4.5.1
66 |   - tqdm==4.11.2
67 |   - traitlets==4.3.2
68 |   - wcwidth==0.1.7
69 |   - webencodings==0.5.1
70 |   - widgetsnbextension==2.0.0
71 | 


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/requirements/aind-dog-mac.yml:
--------------------------------------------------------------------------------
  1 | name: aind-dog
  2 | channels:
  3 | - damianavila82
  4 | - defaults
  5 | dependencies:
  6 | - rise=4.0.0b1=py35_0
  7 | - _license=1.1=py35_1
  8 | - alabaster=0.7.10=py35_0
  9 | - anaconda-client=1.6.2=py35_0
 10 | - anaconda=custom=py35_0
 11 | - anaconda-navigator=1.5.0=py35_0
 12 | - anaconda-project=0.4.1=py35_0
 13 | - appnope=0.1.0=py35_0
 14 | - appscript=1.0.1=py35_0
 15 | - astroid=1.4.9=py35_0
 16 | - astropy=1.3=np112py35_0
 17 | - babel=2.3.4=py35_0
 18 | - backports=1.0=py35_0
 19 | - beautifulsoup4=4.5.3=py35_0
 20 | - bitarray=0.8.1=py35_0
 21 | - blaze=0.10.1=py35_0
 22 | - bleach=1.5.0=py35_0
 23 | - bokeh=0.12.4=py35_0
 24 | - boto=2.46.1=py35_0
 25 | - bottleneck=1.2.0=np112py35_0
 26 | - cffi=1.9.1=py35_0
 27 | - chardet=2.3.0=py35_0
 28 | - chest=0.2.3=py35_0
 29 | - click=6.7=py35_0
 30 | - cloudpickle=0.2.2=py35_0
 31 | - clyent=1.2.2=py35_0
 32 | - colorama=0.3.7=py35_0
 33 | - configobj=5.0.6=py35_0
 34 | - contextlib2=0.5.4=py35_0
 35 | - cryptography=1.7.1=py35_0
 36 | - curl=7.52.1=0
 37 | - cycler=0.10.0=py35_0
 38 | - cython=0.25.2=py35_0
 39 | - cytoolz=0.8.2=py35_0
 40 | - dask=0.14.0=py35_0
 41 | - datashape=0.5.4=py35_0
 42 | - decorator=4.0.11=py35_0
 43 | - dill=0.2.5=py35_0
 44 | - docutils=0.13.1=py35_0
 45 | - entrypoints=0.2.2=py35_1
 46 | - et_xmlfile=1.0.1=py35_0
 47 | - fastcache=1.0.2=py35_1
 48 | - flask=0.12=py35_0
 49 | - flask-cors=3.0.2=py35_0
 50 | - freetype=2.5.5=2
 51 | - get_terminal_size=1.0.0=py35_0
 52 | - gevent=1.2.1=py35_0
 53 | - greenlet=0.4.12=py35_0
 54 | - h5py=2.6.0=np112py35_2
 55 | - hdf5=1.8.17=1
 56 | - heapdict=1.0.0=py35_1
 57 | - html5lib=0.999=py35_0
 58 | - icu=54.1=0
 59 | - idna=2.2=py35_0
 60 | - imagesize=0.7.1=py35_0
 61 | - ipykernel=4.5.2=py35_0
 62 | - ipython=5.3.0=py35_0
 63 | - ipython_genutils=0.1.0=py35_0
 64 | - ipywidgets=6.0.0=py35_0
 65 | - isort=4.2.5=py35_0
 66 | - itsdangerous=0.24=py35_0
 67 | - jbig=2.1=0
 68 | - jdcal=1.3=py35_0
 69 | - jedi=0.9.0=py35_1
 70 | - jinja2=2.9.5=py35_0
 71 | - jpeg=9b=0
 72 | - jsonschema=2.5.1=py35_0
 73 | - jupyter=1.0.0=py35_3
 74 | - jupyter_client=5.0.0=py35_0
 75 | - jupyter_console=5.1.0=py35_0
 76 | - jupyter_core=4.3.0=py35_0
 77 | - lazy-object-proxy=1.2.2=py35_0
 78 | - libiconv=1.14=0
 79 | - libpng=1.6.27=0
 80 | - libtiff=4.0.6=3
 81 | - libxml2=2.9.4=0
 82 | - libxslt=1.1.29=0
 83 | - llvmlite=0.16.0=py35_0
 84 | - locket=0.2.0=py35_1
 85 | - lxml=3.7.3=py35_0
 86 | - markupsafe=0.23=py35_2
 87 | - matplotlib=2.0.0=np112py35_0
 88 | - mistune=0.7.4=py35_0
 89 | - mkl=2017.0.1=0
 90 | - mkl-service=1.1.2=py35_3
 91 | - mpmath=0.19=py35_1
 92 | - multipledispatch=0.4.9=py35_0
 93 | - nbconvert=5.1.1=py35_0
 94 | - nbformat=4.3.0=py35_0
 95 | - networkx=1.11=py35_0
 96 | - nltk=3.2.2=py35_0
 97 | - nose=1.3.7=py35_1
 98 | - notebook=4.4.1=py35_0
 99 | - numba=0.31.0=np112py35_0
100 | - numexpr=2.6.2=np112py35_0
101 | - numpy=1.12.0=py35_0
102 | - numpydoc=0.6.0=py35_0
103 | - odo=0.5.0=py35_1
104 | - olefile=0.44=py35_0
105 | - openpyxl=2.4.1=py35_0
106 | - openssl=1.0.2k=0
107 | - pandas=0.19.2=np112py35_1
108 | - pandocfilters=1.4.1=py35_0
109 | - partd=0.3.7=py35_0
110 | - path.py=10.1=py35_0
111 | - pathlib2=2.2.0=py35_0
112 | - patsy=0.4.1=py35_0
113 | - pep8=1.7.0=py35_0
114 | - pexpect=4.2.1=py35_0
115 | - pickleshare=0.7.4=py35_0
116 | - pillow=4.0.0=py35_1
117 | - pip=9.0.1=py35_1
118 | - ply=3.10=py35_0
119 | - prompt_toolkit=1.0.13=py35_0
120 | - psutil=5.2.0=py35_0
121 | - ptyprocess=0.5.1=py35_0
122 | - py=1.4.32=py35_0
123 | - pyasn1=0.2.3=py35_0
124 | - pycosat=0.6.1=py35_1
125 | - pycparser=2.17=py35_0
126 | - pycrypto=2.6.1=py35_4
127 | - pycurl=7.43.0=py35_2
128 | - pyflakes=1.5.0=py35_0
129 | - pygments=2.2.0=py35_0
130 | - pylint=1.6.4=py35_1
131 | - pyopenssl=16.2.0=py35_0
132 | - pyparsing=2.1.4=py35_0
133 | - pyqt=5.6.0=py35_2
134 | - pytables=3.3.0=np112py35_0
135 | - pytest=3.0.6=py35_0
136 | - python=3.5.3=1
137 | - python-dateutil=2.6.0=py35_0
138 | - python.app=1.2=py35_4
139 | - pytz=2016.10=py35_0
140 | - pyyaml=3.12=py35_0
141 | - pyzmq=16.0.2=py35_0
142 | - qt=5.6.2=0
143 | - qtawesome=0.4.4=py35_0
144 | - qtconsole=4.2.1=py35_1
145 | - qtpy=1.2.1=py35_0
146 | - readline=6.2=2
147 | - redis=3.2.0=0
148 | - redis-py=2.10.5=py35_0
149 | - requests=2.13.0=py35_0
150 | - rope=0.9.4=py35_1
151 | - ruamel_yaml=0.11.14=py35_1
152 | - scikit-image=0.12.3=np112py35_1
153 | - scikit-learn=0.18.1=np112py35_1
154 | - scipy=0.19.0=np112py35_0
155 | - seaborn=0.7.1=py35_0
156 | - setuptools=27.2.0=py35_0
157 | - simplegeneric=0.8.1=py35_1
158 | - singledispatch=3.4.0.3=py35_0
159 | - sip=4.18=py35_0
160 | - six=1.10.0=py35_0
161 | - snowballstemmer=1.2.1=py35_0
162 | - sockjs-tornado=1.0.3=py35_0
163 | - sphinx=1.5.1=py35_0
164 | - spyder=3.1.3=py35_0
165 | - sqlalchemy=1.1.6=py35_0
166 | - sqlite=3.13.0=0
167 | - statsmodels=0.8.0=np112py35_0
168 | - sympy=1.0=py35_0
169 | - terminado=0.6=py35_0
170 | - testpath=0.3=py35_0
171 | - tk=8.5.18=0
172 | - toolz=0.8.2=py35_0
173 | - tornado=4.4.2=py35_0
174 | - traitlets=4.3.2=py35_0
175 | - unicodecsv=0.14.1=py35_0
176 | - wcwidth=0.1.7=py35_0
177 | - werkzeug=0.12=py35_0
178 | - wheel=0.29.0=py35_0
179 | - widgetsnbextension=2.0.0=py35_0
180 | - wrapt=1.10.8=py35_0
181 | - xlrd=1.0.0=py35_0
182 | - xlsxwriter=0.9.6=py35_0
183 | - xlwings=0.10.2=py35_0
184 | - xlwt=1.2.0=py35_0
185 | - xz=5.2.2=1
186 | - yaml=0.1.6=0
187 | - zlib=1.2.8=3
188 | - pip:
189 |   - backports.shutil-get-terminal-size==1.0.0
190 |   - cvxopt==1.1.9
191 |   - et-xmlfile==1.0.1
192 |   - ipython-genutils==0.1.0
193 |   - jupyter-client==5.0.0
194 |   - jupyter-console==5.1.0
195 |   - jupyter-core==4.3.0
196 |   - keras==2.0.0
197 |   - opencv-python==3.2.0.6
198 |   - prompt-toolkit==1.0.13
199 |   - protobuf==3.2.0
200 |   - rope-py3k==0.9.4.post1
201 |   - tables==3.3.0
202 |   - tensorflow==1.0.0
203 |   - theano==0.8.2
204 |   - tqdm==4.11.2


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/requirements/aind-dog-windows.yml:
--------------------------------------------------------------------------------
 1 | name: aind-dog
 2 | channels:
 3 | - defaults
 4 | dependencies:
 5 | - _nb_ext_conf=0.3.0=py35_0
 6 | - anaconda-client=1.6.2=py35_0
 7 | - bleach=1.5.0=py35_0
 8 | - bzip2=1.0.6=vc14_3
 9 | - clyent=1.2.2=py35_0
10 | - colorama=0.3.7=py35_0
11 | - cycler=0.10.0=py35_0
12 | - decorator=4.0.11=py35_0
13 | - entrypoints=0.2.2=py35_1
14 | - freetype=2.5.5=vc14_2
15 | - h5py=2.7.0=np112py35_0
16 | - hdf5=1.8.15.1=vc14_4
17 | - html5lib=0.999=py35_0
18 | - icu=57.1=vc14_0
19 | - ipykernel=4.5.2=py35_0
20 | - ipython=5.3.0=py35_0
21 | - ipython_genutils=0.1.0=py35_0
22 | - ipywidgets=6.0.0=py35_0
23 | - jinja2=2.9.5=py35_0
24 | - jpeg=9b=vc14_0
25 | - jsonschema=2.5.1=py35_0
26 | - jupyter=1.0.0=py35_3
27 | - jupyter_client=5.0.0=py35_0
28 | - jupyter_console=5.1.0=py35_0
29 | - jupyter_core=4.3.0=py35_0
30 | - libpng=1.6.27=vc14_0
31 | - libtiff=4.0.6=vc14_3
32 | - markupsafe=0.23=py35_2
33 | - matplotlib=2.0.0=np112py35_0
34 | - mistune=0.7.4=py35_0
35 | - mkl=2017.0.1=0
36 | - nb_anacondacloud=1.2.0=py35_0
37 | - nb_conda=2.0.0=py35_0
38 | - nb_conda_kernels=2.0.0=py35_0
39 | - nbconvert=5.1.1=py35_0
40 | - nbformat=4.3.0=py35_0
41 | - nbpresent=3.0.2=py35_0
42 | - notebook=4.4.1=py35_0
43 | - numpy=1.12.1=py35_0
44 | - olefile=0.44=py35_0
45 | - openssl=1.0.2k=vc14_0
46 | - pandocfilters=1.4.1=py35_0
47 | - path.py=10.1=py35_0
48 | - pickleshare=0.7.4=py35_0
49 | - pillow=4.0.0=py35_1
50 | - pip=9.0.1=py35_1
51 | - prompt_toolkit=1.0.13=py35_0
52 | - pygments=2.2.0=py35_0
53 | - pyparsing=2.1.4=py35_0
54 | - pyqt=5.6.0=py35_2
55 | - python=3.5.3=0
56 | - python-dateutil=2.6.0=py35_0
57 | - pytz=2016.10=py35_0
58 | - pyyaml=3.12=py35_0
59 | - pyzmq=16.0.2=py35_0
60 | - qt=5.6.2=vc14_3
61 | - qtconsole=4.2.1=py35_2
62 | - requests=2.13.0=py35_0
63 | - scikit-learn=0.18.1=np112py35_1
64 | - scipy=0.19.0=np112py35_0
65 | - setuptools=27.2.0=py35_1
66 | - simplegeneric=0.8.1=py35_1
67 | - sip=4.18=py35_0
68 | - six=1.10.0=py35_0
69 | - testpath=0.3=py35_0
70 | - tk=8.5.18=vc14_0
71 | - tornado=4.4.2=py35_0
72 | - traitlets=4.3.2=py35_0
73 | - vs2015_runtime=14.0.25123=0
74 | - wcwidth=0.1.7=py35_0
75 | - wheel=0.29.0=py35_0
76 | - widgetsnbextension=2.0.0=py35_0
77 | - win_unicode_console=0.5=py35_0
78 | - zlib=1.2.8=vc14_3
79 | - pip:
80 |   - ipython-genutils==0.1.0
81 |   - jupyter-client==5.0.0
82 |   - jupyter-console==5.1.0
83 |   - jupyter-core==4.3.0
84 |   - keras==2.0.2
85 |   - nb-anacondacloud==1.2.0
86 |   - nb-conda==2.0.0
87 |   - nb-conda-kernels==2.0.0
88 |   - opencv-python==3.1.0.0
89 |   - prompt-toolkit==1.0.13
90 |   - protobuf==3.2.0
91 |   - tensorflow==1.0.1
92 |   - theano==0.9.0
93 |   - tqdm==4.11.2
94 |   - win-unicode-console==0.5
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/requirements/requirements.txt:
--------------------------------------------------------------------------------
 1 | opencv-python==3.2.0.6
 2 | h5py==2.6.0
 3 | matplotlib==2.0.0
 4 | numpy==1.12.0
 5 | scipy==0.18.1
 6 | tqdm==4.11.2
 7 | keras==2.0.2
 8 | scikit-learn==0.18.1
 9 | pillow==4.0.0
10 | tensorflow==1.0.0


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/bottleneck_features/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/bottleneck_features/.gitignore


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/dogvgg16.weights.best.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/dogvgg16.weights.best.hdf5


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/figures/vgg16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/figures/vgg16.png


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/figures/vgg16_transfer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/figures/vgg16_transfer.png


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/American_water_spaniel_00648.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/American_water_spaniel_00648.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Brittany_02625.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Brittany_02625.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Curly-coated_retriever_03896.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Curly-coated_retriever_03896.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06449.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06449.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06455.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06455.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06457.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Labrador_retriever_06457.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/Welsh_springer_spaniel_08203.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/Welsh_springer_spaniel_08203.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/cnn/transfer-learning/images/sopa.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/cnn/transfer-learning/images/sopa.jpg


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/.gitignore:
--------------------------------------------------------------------------------
  1 | # Log files (e.g. for TensorBoard)
  2 | logs/
  3 | 
  4 | # Mac
  5 | .DS_Store
  6 | 
  7 | # Byte-compiled / optimized / DLL files
  8 | __pycache__/
  9 | *.py[cod]
 10 | *$py.class
 11 | 
 12 | # C extensions
 13 | *.so
 14 | 
 15 | # Distribution / packaging
 16 | .Python
 17 | env/
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | .hypothesis/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # SageMath parsed files
 86 | *.sage.py
 87 | 
 88 | # dotenv
 89 | .env
 90 | 
 91 | # virtualenv
 92 | .venv
 93 | venv/
 94 | ENV/
 95 | 
 96 | # Spyder project settings
 97 | .spyderproject
 98 | .spyproject
 99 | 
100 | # Rope project settings
101 | .ropeproject
102 | 
103 | # mkdocs documentation
104 | /site
105 | 
106 | # mypy
107 | .mypy_cache/
108 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/IMDB_In_Keras.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Analyzing IMDB Data in Keras"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": false,
 15 |     "deletable": true,
 16 |     "editable": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "# Imports\n",
 21 |     "import numpy as np\n",
 22 |     "import keras\n",
 23 |     "from keras.datasets import imdb\n",
 24 |     "from keras.models import Sequential\n",
 25 |     "from keras.layers import Dense, Dropout, Activation\n",
 26 |     "from keras.preprocessing.text import Tokenizer\n",
 27 |     "import matplotlib.pyplot as plt\n",
 28 |     "%matplotlib inline\n",
 29 |     "\n",
 30 |     "np.random.seed(42)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "## 1. Loading the data\n",
 38 |     "This dataset comes preloaded with Keras, so one simple command will get us training and testing data. There is a parameter for how many words we want to look at. We've set it at 1000, but feel free to experiment."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {
 45 |     "collapsed": false,
 46 |     "deletable": true,
 47 |     "editable": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "# Loading the data (it's preloaded in Keras)\n",
 52 |     "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)\n",
 53 |     "\n",
 54 |     "print(x_train.shape)\n",
 55 |     "print(x_test.shape)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "## 2. Examining the data\n",
 63 |     "Notice that the data has already been pre-processed: every word is represented by a number, and each review comes in as a vector of the numbers of the words it contains. For example, if the word 'the' is the first one in our dictionary, and a review contains the word 'the', then the number 1 appears in the corresponding vector.\n",
 64 |     "\n",
 65 |     "The output comes as a vector of 1's and 0's, where 1 is a positive sentiment for the review, and 0 is negative."
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": false
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "print(x_train[0])\n",
 77 |     "print(y_train[0])"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "## 3. One-hot encoding the input\n",
 85 |     "Here, we'll turn the input vectors into (0,1)-vectors. For example, if the pre-processed vector contains the number 14, then in the processed vector, the entry at index 14 will be 1."
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false,
 93 |     "deletable": true,
 94 |     "editable": true
 95 |    },
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "# One-hot encoding the input into vector mode, each of length 1000\n",
 99 |     "tokenizer = Tokenizer(num_words=1000)\n",
100 |     "x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n",
101 |     "x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n",
102 |     "print(x_train[0])"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "And we'll also one-hot encode the output."
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {
116 |     "collapsed": false,
117 |     "deletable": true,
118 |     "editable": true
119 |    },
120 |    "outputs": [],
121 |    "source": [
122 |     "# One-hot encoding the output\n",
123 |     "num_classes = 2\n",
124 |     "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
125 |     "y_test = keras.utils.to_categorical(y_test, num_classes)\n",
126 |     "print(y_train.shape)\n",
127 |     "print(y_test.shape)"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "## 4. Building the model architecture\n",
135 |     "Build a model here using `Sequential`. Feel free to experiment with different layers and sizes! Also, experiment with adding dropout to reduce overfitting."
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "metadata": {
142 |     "collapsed": false,
143 |     "deletable": true,
144 |     "editable": true
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "# TODO: Build the model architecture\n",
149 |     "\n",
150 |     "# TODO: Compile the model using a loss function and an optimizer.\n"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "## 5. Training the model\n",
158 |     "Run the model here. Experiment with different batch sizes and numbers of epochs!"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {
165 |     "collapsed": false,
166 |     "deletable": true,
167 |     "editable": true
168 |    },
169 |    "outputs": [],
170 |    "source": [
171 |     "# TODO: Run the model. Feel free to experiment with different batch sizes and number of epochs."
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {},
177 |    "source": [
178 |     "## 6. Evaluating the model\n",
179 |     "This will give you the accuracy of the model, as evaluated on the testing set. Can you get something over 85%?"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": null,
185 |    "metadata": {
186 |     "collapsed": false,
187 |     "deletable": true,
188 |     "editable": true
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "score = model.evaluate(x_test, y_test, verbose=0)\n",
193 |     "print(\"Accuracy: \", score[1])"
194 |    ]
195 |   }
196 |  ],
197 |  "metadata": {
198 |   "kernelspec": {
199 |    "display_name": "Python 3",
200 |    "language": "python",
201 |    "name": "python3"
202 |   },
203 |   "language_info": {
204 |    "codemirror_mode": {
205 |     "name": "ipython",
206 |     "version": 3
207 |    },
208 |    "file_extension": ".py",
209 |    "mimetype": "text/x-python",
210 |    "name": "python",
211 |    "nbconvert_exporter": "python",
212 |    "pygments_lexer": "ipython3",
213 |    "version": "3.5.2"
214 |   }
215 |  },
216 |  "nbformat": 4,
217 |  "nbformat_minor": 2
218 | }
219 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/IMDB_In_Keras_Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": true,
  7 |     "editable": true
  8 |    },
  9 |    "source": [
 10 |     "# Analyzing IMDB Data in Keras - Solution"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 29,
 16 |    "metadata": {
 17 |     "collapsed": false,
 18 |     "deletable": true,
 19 |     "editable": true
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "# Imports\n",
 24 |     "import numpy as np\n",
 25 |     "import keras\n",
 26 |     "from keras.datasets import imdb\n",
 27 |     "from keras.models import Sequential\n",
 28 |     "from keras.layers import Dense, Dropout, Activation\n",
 29 |     "from keras.preprocessing.text import Tokenizer\n",
 30 |     "import matplotlib.pyplot as plt\n",
 31 |     "%matplotlib inline\n",
 32 |     "\n",
 33 |     "np.random.seed(42)"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {
 39 |     "deletable": true,
 40 |     "editable": true
 41 |    },
 42 |    "source": [
 43 |     "## 1. Loading the data\n",
 44 |     "This dataset comes preloaded with Keras, so one simple command will get us training and testing data. There is a parameter for how many words we want to look at. We've set it at 1000, but feel free to experiment."
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 30,
 50 |    "metadata": {
 51 |     "collapsed": false,
 52 |     "deletable": true,
 53 |     "editable": true
 54 |    },
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stdout",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "(25000,)\n",
 61 |       "(25000,)\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "# Loading the data (it's preloaded in Keras)\n",
 67 |     "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)\n",
 68 |     "\n",
 69 |     "print(x_train.shape)\n",
 70 |     "print(x_test.shape)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {
 76 |     "deletable": true,
 77 |     "editable": true
 78 |    },
 79 |    "source": [
 80 |     "## 2. Examining the data\n",
 81 |     "Notice that the data has already been pre-processed: every word is represented by a number, and each review comes in as a vector of the numbers of the words it contains. For example, if the word 'the' is the first one in our dictionary, and a review contains the word 'the', then the number 1 appears in the corresponding vector.\n",
 82 |     "\n",
 83 |     "The output comes as a vector of 1's and 0's, where 1 is a positive sentiment for the review, and 0 is negative."
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 31,
 89 |    "metadata": {
 90 |     "collapsed": false,
 91 |     "deletable": true,
 92 |     "editable": true
 93 |    },
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]\n",
100 |       "1\n"
101 |      ]
102 |     }
103 |    ],
104 |    "source": [
105 |     "print(x_train[0])\n",
106 |     "print(y_train[0])"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {
112 |     "deletable": true,
113 |     "editable": true
114 |    },
115 |    "source": [
116 |     "## 3. One-hot encoding the input and output\n",
117 |     "Here, we'll turn the input vectors into (0,1)-vectors. For example, if the pre-processed vector contains the number 14, then in the processed vector, the entry at index 14 will be 1."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 32,
123 |    "metadata": {
124 |     "collapsed": false,
125 |     "deletable": true,
126 |     "editable": true
127 |    },
128 |    "outputs": [
129 |     {
130 |      "name": "stdout",
131 |      "output_type": "stream",
132 |      "text": [
133 |       "(25000, 1000)\n",
134 |       "(25000, 1000)\n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "# Turning the input sequences into binary vectors, each of length 1000\n",
140 |     "tokenizer = Tokenizer(num_words=1000)\n",
141 |     "x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n",
142 |     "x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n",
143 |     "print(x_train.shape)\n",
144 |     "print(x_test.shape)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {
150 |     "deletable": true,
151 |     "editable": true
152 |    },
153 |    "source": [
154 |     "And we'll one-hot encode the output."
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 33,
160 |    "metadata": {
161 |     "collapsed": false,
162 |     "deletable": true,
163 |     "editable": true
164 |    },
165 |    "outputs": [
166 |     {
167 |      "name": "stdout",
168 |      "output_type": "stream",
169 |      "text": [
170 |       "(25000, 2)\n",
171 |       "(25000, 2)\n"
172 |      ]
173 |     }
174 |    ],
175 |    "source": [
176 |     "# One-hot encoding the output\n",
177 |     "num_classes = 2\n",
178 |     "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
179 |     "y_test = keras.utils.to_categorical(y_test, num_classes)\n",
180 |     "print(y_train.shape)\n",
181 |     "print(y_test.shape)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {
187 |     "deletable": true,
188 |     "editable": true
189 |    },
190 |    "source": [
191 |     "## 4. Building the model architecture\n",
192 |     "Build a model here using `Sequential`. Feel free to experiment with different layers and sizes! Also, experiment with adding dropout to reduce overfitting."
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 34,
198 |    "metadata": {
199 |     "collapsed": false,
200 |     "deletable": true,
201 |     "editable": true
202 |    },
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "_________________________________________________________________\n",
209 |       "Layer (type)                 Output Shape              Param #   \n",
210 |       "=================================================================\n",
211 |       "dense_3 (Dense)              (None, 512)               512512    \n",
212 |       "_________________________________________________________________\n",
213 |       "dropout_2 (Dropout)          (None, 512)               0         \n",
214 |       "_________________________________________________________________\n",
215 |       "dense_4 (Dense)              (None, 2)                 1026      \n",
216 |       "=================================================================\n",
217 |       "Total params: 513,538.0\n",
218 |       "Trainable params: 513,538.0\n",
219 |       "Non-trainable params: 0.0\n",
220 |       "_________________________________________________________________\n"
221 |      ]
222 |     }
223 |    ],
224 |    "source": [
225 |     "# Building the model architecture with one hidden layer of 512 units\n",
226 |     "model = Sequential()\n",
227 |     "model.add(Dense(512, activation='relu', input_dim=1000))\n",
228 |     "model.add(Dropout(0.5))\n",
229 |     "model.add(Dense(num_classes, activation='softmax'))\n",
230 |     "model.summary()\n",
231 |     "\n",
232 |     "# Compiling the model using categorical_crossentropy loss, and rmsprop optimizer.\n",
233 |     "model.compile(loss='categorical_crossentropy',\n",
234 |     "              optimizer='rmsprop',\n",
235 |     "              metrics=['accuracy'])"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {
241 |     "deletable": true,
242 |     "editable": true
243 |    },
244 |    "source": [
245 |     "## 5. Training the model\n",
246 |     "Run the model here. Experiment with different batch_size, and number of epochs!"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 35,
252 |    "metadata": {
253 |     "collapsed": false,
254 |     "deletable": true,
255 |     "editable": true
256 |    },
257 |    "outputs": [
258 |     {
259 |      "name": "stdout",
260 |      "output_type": "stream",
261 |      "text": [
262 |       "Train on 25000 samples, validate on 25000 samples\n",
263 |       "Epoch 1/10\n",
264 |       "9s - loss: 0.3969 - acc: 0.8260 - val_loss: 0.3429 - val_acc: 0.8568\n",
265 |       "Epoch 2/10\n",
266 |       "9s - loss: 0.3339 - acc: 0.8670 - val_loss: 0.3413 - val_acc: 0.8632\n",
267 |       "Epoch 3/10\n",
268 |       "9s - loss: 0.3219 - acc: 0.8778 - val_loss: 0.3552 - val_acc: 0.8614\n",
269 |       "Epoch 4/10\n",
270 |       "9s - loss: 0.3110 - acc: 0.8853 - val_loss: 0.3718 - val_acc: 0.8602\n",
271 |       "Epoch 5/10\n",
272 |       "9s - loss: 0.3056 - acc: 0.8920 - val_loss: 0.4086 - val_acc: 0.8542\n",
273 |       "Epoch 6/10\n",
274 |       "10s - loss: 0.2951 - acc: 0.8983 - val_loss: 0.3938 - val_acc: 0.8608\n",
275 |       "Epoch 7/10\n",
276 |       "9s - loss: 0.2864 - acc: 0.9037 - val_loss: 0.4258 - val_acc: 0.8566\n",
277 |       "Epoch 8/10\n",
278 |       "9s - loss: 0.2738 - acc: 0.9100 - val_loss: 0.4733 - val_acc: 0.8509\n",
279 |       "Epoch 9/10\n",
280 |       "8s - loss: 0.2622 - acc: 0.9162 - val_loss: 0.4658 - val_acc: 0.8536\n",
281 |       "Epoch 10/10\n",
282 |       "12s - loss: 0.2520 - acc: 0.9216 - val_loss: 0.4877 - val_acc: 0.8583\n"
283 |      ]
284 |     }
285 |    ],
286 |    "source": [
287 |     "# Running and evaluating the model\n",
288 |     "hist = model.fit(x_train, y_train,\n",
289 |     "          batch_size=32,\n",
290 |     "          epochs=10,\n",
291 |     "          validation_data=(x_test, y_test), \n",
292 |     "          verbose=2)"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "markdown",
297 |    "metadata": {
298 |     "deletable": true,
299 |     "editable": true
300 |    },
301 |    "source": [
302 |     "## 6. Evaluating the model\n",
303 |     "This will give you the accuracy of the model. Can you get something over 85%?"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": 36,
309 |    "metadata": {
310 |     "collapsed": false,
311 |     "deletable": true,
312 |     "editable": true
313 |    },
314 |    "outputs": [
315 |     {
316 |      "name": "stdout",
317 |      "output_type": "stream",
318 |      "text": [
319 |       "accuracy:  0.85828\n"
320 |      ]
321 |     }
322 |    ],
323 |    "source": [
324 |     "score = model.evaluate(x_test, y_test, verbose=0)\n",
325 |     "print(\"accuracy: \", score[1])"
326 |    ]
327 |   }
328 |  ],
329 |  "metadata": {
330 |   "kernelspec": {
331 |    "display_name": "Python 3",
332 |    "language": "python",
333 |    "name": "python3"
334 |   },
335 |   "language_info": {
336 |    "codemirror_mode": {
337 |     "name": "ipython",
338 |     "version": 3
339 |    },
340 |    "file_extension": ".py",
341 |    "mimetype": "text/x-python",
342 |    "name": "python",
343 |    "nbconvert_exporter": "python",
344 |    "pygments_lexer": "ipython3",
345 |    "version": "3.5.2"
346 |   }
347 |  },
348 |  "nbformat": 4,
349 |  "nbformat_minor": 2
350 | }
351 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/README.md:
--------------------------------------------------------------------------------
1 | # imdb practice project


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/requirements/aind-dl-mac-linux.yml:
--------------------------------------------------------------------------------
  1 | name: aind-dl
  2 | channels:
  3 | - damianavila82
  4 | - defaults
  5 | dependencies:
  6 | - rise=4.0.0b1=py35_0
  7 | - _license=1.1=py35_1
  8 | - alabaster=0.7.10=py35_0
  9 | - anaconda-client=1.6.2=py35_0
 10 | - anaconda=custom=py35_0
 11 | - anaconda-navigator=1.5.0=py35_0
 12 | - anaconda-project=0.4.1=py35_0
 13 | - appnope=0.1.0=py35_0
 14 | - appscript=1.0.1=py35_0
 15 | - astroid=1.4.9=py35_0
 16 | - astropy=1.3=np112py35_0
 17 | - babel=2.3.4=py35_0
 18 | - backports=1.0=py35_0
 19 | - beautifulsoup4=4.5.3=py35_0
 20 | - bitarray=0.8.1=py35_0
 21 | - blaze=0.10.1=py35_0
 22 | - bleach=1.5.0=py35_0
 23 | - bokeh=0.12.4=py35_0
 24 | - boto=2.46.1=py35_0
 25 | - bottleneck=1.2.0=np112py35_0
 26 | - cffi=1.9.1=py35_0
 27 | - chardet=2.3.0=py35_0
 28 | - chest=0.2.3=py35_0
 29 | - click=6.7=py35_0
 30 | - cloudpickle=0.2.2=py35_0
 31 | - clyent=1.2.2=py35_0
 32 | - colorama=0.3.7=py35_0
 33 | - configobj=5.0.6=py35_0
 34 | - contextlib2=0.5.4=py35_0
 35 | - cryptography=1.7.1=py35_0
 36 | - curl=7.52.1=0
 37 | - cycler=0.10.0=py35_0
 38 | - cython=0.25.2=py35_0
 39 | - cytoolz=0.8.2=py35_0
 40 | - dask=0.14.0=py35_0
 41 | - datashape=0.5.4=py35_0
 42 | - decorator=4.0.11=py35_0
 43 | - dill=0.2.5=py35_0
 44 | - docutils=0.13.1=py35_0
 45 | - entrypoints=0.2.2=py35_1
 46 | - et_xmlfile=1.0.1=py35_0
 47 | - fastcache=1.0.2=py35_1
 48 | - flask=0.12=py35_0
 49 | - flask-cors=3.0.2=py35_0
 50 | - freetype=2.5.5=2
 51 | - get_terminal_size=1.0.0=py35_0
 52 | - gevent=1.2.1=py35_0
 53 | - greenlet=0.4.12=py35_0
 54 | - h5py=2.6.0=np112py35_2
 55 | - hdf5=1.8.17=1
 56 | - heapdict=1.0.0=py35_1
 57 | - html5lib=0.999=py35_0
 58 | - icu=54.1=0
 59 | - idna=2.2=py35_0
 60 | - imagesize=0.7.1=py35_0
 61 | - ipykernel=4.5.2=py35_0
 62 | - ipython=5.3.0=py35_0
 63 | - ipython_genutils=0.1.0=py35_0
 64 | - ipywidgets=6.0.0=py35_0
 65 | - isort=4.2.5=py35_0
 66 | - itsdangerous=0.24=py35_0
 67 | - jbig=2.1=0
 68 | - jdcal=1.3=py35_0
 69 | - jedi=0.9.0=py35_1
 70 | - jinja2=2.9.5=py35_0
 71 | - jpeg=9b=0
 72 | - jsonschema=2.5.1=py35_0
 73 | - jupyter=1.0.0=py35_3
 74 | - jupyter_client=5.0.0=py35_0
 75 | - jupyter_console=5.1.0=py35_0
 76 | - jupyter_core=4.3.0=py35_0
 77 | - lazy-object-proxy=1.2.2=py35_0
 78 | - libiconv=1.14=0
 79 | - libpng=1.6.27=0
 80 | - libtiff=4.0.6=3
 81 | - libxml2=2.9.4=0
 82 | - libxslt=1.1.29=0
 83 | - llvmlite=0.16.0=py35_0
 84 | - locket=0.2.0=py35_1
 85 | - lxml=3.7.3=py35_0
 86 | - markupsafe=0.23=py35_2
 87 | - matplotlib=2.0.0=np112py35_0
 88 | - mistune=0.7.4=py35_0
 89 | - mkl=2017.0.1=0
 90 | - mkl-service=1.1.2=py35_3
 91 | - mpmath=0.19=py35_1
 92 | - multipledispatch=0.4.9=py35_0
 93 | - nbconvert=5.1.1=py35_0
 94 | - nbformat=4.3.0=py35_0
 95 | - networkx=1.11=py35_0
 96 | - nltk=3.2.2=py35_0
 97 | - nose=1.3.7=py35_1
 98 | - notebook=4.4.1=py35_0
 99 | - numba=0.31.0=np112py35_0
100 | - numexpr=2.6.2=np112py35_0
101 | - numpy=1.12.0=py35_0
102 | - numpydoc=0.6.0=py35_0
103 | - odo=0.5.0=py35_1
104 | - olefile=0.44=py35_0
105 | - openpyxl=2.4.1=py35_0
106 | - openssl=1.0.2k=0
107 | - pandas=0.19.2=np112py35_1
108 | - pandocfilters=1.4.1=py35_0
109 | - partd=0.3.7=py35_0
110 | - path.py=10.1=py35_0
111 | - pathlib2=2.2.0=py35_0
112 | - patsy=0.4.1=py35_0
113 | - pep8=1.7.0=py35_0
114 | - pexpect=4.2.1=py35_0
115 | - pickleshare=0.7.4=py35_0
116 | - pillow=4.0.0=py35_1
117 | - pip=9.0.1=py35_1
118 | - ply=3.10=py35_0
119 | - prompt_toolkit=1.0.13=py35_0
120 | - psutil=5.2.0=py35_0
121 | - ptyprocess=0.5.1=py35_0
122 | - py=1.4.32=py35_0
123 | - pyasn1=0.2.3=py35_0
124 | - pycosat=0.6.1=py35_1
125 | - pycparser=2.17=py35_0
126 | - pycrypto=2.6.1=py35_4
127 | - pycurl=7.43.0=py35_2
128 | - pyflakes=1.5.0=py35_0
129 | - pygments=2.2.0=py35_0
130 | - pylint=1.6.4=py35_1
131 | - pyopenssl=16.2.0=py35_0
132 | - pyparsing=2.1.4=py35_0
133 | - pyqt=5.6.0=py35_2
134 | - pytables=3.3.0=np112py35_0
135 | - pytest=3.0.6=py35_0
136 | - python=3.5.3=1
137 | - python-dateutil=2.6.0=py35_0
138 | - python.app=1.2=py35_4
139 | - pytz=2016.10=py35_0
140 | - pyyaml=3.12=py35_0
141 | - pyzmq=16.0.2=py35_0
142 | - qt=5.6.2=0
143 | - qtawesome=0.4.4=py35_0
144 | - qtconsole=4.2.1=py35_1
145 | - qtpy=1.2.1=py35_0
146 | - readline=6.2=2
147 | - redis=3.2.0=0
148 | - redis-py=2.10.5=py35_0
149 | - requests=2.13.0=py35_0
150 | - rope=0.9.4=py35_1
151 | - ruamel_yaml=0.11.14=py35_1
152 | - scikit-image=0.12.3=np112py35_1
153 | - scikit-learn=0.18.1=np112py35_1
154 | - scipy=0.19.0=np112py35_0
155 | - seaborn=0.7.1=py35_0
156 | - setuptools=27.2.0=py35_0
157 | - simplegeneric=0.8.1=py35_1
158 | - singledispatch=3.4.0.3=py35_0
159 | - sip=4.18=py35_0
160 | - six=1.10.0=py35_0
161 | - snowballstemmer=1.2.1=py35_0
162 | - sockjs-tornado=1.0.3=py35_0
163 | - sphinx=1.5.1=py35_0
164 | - spyder=3.1.3=py35_0
165 | - sqlalchemy=1.1.6=py35_0
166 | - sqlite=3.13.0=0
167 | - statsmodels=0.8.0=np112py35_0
168 | - sympy=1.0=py35_0
169 | - terminado=0.6=py35_0
170 | - testpath=0.3=py35_0
171 | - tk=8.5.18=0
172 | - toolz=0.8.2=py35_0
173 | - tornado=4.4.2=py35_0
174 | - traitlets=4.3.2=py35_0
175 | - unicodecsv=0.14.1=py35_0
176 | - wcwidth=0.1.7=py35_0
177 | - werkzeug=0.12=py35_0
178 | - wheel=0.29.0=py35_0
179 | - widgetsnbextension=2.0.0=py35_0
180 | - wrapt=1.10.8=py35_0
181 | - xlrd=1.0.0=py35_0
182 | - xlsxwriter=0.9.6=py35_0
183 | - xlwings=0.10.2=py35_0
184 | - xlwt=1.2.0=py35_0
185 | - xz=5.2.2=1
186 | - yaml=0.1.6=0
187 | - zlib=1.2.8=3
188 | - pip:
189 |   - backports.shutil-get-terminal-size==1.0.0
190 |   - cvxopt==1.1.9
191 |   - et-xmlfile==1.0.1
192 |   - ipython-genutils==0.1.0
193 |   - jupyter-client==5.0.0
194 |   - jupyter-console==5.1.0
195 |   - jupyter-core==4.3.0
196 |   - keras==2.0.0
197 |   - opencv-python==3.2.0.6
198 |   - prompt-toolkit==1.0.13
199 |   - protobuf==3.2.0
200 |   - rope-py3k==0.9.4.post1
201 |   - tables==3.3.0
202 |   - tensorflow==1.0.0
203 |   - theano==0.8.2
204 |   - tqdm==4.11.2
205 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/requirements/aind-dl-windows.yml:
--------------------------------------------------------------------------------
 1 | name: aind-dl
 2 | channels:
 3 | - defaults
 4 | dependencies:
 5 | - _nb_ext_conf=0.3.0=py35_0
 6 | - anaconda-client=1.6.2=py35_0
 7 | - bleach=1.5.0=py35_0
 8 | - bzip2=1.0.6=vc14_3
 9 | - clyent=1.2.2=py35_0
10 | - colorama=0.3.7=py35_0
11 | - cycler=0.10.0=py35_0
12 | - decorator=4.0.11=py35_0
13 | - entrypoints=0.2.2=py35_1
14 | - freetype=2.5.5=vc14_2
15 | - h5py=2.7.0=np112py35_0
16 | - hdf5=1.8.15.1=vc14_4
17 | - html5lib=0.999=py35_0
18 | - icu=57.1=vc14_0
19 | - ipykernel=4.5.2=py35_0
20 | - ipython=5.3.0=py35_0
21 | - ipython_genutils=0.1.0=py35_0
22 | - ipywidgets=6.0.0=py35_0
23 | - jinja2=2.9.5=py35_0
24 | - jpeg=9b=vc14_0
25 | - jsonschema=2.5.1=py35_0
26 | - jupyter=1.0.0=py35_3
27 | - jupyter_client=5.0.0=py35_0
28 | - jupyter_console=5.1.0=py35_0
29 | - jupyter_core=4.3.0=py35_0
30 | - libpng=1.6.27=vc14_0
31 | - libtiff=4.0.6=vc14_3
32 | - markupsafe=0.23=py35_2
33 | - matplotlib=2.0.0=np112py35_0
34 | - mistune=0.7.4=py35_0
35 | - mkl=2017.0.1=0
36 | - nb_anacondacloud=1.2.0=py35_0
37 | - nb_conda=2.0.0=py35_0
38 | - nb_conda_kernels=2.0.0=py35_0
39 | - nbconvert=5.1.1=py35_0
40 | - nbformat=4.3.0=py35_0
41 | - nbpresent=3.0.2=py35_0
42 | - notebook=4.4.1=py35_0
43 | - numpy=1.12.1=py35_0
44 | - olefile=0.44=py35_0
45 | - openssl=1.0.2k=vc14_0
46 | - pandocfilters=1.4.1=py35_0
47 | - path.py=10.1=py35_0
48 | - pickleshare=0.7.4=py35_0
49 | - pillow=4.0.0=py35_1
50 | - pip=9.0.1=py35_1
51 | - prompt_toolkit=1.0.13=py35_0
52 | - pygments=2.2.0=py35_0
53 | - pyparsing=2.1.4=py35_0
54 | - pyqt=5.6.0=py35_2
55 | - python=3.5.3=0
56 | - python-dateutil=2.6.0=py35_0
57 | - pytz=2016.10=py35_0
58 | - pyyaml=3.12=py35_0
59 | - pyzmq=16.0.2=py35_0
60 | - qt=5.6.2=vc14_3
61 | - qtconsole=4.2.1=py35_2
62 | - requests=2.13.0=py35_0
63 | - scikit-learn=0.18.1=np112py35_1
64 | - scipy=0.19.0=np112py35_0
65 | - setuptools=27.2.0=py35_1
66 | - simplegeneric=0.8.1=py35_1
67 | - sip=4.18=py35_0
68 | - six=1.10.0=py35_0
69 | - testpath=0.3=py35_0
70 | - tk=8.5.18=vc14_0
71 | - tornado=4.4.2=py35_0
72 | - traitlets=4.3.2=py35_0
73 | - vs2015_runtime=14.0.25123=0
74 | - wcwidth=0.1.7=py35_0
75 | - wheel=0.29.0=py35_0
76 | - widgetsnbextension=2.0.0=py35_0
77 | - win_unicode_console=0.5=py35_0
78 | - zlib=1.2.8=vc14_3
79 | - pip:
80 |   - ipython-genutils==0.1.0
81 |   - jupyter-client==5.0.0
82 |   - jupyter-console==5.1.0
83 |   - jupyter-core==4.3.0
84 |   - keras==2.0.2
85 |   - nb-anacondacloud==1.2.0
86 |   - nb-conda==2.0.0
87 |   - nb-conda-kernels==2.0.0
88 |   - opencv-python==3.1.0.0
89 |   - prompt-toolkit==1.0.13
90 |   - protobuf==3.2.0
91 |   - tensorflow==1.0.1
92 |   - theano==0.9.0
93 |   - tqdm==4.11.2
94 |   - win-unicode-console==0.5
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/requirements/requirements.txt:
--------------------------------------------------------------------------------
 1 | opencv-python==3.2.0.6
 2 | h5py==2.6.0
 3 | matplotlib==2.0.0
 4 | numpy==1.12.0
 5 | scipy==0.18.1
 6 | tqdm==4.11.2
 7 | keras==2.0.2
 8 | scikit-learn==0.18.1
 9 | pillow==4.0.0
10 | tensorflow==1.0.0
11 | pandas==0.19.2
12 | 


--------------------------------------------------------------------------------
/projects/practice_projects/imdb/student_data.csv:
--------------------------------------------------------------------------------
  1 | admit,gre,gpa,rank
  2 | 0,380,3.61,3
  3 | 1,660,3.67,3
  4 | 1,800,4,1
  5 | 1,640,3.19,4
  6 | 0,520,2.93,4
  7 | 1,760,3,2
  8 | 1,560,2.98,1
  9 | 0,400,3.08,2
 10 | 1,540,3.39,3
 11 | 0,700,3.92,2
 12 | 0,800,4,4
 13 | 0,440,3.22,1
 14 | 1,760,4,1
 15 | 0,700,3.08,2
 16 | 1,700,4,1
 17 | 0,480,3.44,3
 18 | 0,780,3.87,4
 19 | 0,360,2.56,3
 20 | 0,800,3.75,2
 21 | 1,540,3.81,1
 22 | 0,500,3.17,3
 23 | 1,660,3.63,2
 24 | 0,600,2.82,4
 25 | 0,680,3.19,4
 26 | 1,760,3.35,2
 27 | 1,800,3.66,1
 28 | 1,620,3.61,1
 29 | 1,520,3.74,4
 30 | 1,780,3.22,2
 31 | 0,520,3.29,1
 32 | 0,540,3.78,4
 33 | 0,760,3.35,3
 34 | 0,600,3.4,3
 35 | 1,800,4,3
 36 | 0,360,3.14,1
 37 | 0,400,3.05,2
 38 | 0,580,3.25,1
 39 | 0,520,2.9,3
 40 | 1,500,3.13,2
 41 | 1,520,2.68,3
 42 | 0,560,2.42,2
 43 | 1,580,3.32,2
 44 | 1,600,3.15,2
 45 | 0,500,3.31,3
 46 | 0,700,2.94,2
 47 | 1,460,3.45,3
 48 | 1,580,3.46,2
 49 | 0,500,2.97,4
 50 | 0,440,2.48,4
 51 | 0,400,3.35,3
 52 | 0,640,3.86,3
 53 | 0,440,3.13,4
 54 | 0,740,3.37,4
 55 | 1,680,3.27,2
 56 | 0,660,3.34,3
 57 | 1,740,4,3
 58 | 0,560,3.19,3
 59 | 0,380,2.94,3
 60 | 0,400,3.65,2
 61 | 0,600,2.82,4
 62 | 1,620,3.18,2
 63 | 0,560,3.32,4
 64 | 0,640,3.67,3
 65 | 1,680,3.85,3
 66 | 0,580,4,3
 67 | 0,600,3.59,2
 68 | 0,740,3.62,4
 69 | 0,620,3.3,1
 70 | 0,580,3.69,1
 71 | 0,800,3.73,1
 72 | 0,640,4,3
 73 | 0,300,2.92,4
 74 | 0,480,3.39,4
 75 | 0,580,4,2
 76 | 0,720,3.45,4
 77 | 0,720,4,3
 78 | 0,560,3.36,3
 79 | 1,800,4,3
 80 | 0,540,3.12,1
 81 | 1,620,4,1
 82 | 0,700,2.9,4
 83 | 0,620,3.07,2
 84 | 0,500,2.71,2
 85 | 0,380,2.91,4
 86 | 1,500,3.6,3
 87 | 0,520,2.98,2
 88 | 0,600,3.32,2
 89 | 0,600,3.48,2
 90 | 0,700,3.28,1
 91 | 1,660,4,2
 92 | 0,700,3.83,2
 93 | 1,720,3.64,1
 94 | 0,800,3.9,2
 95 | 0,580,2.93,2
 96 | 1,660,3.44,2
 97 | 0,660,3.33,2
 98 | 0,640,3.52,4
 99 | 0,480,3.57,2
100 | 0,700,2.88,2
101 | 0,400,3.31,3
102 | 0,340,3.15,3
103 | 0,580,3.57,3
104 | 0,380,3.33,4
105 | 0,540,3.94,3
106 | 1,660,3.95,2
107 | 1,740,2.97,2
108 | 1,700,3.56,1
109 | 0,480,3.13,2
110 | 0,400,2.93,3
111 | 0,480,3.45,2
112 | 0,680,3.08,4
113 | 0,420,3.41,4
114 | 0,360,3,3
115 | 0,600,3.22,1
116 | 0,720,3.84,3
117 | 0,620,3.99,3
118 | 1,440,3.45,2
119 | 0,700,3.72,2
120 | 1,800,3.7,1
121 | 0,340,2.92,3
122 | 1,520,3.74,2
123 | 1,480,2.67,2
124 | 0,520,2.85,3
125 | 0,500,2.98,3
126 | 0,720,3.88,3
127 | 0,540,3.38,4
128 | 1,600,3.54,1
129 | 0,740,3.74,4
130 | 0,540,3.19,2
131 | 0,460,3.15,4
132 | 1,620,3.17,2
133 | 0,640,2.79,2
134 | 0,580,3.4,2
135 | 0,500,3.08,3
136 | 0,560,2.95,2
137 | 0,500,3.57,3
138 | 0,560,3.33,4
139 | 0,700,4,3
140 | 0,620,3.4,2
141 | 1,600,3.58,1
142 | 0,640,3.93,2
143 | 1,700,3.52,4
144 | 0,620,3.94,4
145 | 0,580,3.4,3
146 | 0,580,3.4,4
147 | 0,380,3.43,3
148 | 0,480,3.4,2
149 | 0,560,2.71,3
150 | 1,480,2.91,1
151 | 0,740,3.31,1
152 | 1,800,3.74,1
153 | 0,400,3.38,2
154 | 1,640,3.94,2
155 | 0,580,3.46,3
156 | 0,620,3.69,3
157 | 1,580,2.86,4
158 | 0,560,2.52,2
159 | 1,480,3.58,1
160 | 0,660,3.49,2
161 | 0,700,3.82,3
162 | 0,600,3.13,2
163 | 0,640,3.5,2
164 | 1,700,3.56,2
165 | 0,520,2.73,2
166 | 0,580,3.3,2
167 | 0,700,4,1
168 | 0,440,3.24,4
169 | 0,720,3.77,3
170 | 0,500,4,3
171 | 0,600,3.62,3
172 | 0,400,3.51,3
173 | 0,540,2.81,3
174 | 0,680,3.48,3
175 | 1,800,3.43,2
176 | 0,500,3.53,4
177 | 1,620,3.37,2
178 | 0,520,2.62,2
179 | 1,620,3.23,3
180 | 0,620,3.33,3
181 | 0,300,3.01,3
182 | 0,620,3.78,3
183 | 0,500,3.88,4
184 | 0,700,4,2
185 | 1,540,3.84,2
186 | 0,500,2.79,4
187 | 0,800,3.6,2
188 | 0,560,3.61,3
189 | 0,,,2
190 | 0,560,3.07,2
191 | 0,500,3.35,2
192 | 1,640,2.94,2
193 | 0,800,3.54,3
194 | 0,640,3.76,3
195 | 0,380,3.59,4
196 | 1,600,3.47,2
197 | 0,560,3.59,2
198 | 0,660,3.07,3
199 | 1,400,3.23,4
200 | 0,600,3.63,3
201 | 0,580,3.77,4
202 | 0,800,3.31,3
203 | 1,580,3.2,2
204 | 1,700,4,1
205 | 0,420,3.92,4
206 | 1,600,3.89,1
207 | 1,780,3.8,3
208 | 0,740,3.54,1
209 | 1,640,3.63,1
210 | 0,540,3.16,3
211 | 0,580,3.5,2
212 | 0,740,3.34,4
213 | 0,580,3.02,2
214 | 0,,2.87,2
215 | 0,640,3.38,3
216 | 1,600,3.56,2
217 | 1,660,2.91,3
218 | 0,340,2.9,1
219 | 1,460,3.64,1
220 | 0,460,2.98,1
221 | 1,560,3.59,2
222 | 0,540,3.28,3
223 | 0,680,3.99,3
224 | 1,480,3.02,1
225 | 0,800,3.47,3
226 | 0,800,2.9,2
227 | 1,720,3.5,3
228 | 0,620,3.58,2
229 | 0,540,3.02,4
230 | 0,480,3.43,2
231 | 1,720,3.42,2
232 | 0,580,3.29,4
233 | 0,600,3.28,3
234 | 0,380,3.38,2
235 | 0,420,2.67,3
236 | 1,800,3.53,1
237 | 0,620,3.05,2
238 | 1,660,,
239 | 0,480,4,2
240 | 0,500,2.86,4
241 | 0,700,3.45,3
242 | 0,440,2.76,2
243 | 1,520,3.81,1
244 | 1,680,2.96,3
245 | 0,620,3.22,2
246 | 0,540,3.04,1
247 | 0,800,3.91,3
248 | 0,680,3.34,2
249 | 0,440,3.17,2
250 | 0,680,3.64,3
251 | 0,640,3.73,3
252 | 0,660,3.31,4
253 | 0,620,3.21,4
254 | 1,520,4,2
255 | 1,540,3.55,4
256 | 1,740,3.52,4
257 | 0,640,3.35,3
258 | 1,520,3.3,2
259 | 1,620,3.95,3
260 | 0,520,3.51,2
261 | 0,640,3.81,2
262 | 0,680,3.11,2
263 | 0,440,3.15,2
264 | 1,520,3.19,3
265 | 1,620,3.95,3
266 | 1,520,3.9,3
267 | 0,380,3.34,3
268 | 0,560,3.24,4
269 | 1,600,3.64,3
270 | 1,680,3.46,2
271 | 0,500,2.81,3
272 | 1,640,3.95,2
273 | 0,540,3.33,3
274 | 1,680,3.67,2
275 | 0,660,3.32,1
276 | 0,520,3.12,2
277 | 1,600,2.98,2
278 | 0,460,3.77,3
279 | 1,580,3.58,1
280 | 1,680,3,4
281 | 1,660,3.14,2
282 | 0,660,3.94,2
283 | 0,360,3.27,3
284 | 0,660,3.45,4
285 | 0,520,3.1,4
286 | 1,440,3.39,2
287 | 0,600,3.31,4
288 | 1,800,3.22,1
289 | 1,660,3.7,4
290 | 0,800,3.15,4
291 | 0,420,2.26,4
292 | 1,620,3.45,2
293 | 0,800,2.78,2
294 | 0,680,3.7,2
295 | 0,800,3.97,1
296 | 0,480,2.55,1
297 | 0,520,3.25,3
298 | 0,560,3.16,1
299 | 0,460,3.07,2
300 | 0,540,3.5,2
301 | 0,720,3.4,3
302 | 0,640,3.3,2
303 | 1,660,3.6,3
304 | 1,400,3.15,2
305 | 1,680,3.98,2
306 | 0,220,2.83,3
307 | 0,580,3.46,4
308 | 1,540,3.17,1
309 | 0,580,3.51,2
310 | 0,540,3.13,2
311 | 0,440,2.98,3
312 | 0,560,4,3
313 | 0,660,3.67,2
314 | 0,660,3.77,3
315 | 1,520,3.65,4
316 | 0,540,3.46,4
317 | 1,300,2.84,2
318 | 1,340,3,2
319 | 1,780,3.63,4
320 | 1,480,3.71,4
321 | 0,540,3.28,1
322 | 0,460,3.14,3
323 | 0,460,3.58,2
324 | 0,500,3.01,4
325 | 0,420,2.69,2
326 | 0,520,2.7,3
327 | 0,680,3.9,1
328 | 0,680,3.31,2
329 | 1,560,3.48,2
330 | 0,580,3.34,2
331 | 0,500,2.93,4
332 | 0,740,4,3
333 | 0,660,3.59,3
334 | 0,420,2.96,1
335 | 0,560,3.43,3
336 | 1,460,3.64,3
337 | 1,620,3.71,1
338 | 0,520,3.15,3
339 | 0,620,3.09,4
340 | 0,540,3.2,1
341 | 1,660,3.47,3
342 | 0,500,3.23,4
343 | 1,560,2.65,3
344 | 0,500,3.95,4
345 | 0,580,3.06,2
346 | 0,520,3.35,3
347 | 0,500,3.03,3
348 | 0,600,3.35,2
349 | 0,580,3.8,2
350 | 0,400,3.36,2
351 | 0,620,2.85,2
352 | 1,780,4,2
353 | 0,620,3.43,3
354 | 1,580,3.12,3
355 | 0,700,3.52,2
356 | 1,540,3.78,2
357 | 1,760,2.81,1
358 | 0,700,3.27,2
359 | 0,720,3.31,1
360 | 1,560,3.69,3
361 | 0,720,3.94,3
362 | 1,520,4,1
363 | 1,540,3.49,1
364 | 0,680,3.14,2
365 | 0,460,3.44,2
366 | 1,560,3.36,1
367 | 0,480,2.78,3
368 | 0,460,2.93,3
369 | 0,620,3.63,3
370 | 0,580,4,1
371 | 0,800,3.89,2
372 | 1,540,3.77,2
373 | 1,680,3.76,3
374 | 1,680,2.42,1
375 | 1,620,3.37,1
376 | 0,560,3.78,2
377 | 0,560,3.49,4
378 | 0,620,3.63,2
379 | 1,800,4,2
380 | 0,640,3.12,3
381 | 0,540,2.7,2
382 | 0,700,3.65,2
383 | 1,540,3.49,2
384 | 0,540,3.51,2
385 | 0,660,4,1
386 | 1,480,2.62,2
387 | 0,420,3.02,1
388 | 1,740,3.86,2
389 | 0,580,3.36,2
390 | 0,640,3.17,2
391 | 0,640,3.51,2
392 | 1,800,3.05,2
393 | 1,660,3.88,2
394 | 1,600,3.38,3
395 | 1,620,3.75,2
396 | 1,460,3.99,3
397 | 0,620,4,2
398 | 0,560,3.04,3
399 | 0,460,2.63,2
400 | 0,700,3.65,2
401 | 0,600,3.89,3


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/ReadMe.md:
--------------------------------------------------------------------------------
1 | Naive Bayes tutorial from scratch. Can be viewed in the iPython notebook. Happy learning!


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/images/bayes_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/images/bayes_formula.png


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/images/countvectorizer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/images/countvectorizer.png


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/images/dqnb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/images/dqnb.png


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/images/naivebayes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/images/naivebayes.png


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/images/tfidf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/images/tfidf.png


--------------------------------------------------------------------------------
/projects/practice_projects/naive_bayes_tutorial/smsspamcollection/readme:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/practice_projects/naive_bayes_tutorial/smsspamcollection/readme


--------------------------------------------------------------------------------
/projects/smartcab/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Reinforcement Learning
 3 | ## Project: Train a Smartcab How to Drive
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python 2.7** with the [pygame](https://www.pygame.org/wiki/GettingStarted) library installed.
 8 | 
 9 | 
10 | ### Code
11 | 
12 | Template code is provided in the `smartcab/agent.py` python file. Additional supporting python code can be found in `smartcab/environment.py`, `smartcab/planner.py`, and `smartcab/simulator.py`. Supporting images for the graphical user interface can be found in the `images` folder. While some code has already been implemented to get you started, you will need to implement additional functionality for the `LearningAgent` class in `agent.py` when requested to successfully complete the project. 
13 | 
14 | ### Run
15 | 
16 | In a terminal or command window, navigate to the top-level project directory `smartcab/` (that contains this README) and run one of the following commands:
17 | 
18 | ```python smartcab/agent.py```  
19 | ```python -m smartcab.agent```
20 | 
21 | This will run the `agent.py` file and execute your agent code.
22 | 


--------------------------------------------------------------------------------
/projects/smartcab/project_description.md:
--------------------------------------------------------------------------------
  1 | # Content: Reinforcement Learning
  2 | ## Project: Train a Smartcab How to Drive
  3 | 
  4 | ## Project Overview
  5 | 
  6 | In this project you will apply reinforcement learning techniques for a self-driving agent in a simplified world to aid it in effectively reaching its destinations in the allotted time. You will first investigate the environment the agent operates in by constructing a very basic driving implementation. Once your agent is successful at operating within the environment, you will then identify each possible state the agent can be in when considering such things as traffic lights and oncoming traffic at each intersection. With states identified, you will then implement a Q-Learning algorithm for the self-driving agent to guide the agent towards its destination within the allotted time. Finally, you will improve upon the Q-Learning algorithm to find the best configuration of learning and exploration factors to ensure the self-driving agent is reaching its destinations with consistently positive results.
  7 | 
  8 | ## Description
  9 | In the not-so-distant future, taxicab companies across the United States no longer employ human drivers to operate their fleet of vehicles. Instead, the taxicabs are operated by self-driving agents, known as *smartcabs*, to transport people from one location to another within the cities those companies operate. In major metropolitan areas, such as Chicago, New York City, and San Francisco, an increasing number of people have come to depend on *smartcabs* to get to where they need to go as safely and reliably as possible. Although *smartcabs* have become the transport of choice, concerns have arisen that a self-driving agent might not be as safe or reliable as human drivers, particularly when considering city traffic lights and other vehicles. To alleviate these concerns, your task as an employee for a national taxicab company is to use reinforcement learning techniques to construct a demonstration of a *smartcab* operating in real-time to prove that both safety and reliability can be achieved.
 10 | 
 11 | ## Software Requirements
 12 | This project uses the following software and Python libraries:
 13 | 
 14 | - [Python 2.7](https://www.python.org/download/releases/2.7/)
 15 | - [NumPy](http://www.numpy.org/)
 16 | - [pandas](http://pandas.pydata.org/)
 17 | - [matplotlib](http://matplotlib.org/)
 18 | - [PyGame](http://pygame.org/)
 19 | 
 20 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer. `pygame` can then be installed using one of the following commands:
 21 | 
 22 | Mac:  `conda install -c https://conda.anaconda.org/quasiben pygame`  
 23 | Windows: `conda install -c https://conda.anaconda.org/prkrekel pygame`  
 24 | Linux:  `conda install -c https://conda.anaconda.org/tlatorre pygame`  
 25 | 
 26 | ## Fixing Common PyGame Problems
 27 | 
 28 | The PyGame library can in some cases require a bit of troubleshooting to work correctly for this project. While the PyGame aspect of the project is not required for a successful submission  (you can complete the project without a visual simulation, although it is more difficult), it is very helpful to have it working! If you encounter an issue with PyGame, first see these helpful links below that are developed by communities of users working with the library:
 29 | - [Getting Started](https://www.pygame.org/wiki/GettingStarted)
 30 | - [PyGame Information](http://www.pygame.org/wiki/info)
 31 | - [Google Group](https://groups.google.com/forum/#!forum/pygame-mirror-on-google-groups)
 32 | - [PyGame subreddit](https://www.reddit.com/r/pygame/)
 33 | 
 34 | ### Problems most often reported by students
 35 | _"PyGame won't install on my machine; there was an issue with the installation."_  
 36 | **Solution:** As has been recommended for previous projects, Udacity suggests that you use the Anaconda distribution of Python, which then allows you to install PyGame through the `conda`-specific command.
 37 | 
 38 | _"I'm seeing a black screen when running the code; output says that it can't load car images."_  
 39 | **Solution:** The code will not operate correctly unless it is run from the top-level directory for `smartcab`. The top-level directory is the one that contains the **README** and the project notebook.
 40 | 
 41 | If you continue to have problems with the project code in regards to PyGame, you can also [use the discussion forums](https://discussions.udacity.com/c/nd009-reinforcement-learning) to find posts from students that encountered issues that you may be experiencing. Additionally, you can seek help from a swath of students in the [MLND Student Slack Community](http://mlnd.slack.com).
 42 | 
 43 | ## Starting the Project
 44 | 
 45 | For this assignment, you can find the `smartcab` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
 46 | 
 47 | This project contains three directories:
 48 | 
 49 | - `/logs/`: This folder will contain all log files that are given from the simulation when specific prerequisites are met.
 50 | - `/images/`: This folder contains various images of cars to be used in the graphical user interface. You will not need to modify or create any files in this directory.
 51 | - `/smartcab/`: This folder contains the Python scripts that create the environment, graphical user interface, the simulation, and the agents. You will not need to modify or create any files in this directory except for `agent.py`.
 52 | 
 53 | It also contains two files:
 54 | - `smartcab.ipynb`: This is the main file where you will answer questions and provide an analysis for your work.
 55 | - `visuals.py`: This Python script provides supplementary visualizations for the analysis. Do not modify.
 56 | 
 57 | Finally, in `/smartcab/` are the following four files:
 58 | - **Modify:**
 59 |   - `agent.py`: This is the main Python file where you will be performing your work on the project.
 60 | - **Do not modify:**
 61 |   - `environment.py`: This Python file will create the *smartcab* environment.
 62 |   - `planner.py`: This Python file creates a high-level planner for the agent to follow towards a set goal.
 63 |   - `simulator.py`: This Python file creates the simulation and graphical user interface.
 64 | 
 65 | ### Running the Code
 66 | In a terminal or command window, navigate to the top-level project directory `smartcab/` (that contains the two project directories) and run one of the following commands:
 67 | 
 68 | `python smartcab/agent.py` or  
 69 | `python -m smartcab.agent`
 70 | 
 71 | This will run the `agent.py` file and execute your implemented agent code into the environment. Additionally, use the command `jupyter notebook smartcab.ipynb` from this same directory to open up a browser window or tab to work with your analysis notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the implementation necessary for your `agent.py` agent file. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project.
 72 | 
 73 | ## Definitions
 74 | 
 75 | ### Environment
 76 | The *smartcab* operates in an ideal, grid-like city (similar to New York City), with roads going in the North-South and East-West directions. Other vehicles will certainly be present on the road, but there will be no pedestrians to be concerned with. At each intersection there is a traffic light that either allows traffic in the North-South direction or the East-West direction. U.S. Right-of-Way rules apply: 
 77 | - On a green light, a left turn is permitted if there is no oncoming traffic making a right turn or coming straight through the intersection.
 78 | - On a red light, a right turn is permitted if no oncoming traffic is approaching from your left through the intersection.
 79 | To understand how to correctly yield to oncoming traffic when turning left, you may refer to [this official driver's education video](https://www.youtube.com/watch?v=TW0Eq2Q-9Ac), or [this passionate exposition](https://www.youtube.com/watch?v=0EdkxI6NeuA).
 80 | 
 81 | ### Inputs and Outputs
 82 | Assume that the *smartcab* is assigned a route plan based on the passenger's starting location and destination. The route is split at each intersection into waypoints, and you may assume that the *smartcab*, at any instant, is at some intersection in the world. Therefore, the next waypoint to the destination, assuming the destination has not already been reached, is one intersection away in one direction (North, South, East, or West). The *smartcab* has only an egocentric view of the intersection it is at: It can determine the state of the traffic light for its direction of movement, and whether there is a vehicle at the intersection for each of the oncoming directions. For each action, the *smartcab* may either idle at the intersection, or drive to the next intersection to the left, right, or ahead of it. Finally, each trip has a time to reach the destination which decreases for each action taken (the passengers want to get there quickly). If the allotted time becomes zero before reaching the destination, the trip has failed.
 83 | 
 84 | ### Rewards and Goal
 85 | The *smartcab* will receive positive or negative rewards based on the action it has taken. Expectedly, the *smartcab* will receive a small positive reward when making a good action, and a varying amount of negative reward dependent on the severity of the traffic violation it would have committed. Based on the rewards and penalties the *smartcab* receives, the self-driving agent implementation should learn an optimal policy for driving on the city roads while obeying traffic rules, avoiding accidents, and reaching passengers' destinations in the allotted time.
 86 | 
 87 | ## Submitting the Project
 88 | 
 89 | ### Evaluation
 90 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/106/view" target="_blank">Train a Smartcab to Drive project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
 91 | 
 92 | ### Submission Files
 93 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `smartcab` for ease of access:
 94 | - The `agent.py` Python file with all code implemented as required in the instructed tasks.
 95 | - The `/logs/` folder which should contain **five** log files that were produced from your simulation and used in the analysis.
 96 | - The `smartcab.ipynb` notebook file with all questions answered and all visualization cells executed and displaying results.
 97 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
 98 | 
 99 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
100 | 
101 | ### I'm Ready!
102 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
103 | 
104 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
105 | 
106 | ### What's Next?
107 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/__init__.py


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/agent.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import math
  3 | from environment import Agent, Environment
  4 | from planner import RoutePlanner
  5 | from simulator import Simulator
  6 | 
  7 | class LearningAgent(Agent):
  8 |     """ An agent that learns to drive in the Smartcab world.
  9 |         This is the object you will be modifying. """ 
 10 | 
 11 |     def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
 12 |         super(LearningAgent, self).__init__(env)     # Set the agent in the evironment 
 13 |         self.planner = RoutePlanner(self.env, self)  # Create a route planner
 14 |         self.valid_actions = self.env.valid_actions  # The set of valid actions
 15 | 
 16 |         # Set parameters of the learning agent
 17 |         self.learning = learning # Whether the agent is expected to learn
 18 |         self.Q = dict()          # Create a Q-table which will be a dictionary of tuples
 19 |         self.epsilon = epsilon   # Random exploration factor
 20 |         self.alpha = alpha       # Learning factor
 21 | 
 22 |         ###########
 23 |         ## TO DO ##
 24 |         ###########
 25 |         # Set any additional class parameters as needed
 26 | 
 27 | 
 28 |     def reset(self, destination=None, testing=False):
 29 |         """ The reset function is called at the beginning of each trial.
 30 |             'testing' is set to True if testing trials are being used
 31 |             once training trials have completed. """
 32 | 
 33 |         # Select the destination as the new location to route to
 34 |         self.planner.route_to(destination)
 35 |         
 36 |         ########### 
 37 |         ## TO DO ##
 38 |         ###########
 39 |         # Update epsilon using a decay function of your choice
 40 |         # Update additional class parameters as needed
 41 |         # If 'testing' is True, set epsilon and alpha to 0
 42 | 
 43 |         return None
 44 | 
 45 |     def build_state(self):
 46 |         """ The build_state function is called when the agent requests data from the 
 47 |             environment. The next waypoint, the intersection inputs, and the deadline 
 48 |             are all features available to the agent. """
 49 | 
 50 |         # Collect data about the environment
 51 |         waypoint = self.planner.next_waypoint() # The next waypoint 
 52 |         inputs = self.env.sense(self)           # Visual input - intersection light and traffic
 53 |         deadline = self.env.get_deadline(self)  # Remaining deadline
 54 | 
 55 |         ########### 
 56 |         ## TO DO ##
 57 |         ###########
 58 |         
 59 |         # NOTE : you are not allowed to engineer features outside of the inputs available.
 60 |         # Because the aim of this project is to teach Reinforcement Learning, we have placed 
 61 |         # constraints in order for you to learn how to adjust epsilon and alpha, and thus learn about the balance between exploration and exploitation.
 62 |         # With the hand-engineered features, this learning process gets entirely negated.
 63 |         
 64 |         # Set 'state' as a tuple of relevant data for the agent        
 65 |         state = None
 66 | 
 67 |         return state
 68 | 
 69 | 
 70 |     def get_maxQ(self, state):
 71 |         """ The get_maxQ function is called when the agent is asked to find the
 72 |             maximum Q-value of all actions based on the 'state' the smartcab is in. """
 73 | 
 74 |         ########### 
 75 |         ## TO DO ##
 76 |         ###########
 77 |         # Calculate the maximum Q-value of all actions for a given state
 78 | 
 79 |         maxQ = None
 80 | 
 81 |         return maxQ 
 82 | 
 83 | 
 84 |     def createQ(self, state):
 85 |         """ The createQ function is called when a state is generated by the agent. """
 86 | 
 87 |         ########### 
 88 |         ## TO DO ##
 89 |         ###########
 90 |         # When learning, check if the 'state' is not in the Q-table
 91 |         # If it is not, create a new dictionary for that state
 92 |         #   Then, for each action available, set the initial Q-value to 0.0
 93 | 
 94 |         return
 95 | 
 96 | 
 97 |     def choose_action(self, state):
 98 |         """ The choose_action function is called when the agent is asked to choose
 99 |             which action to take, based on the 'state' the smartcab is in. """
100 | 
101 |         # Set the agent state and default action
102 |         self.state = state
103 |         self.next_waypoint = self.planner.next_waypoint()
104 |         action = None
105 | 
106 |         ########### 
107 |         ## TO DO ##
108 |         ###########
109 |         # When not learning, choose a random action
110 |         # When learning, choose a random action with 'epsilon' probability
111 |         # Otherwise, choose an action with the highest Q-value for the current state
112 |         # Be sure that when choosing an action with highest Q-value that you randomly select between actions that "tie".
113 |         return action
114 | 
115 | 
116 |     def learn(self, state, action, reward):
117 |         """ The learn function is called after the agent completes an action and
118 |             receives a reward. This function does not consider future rewards 
119 |             when conducting learning. """
120 | 
121 |         ########### 
122 |         ## TO DO ##
123 |         ###########
124 |         # When learning, implement the value iteration update rule
125 |         #   Use only the learning rate 'alpha' (do not use the discount factor 'gamma')
126 | 
127 |         return
128 | 
129 | 
130 |     def update(self):
131 |         """ The update function is called when a time step is completed in the 
132 |             environment for a given trial. This function will build the agent
133 |             state, choose an action, receive a reward, and learn if enabled. """
134 | 
135 |         state = self.build_state()          # Get current state
136 |         self.createQ(state)                 # Create 'state' in Q-table
137 |         action = self.choose_action(state)  # Choose an action
138 |         reward = self.env.act(self, action) # Receive a reward
139 |         self.learn(state, action, reward)   # Q-learn
140 | 
141 |         return
142 |         
143 | 
def run():
    """ Entry point that wires up and runs the simulation.
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    # ---- Environment ------------------------------------------------------
    # Optional flags:
    #   verbose     - True prints additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment (default 100)
    #   grid_size   - discrete number of intersections as (columns, rows) (default (8, 6))
    env = Environment()

    # ---- Driving agent ----------------------------------------------------
    # Optional flags:
    #   learning   - True forces the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor (default 1)
    #    * alpha   - continuous value for the learning rate (default 0.5)
    agent = env.create_agent(LearningAgent)

    # ---- Follow the driving agent -----------------------------------------
    # Optional flags:
    #   enforce_deadline - True enforces a deadline metric
    env.set_primary_agent(agent)

    # ---- Simulation -------------------------------------------------------
    # Optional flags:
    #   update_delay - continuous time in seconds between actions (default 2.0)
    #   display      - False disables the GUI if PyGame is enabled
    #   log_metrics  - True logs trial and simulation results to /logs
    #   optimized    - True changes the default log file name
    sim = Simulator(env)

    # ---- Run --------------------------------------------------------------
    # Optional flags:
    #   tolerance  - epsilon tolerance before beginning testing (default 0.05)
    #   n_test     - discrete number of testing trials to perform (default 0)
    sim.run()
185 | 
186 | 
# Start the simulation only when this file is executed as a script
if __name__ == "__main__":
    run()
189 | 


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-black.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-blue.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-cyan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-cyan.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-green.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-green.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-magenta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-magenta.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-orange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-orange.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-red.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-white.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/car-yellow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/car-yellow.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/east-west.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/east-west.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/logo.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/images/north-south.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clementmihailescu/machine-learning/7ba8bd66d491a31f1650f73f7a0d26ff619e20cd/projects/smartcab/smartcab/images/north-south.png


--------------------------------------------------------------------------------
/projects/smartcab/smartcab/planner.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | class RoutePlanner(object):
 4 |     """ Complex route planner that is meant for a perpendicular grid network. """
 5 | 
 6 |     def __init__(self, env, agent):
 7 |         self.env = env
 8 |         self.agent = agent
 9 |         self.destination = None
10 | 
11 |     def route_to(self, destination=None):
12 |         """ Select the destination if one is provided, otherwise choose a random intersection. """
13 | 
14 |         self.destination = destination if destination is not None else random.choice(self.env.intersections.keys())
15 | 
16 |     def next_waypoint(self):
17 |         """ Creates the next waypoint based on current heading, location,
18 |             intended destination and L1 distance from destination. """
19 | 
20 |         # Collect global location details
21 |         bounds = self.env.grid_size
22 |         location = self.env.agent_states[self.agent]['location']
23 |         heading = self.env.agent_states[self.agent]['heading']
24 | 
25 |         delta_a = (self.destination[0] - location[0], self.destination[1] - location[1])
26 |         delta_b = (bounds[0] + delta_a[0] if delta_a[0] <= 0 else delta_a[0] - bounds[0], \
27 |                    bounds[1] + delta_a[1] if delta_a[1] <= 0 else delta_a[1] - bounds[1])
28 | 
29 |         # Calculate true difference in location based on world-wrap
30 |         # This will pre-determine the need for U-turns from improper headings
31 |         dx = delta_a[0] if abs(delta_a[0]) < abs(delta_b[0]) else delta_b[0]
32 |         dy = delta_a[1] if abs(delta_a[1]) < abs(delta_b[1]) else delta_b[1]
33 | 
34 |         # First check if destination is at location
35 |         if dx == 0 and dy == 0:
36 |             return None
37 |         
38 |         # Next check if destination is cardinally East or West of location    
39 |         elif dx != 0:
40 | 
41 |             if dx * heading[0] > 0:  # Heading the correct East or West direction
42 |                 return 'forward'
43 |             elif dx * heading[0] < 0 and heading[0] < 0: # Heading West, destination East
44 |                 if dy > 0: # Destination also to the South
45 |                     return 'left'
46 |                 else:
47 |                     return 'right'
48 |             elif dx * heading[0] < 0 and heading[0] > 0: # Heading East, destination West
49 |                 if dy < 0: # Destination also to the North
50 |                     return 'left'
51 |                 else:
52 |                     return 'right'
53 |             elif dx * heading[1] > 0: # Heading North destination West; Heading South destination East
54 |                 return 'left'
55 |             else:
56 |                 return 'right'
57 | 
58 |         # Finally, check if destination is cardinally North or South of location
59 |         elif dy != 0:
60 | 
61 |             if dy * heading[1] > 0:  # Heading the correct North or South direction
62 |                 return 'forward'
63 |             elif dy * heading[1] < 0 and heading[1] < 0: # Heading North, destination South
64 |                 if dx < 0: # Destination also to the West
65 |                     return 'left'
66 |                 else:
67 |                     return 'right'
68 |             elif dy * heading[1] < 0 and heading[1] > 0: # Heading South, destination North
69 |                 if dx > 0: # Destination also to the East
70 |                     return 'left'
71 |                 else:
72 |                     return 'right'
73 |             elif dy * heading[0] > 0: # Heading West destination North; Heading East destination South
74 |                 return 'right'
75 |             else:
76 |                 return 'left'


--------------------------------------------------------------------------------
/projects/smartcab/visuals.py:
--------------------------------------------------------------------------------
  1 | ###########################################
  2 | # Suppress matplotlib user warnings
  3 | # Necessary for newer version of matplotlib
  4 | import warnings
  5 | warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
  6 | ###########################################
  7 | #
  8 | # Display inline matplotlib plots with IPython
  9 | from IPython import get_ipython
 10 | get_ipython().run_line_magic('matplotlib', 'inline')
 11 | ###########################################
 12 | 
 13 | import matplotlib.pyplot as plt
 14 | import numpy as np
 15 | import pandas as pd
 16 | import os
 17 | import ast
 18 | 
 19 | 
 20 | def calculate_safety(data):
 21 | 	""" Calculates the safety rating of the smartcab during testing. """
 22 | 
 23 | 	good_ratio = data['good_actions'].sum() * 1.0 / \
 24 | 	(data['initial_deadline'] - data['final_deadline']).sum()
 25 | 
 26 | 	if good_ratio == 1: # Perfect driving
 27 | 		return ("A+", "green")
 28 | 	else: # Imperfect driving
 29 | 		if data['actions'].apply(lambda x: ast.literal_eval(x)[4]).sum() > 0: # Major accident
 30 | 			return ("F", "red")
 31 | 		elif data['actions'].apply(lambda x: ast.literal_eval(x)[3]).sum() > 0: # Minor accident
 32 | 			return ("D", "#EEC700")
 33 | 		elif data['actions'].apply(lambda x: ast.literal_eval(x)[2]).sum() > 0: # Major violation
 34 | 			return ("C", "#EEC700")
 35 | 		else: # Minor violation
 36 | 			minor = data['actions'].apply(lambda x: ast.literal_eval(x)[1]).sum()
 37 | 			if minor >= len(data)/2: # Minor violation in at least half of the trials
 38 | 				return ("B", "green")
 39 | 			else:
 40 | 				return ("A", "green")
 41 | 
 42 | 
 43 | def calculate_reliability(data):
 44 | 	""" Calculates the reliability rating of the smartcab during testing. """
 45 | 
 46 | 	success_ratio = data['success'].sum() * 1.0 / len(data)
 47 | 
 48 | 	if success_ratio == 1: # Always meets deadline
 49 | 		return ("A+", "green")
 50 | 	else:
 51 | 		if success_ratio >= 0.90:
 52 | 			return ("A", "green")
 53 | 		elif success_ratio >= 0.80:
 54 | 			return ("B", "green")
 55 | 		elif success_ratio >= 0.70:
 56 | 			return ("C", "#EEC700")
 57 | 		elif success_ratio >= 0.60:
 58 | 			return ("D", "#EEC700")
 59 | 		else:
 60 | 			return ("F", "red")
 61 | 
 62 | 
 63 | def plot_trials(csv):
 64 | 	""" Plots the data from logged metrics during a simulation."""
 65 | 
 66 | 	data = pd.read_csv(os.path.join("logs", csv))
 67 | 
 68 | 	if len(data) < 10:
 69 | 		print "Not enough data collected to create a visualization."
 70 | 		print "At least 20 trials are required."
 71 | 		return
 72 | 	
 73 | 	# Create additional features
 74 | 	data['average_reward'] = (data['net_reward'] / (data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 75 | 	data['reliability_rate'] = (data['success']*100).rolling(window=10, center=False).mean()  # compute avg. net reward with window=10
 76 | 	data['good_actions'] = data['actions'].apply(lambda x: ast.literal_eval(x)[0])
 77 | 	data['good'] = (data['good_actions'] * 1.0 / \
 78 | 		(data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 79 | 	data['minor'] = (data['actions'].apply(lambda x: ast.literal_eval(x)[1]) * 1.0 / \
 80 | 		(data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 81 | 	data['major'] = (data['actions'].apply(lambda x: ast.literal_eval(x)[2]) * 1.0 / \
 82 | 		(data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 83 | 	data['minor_acc'] = (data['actions'].apply(lambda x: ast.literal_eval(x)[3]) * 1.0 / \
 84 | 		(data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 85 | 	data['major_acc'] = (data['actions'].apply(lambda x: ast.literal_eval(x)[4]) * 1.0 / \
 86 | 		(data['initial_deadline'] - data['final_deadline'])).rolling(window=10, center=False).mean()
 87 | 	data['epsilon'] = data['parameters'].apply(lambda x: ast.literal_eval(x)['e']) 
 88 | 	data['alpha'] = data['parameters'].apply(lambda x: ast.literal_eval(x)['a']) 
 89 | 
 90 | 
 91 | 	# Create training and testing subsets
 92 | 	training_data = data[data['testing'] == False]
 93 | 	testing_data = data[data['testing'] == True]
 94 | 
 95 | 	plt.figure(figsize=(12,8))
 96 | 
 97 | 
 98 | 	###############
 99 | 	### Average step reward plot
100 | 	###############
101 | 	
102 | 	ax = plt.subplot2grid((6,6), (0,3), colspan=3, rowspan=2)
103 | 	ax.set_title("10-Trial Rolling Average Reward per Action")
104 | 	ax.set_ylabel("Reward per Action")
105 | 	ax.set_xlabel("Trial Number")
106 | 	ax.set_xlim((10, len(training_data)))
107 | 
108 | 	# Create plot-specific data
109 | 	step = training_data[['trial','average_reward']].dropna()
110 | 
111 | 	ax.axhline(xmin = 0, xmax = 1, y = 0, color = 'black', linestyle = 'dashed')
112 | 	ax.plot(step['trial'], step['average_reward'])
113 | 
114 | 
115 | 	###############
116 | 	### Parameters Plot
117 | 	###############
118 | 
119 | 	ax = plt.subplot2grid((6,6), (2,3), colspan=3, rowspan=2)
120 | 
121 | 	# Check whether the agent was expected to learn
122 | 	if csv != 'sim_no-learning.csv':
123 | 		ax.set_ylabel("Parameter Value")
124 | 		ax.set_xlabel("Trial Number")
125 | 		ax.set_xlim((1, len(training_data)))
126 | 		ax.set_ylim((0, 1.05))
127 | 
128 | 		ax.plot(training_data['trial'], training_data['epsilon'], color='blue', label='Exploration factor')
129 | 		ax.plot(training_data['trial'], training_data['alpha'], color='green', label='Learning factor')
130 | 
131 | 		ax.legend(bbox_to_anchor=(0.5,1.19), fancybox=True, ncol=2, loc='upper center', fontsize=10)
132 | 
133 | 	else:
134 | 		ax.axis('off')
135 | 		ax.text(0.52, 0.30, "Simulation completed\nwith learning disabled.", fontsize=24, ha='center', style='italic')	
136 | 
137 | 
138 | 	###############
139 | 	### Bad Actions Plot
140 | 	###############
141 | 	
142 | 	actions = training_data[['trial','good', 'minor','major','minor_acc','major_acc']].dropna()
143 | 	maximum = (1 - actions['good']).values.max()
144 | 	
145 | 	ax = plt.subplot2grid((6,6), (0,0), colspan=3, rowspan=4)
146 | 	ax.set_title("10-Trial Rolling Relative Frequency of Bad Actions")
147 | 	ax.set_ylabel("Relative Frequency")
148 | 	ax.set_xlabel("Trial Number")
149 | 
150 | 	ax.set_ylim((0, maximum + 0.01))
151 | 	ax.set_xlim((10, len(training_data)))
152 | 
153 | 	ax.set_yticks(np.linspace(0, maximum+0.01, 10))
154 | 
155 | 	ax.plot(actions['trial'], (1 - actions['good']), color='black', label='Total Bad Actions', linestyle='dotted', linewidth=3)
156 | 	ax.plot(actions['trial'], actions['minor'], color='orange', label='Minor Violation', linestyle='dashed')
157 | 	ax.plot(actions['trial'], actions['major'], color='orange', label='Major Violation', linewidth=2)
158 | 	ax.plot(actions['trial'], actions['minor_acc'], color='red', label='Minor Accident', linestyle='dashed')
159 | 	ax.plot(actions['trial'], actions['major_acc'], color='red', label='Major Accident', linewidth=2)
160 | 	
161 | 	ax.legend(loc='upper right', fancybox=True, fontsize=10)
162 | 
163 | 
164 | 	###############
165 | 	### Rolling Success-Rate plot
166 | 	###############
167 | 	
168 | 	ax = plt.subplot2grid((6,6), (4,0), colspan=4, rowspan=2)
169 | 	ax.set_title("10-Trial Rolling Rate of Reliability")
170 | 	ax.set_ylabel("Rate of Reliability")
171 | 	ax.set_xlabel("Trial Number")
172 | 	ax.set_xlim((10, len(training_data)))
173 | 	ax.set_ylim((-5, 105))
174 | 	ax.set_yticks(np.arange(0, 101, 20))
175 | 	ax.set_yticklabels(['0%', '20%', '40%', '60%', '80%', '100%'])
176 | 
177 | 	# Create plot-specific data
178 | 	trial = training_data.dropna()['trial']
179 | 	rate = training_data.dropna()['reliability_rate']
180 | 
181 | 	# Rolling success rate
182 | 	ax.plot(trial, rate, label="Reliability Rate", color='blue')
183 | 
184 | 
185 | 	###############
186 | 	### Test results
187 | 	###############
188 | 
189 | 	ax = plt.subplot2grid((6,6), (4,4), colspan=2, rowspan=2)
190 | 	ax.axis('off')
191 | 
192 | 	if len(testing_data) > 0:
193 | 		safety_rating, safety_color = calculate_safety(testing_data)
194 | 		reliability_rating, reliability_color = calculate_reliability(testing_data)
195 | 
196 | 		# Write success rate
197 | 		ax.text(0.40, .9, "{} testing trials simulated.".format(len(testing_data)), fontsize=14, ha='center')
198 | 		ax.text(0.40, 0.7, "Safety Rating:", fontsize=16, ha='center')
199 | 		ax.text(0.40, 0.42, "{}".format(safety_rating), fontsize=40, ha='center', color=safety_color)
200 | 		ax.text(0.40, 0.27, "Reliability Rating:", fontsize=16, ha='center')
201 | 		ax.text(0.40, 0, "{}".format(reliability_rating), fontsize=40, ha='center', color=reliability_color)
202 | 
203 | 	else:
204 | 		ax.text(0.36, 0.30, "Simulation completed\nwith testing disabled.", fontsize=20, ha='center', style='italic')	
205 | 
206 | 	plt.tight_layout()
207 | 	plt.show()
208 | 


--------------------------------------------------------------------------------
/projects/student_intervention/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | # Supervised Learning
 3 | ## Project: Building a Student Intervention System
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python 2.7** and the following Python libraries installed:
 8 | 
 9 | - [NumPy](http://www.numpy.org/)
10 | - [Pandas](http://pandas.pydata.org)
11 | - [scikit-learn](http://scikit-learn.org/stable/)
12 | 
13 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html)
14 | 
15 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer.
16 | 
17 | ### Code
18 | 
19 | Template code is provided in the `student_intervention.ipynb` notebook file. You will also be required to use the `student-data.csv` dataset file to complete your work. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project.
20 | 
21 | ### Run
22 | 
23 | In a terminal or command window, navigate to the top-level project directory `student_intervention/` (that contains this README) and run one of the following commands:
24 | 
25 | ```bash
26 | ipython notebook student_intervention.ipynb
27 | ```  
28 | or
29 | ```bash
30 | jupyter notebook student_intervention.ipynb
31 | ```
32 | 
33 | This will open the Jupyter Notebook software and project file in your browser.
34 | 
35 | ### Data
36 | 
37 | The dataset used in this project is included as `student-data.csv`. This dataset has the following attributes:
38 | 
39 | - `school` : student's school (binary: "GP" or "MS")
40 | - `sex` : student's sex (binary: "F" - female or "M" - male)
41 | - `age` : student's age (numeric: from 15 to 22)
42 | - `address` : student's home address type (binary: "U" - urban or "R" - rural)
43 | - `famsize` : family size (binary: "LE3" - less or equal to 3 or "GT3" - greater than 3)
44 | - `Pstatus` : parent's cohabitation status (binary: "T" - living together or "A" - apart)
45 | - `Medu` : mother's education (numeric: 0 - none,  1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
46 | - `Fedu` : father's education (numeric: 0 - none,  1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
47 | - `Mjob` : mother's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other")
48 | - `Fjob` : father's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other")
49 | - `reason` : reason to choose this school (nominal: close to "home", school "reputation", "course" preference or "other")
50 | - `guardian` : student's guardian (nominal: "mother", "father" or "other")
51 | - `traveltime` : home to school travel time (numeric: 1 - <15 min., 2 - 15 to 30 min., 3 - 30 min. to 1 hour, or 4 - >1 hour)
52 | - `studytime` : weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours)
53 | - `failures` : number of past class failures (numeric: n if 1<=n<3, else 4)
54 | - `schoolsup` : extra educational support (binary: yes or no)
55 | - `famsup` : family educational support (binary: yes or no)
56 | - `paid` : extra paid classes within the course subject (Math or Portuguese) (binary: yes or no)
57 | - `activities` : extra-curricular activities (binary: yes or no)
58 | - `nursery` : attended nursery school (binary: yes or no)
59 | - `higher` : wants to take higher education (binary: yes or no)
60 | - `internet` : Internet access at home (binary: yes or no)
61 | - `romantic` : with a romantic relationship (binary: yes or no)
62 | - `famrel` : quality of family relationships (numeric: from 1 - very bad to 5 - excellent)
63 | - `freetime` : free time after school (numeric: from 1 - very low to 5 - very high)
64 | - `goout` : going out with friends (numeric: from 1 - very low to 5 - very high)
65 | - `Dalc` : workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
66 | - `Walc` : weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
67 | - `health` : current health status (numeric: from 1 - very bad to 5 - very good)
68 | - `absences` : number of school absences (numeric: from 0 to 93)
69 | - `passed` : did the student pass the final exam (binary: yes or no)
70 | 


--------------------------------------------------------------------------------
/projects/student_intervention/project_description.md:
--------------------------------------------------------------------------------
 1 | # Content: Supervised Learning
 2 | ## Project: Creating a Student Intervention System
 3 | 
 4 | ## Project Overview
 5 | As education has grown to rely more on technology, vast amounts of data have become available for examination and prediction. Logs of student activities, grades, interactions with teachers and fellow students, and more, are now captured in real time through learning management systems like **Canvas** and **Edmodo**. This is especially true for online classrooms, which are becoming popular even at the primary and secondary school level. Within all levels of education, there exists a push to help increase the likelihood of student success, without watering down the education or engaging in behaviors that fail to improve the underlying issues. Graduation rates are often the criterion of choice, and educators seek new ways to predict the success and failure of students early enough to stage effective interventions.
 6 | 
 7 | ## Description
 8 | A local school district has a goal to reach a 95% graduation rate by the end of the decade by identifying students who need intervention before they drop out of school. As a software engineer contacted by the school district, your task is to model the factors that predict how likely a student is to pass their high school final exam, by constructing an intervention system that leverages supervised learning techniques. The board of supervisors has asked that you find the most effective model that incurs the lowest computation cost, in order to save on the budget. You will need to analyze the dataset on students' performance and develop a model that will predict the likelihood that a given student will pass, quantifying whether an intervention is necessary.
 9 | 
10 | ## Software Requirements
11 | This project uses the following software and Python libraries:
12 | 
13 | - [Python 2.7](https://www.python.org/download/releases/2.7/)
14 | - [NumPy](http://www.numpy.org/)
15 | - [pandas](http://pandas.pydata.org/)
16 | - [scikit-learn](http://scikit-learn.org/0.17/install.html) (v0.17)
17 | 
18 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
19 | 
20 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included. Make sure that you select the Python 2.7 installer and not the Python 3.x installer.
21 | 
22 | ## Starting the Project
23 | For this assignment, you can find the `student_intervention` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
24 | 
25 | This project contains two files:
26 | 
27 | - `student_intervention.ipynb`: This is the main file where you will be performing your work on the project.
28 | - `student-data.csv`: The project dataset. You'll load this data in the notebook.
29 | 
30 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook student_intervention.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
31 | 
32 | ## Submitting the Project
33 | 
34 | ### Evaluation
35 | Your project will be reviewed by a Udacity reviewer against the **<a href="https://review.udacity.com/#!/rubrics/104/view" target="_blank">Building a Student Intervention System project rubric</a>**. Be sure to review this rubric thoroughly and self-evaluate your project before submission. All criteria found in the rubric must be *meeting specifications* for you to pass.
36 | 
37 | ### Submission Files
38 | When you are ready to submit your project, collect the following files and compress them into a single archive for upload. Alternatively, you may supply the following files on your GitHub Repo in a folder named `student_intervention` for ease of access:
39 |  - The `student_intervention.ipynb` notebook file with all questions answered and all code cells executed and displaying output.
40 |  - An **HTML** export of the project notebook with the name **report.html**. This file *must* be present for your project to be evaluated.
41 | 
42 | Once you have collected these files and reviewed the project rubric, proceed to the project submission page.
43 | 
44 | ### I'm Ready!
45 | When you're ready to submit your project, click on the **Submit Project** button at the bottom of the page.
46 | 
47 | If you are having any problems submitting your project or wish to check on the status of your submission, please email us at **machine-support@udacity.com** or visit us in the <a href="http://discussions.udacity.com" target="_blank">discussion forums</a>.
48 | 
49 | ### What's Next?
50 | You will get an email as soon as your reviewer has feedback for you. In the meantime, review your next project and feel free to get started on it or the courses supporting it!
51 | 


--------------------------------------------------------------------------------
/projects/titanic_survival_exploration/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Engineer Nanodegree
 2 | ## Introduction and Foundations
 3 | ## Project: Titanic Survival Exploration
 4 | 
 5 | ### Install
 6 | 
 7 | This project requires **Python** and the following Python libraries installed:
 8 | 
 9 | - [NumPy](http://www.numpy.org/)
10 | - [Pandas](http://pandas.pydata.org)
11 | - [matplotlib](http://matplotlib.org/)
12 | - [scikit-learn](http://scikit-learn.org/stable/)
13 | 
14 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html)
15 | 
16 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included.
17 | 
18 | ### Code
19 | 
20 | Template code is provided in the `titanic_survival_exploration.ipynb` notebook file. Additional supporting code can be found in `visuals.py`. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. Note that the code included in `visuals.py` is meant to be used out-of-the-box and not intended for students to manipulate. If you are interested in how the visualizations are created in the notebook, please feel free to explore this Python file.
21 | 
22 | ### Run
23 | 
24 | In a terminal or command window, navigate to the top-level project directory `titanic_survival_exploration/` (that contains this README) and run one of the following commands:
25 | 
26 | ```bash
27 | jupyter notebook titanic_survival_exploration.ipynb
28 | ```
29 | or
30 | ```bash
31 | ipython notebook titanic_survival_exploration.ipynb
32 | ```
33 | 
34 | This will open the Jupyter Notebook software and project file in your web browser.
35 | 
36 | ### Data
37 | 
38 | The dataset used in this project is included as `titanic_data.csv`. This dataset is provided by Udacity and contains the following attributes:
39 | 
40 | **Features**
41 | - `Pclass` : Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
42 | - `Name` : Name
43 | - `Sex` : Sex
44 | - `Age` : Age
45 | - `SibSp` : Number of Siblings/Spouses Aboard
46 | - `Parch` : Number of Parents/Children Aboard
47 | - `Ticket` : Ticket Number
48 | - `Fare` : Passenger Fare
49 | - `Cabin` : Cabin
50 | - `Embarked` : Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
51 | 
52 | **Target Variable**
53 | - `Survived` : Survival (0 = No; 1 = Yes)


--------------------------------------------------------------------------------
/projects/titanic_survival_exploration/project_description.md:
--------------------------------------------------------------------------------
 1 | ## Content: Introduction and Foundations
 2 | ## Project: Titanic Survival Exploration
 3 | 
 4 | ## Project Overview
 5 | Welcome to the Machine Learning Engineer Nanodegree!
 6 | 
 7 | In this ***optional*** project, you will create decision functions that attempt to predict survival outcomes from the 1912 Titanic disaster based on each passenger's features, such as sex and age. You will start with a simple algorithm and increase its complexity until you are able to accurately predict the outcomes for at least 80% of the passengers in the provided data. This project will introduce you to some of the concepts of machine learning as you start the Nanodegree program.
 8 | 
 9 | In addition, you'll make sure Python is installed with the necessary packages to complete this project. There are two Python libraries, `numpy` and `pandas`, that we'll use a bit here in this project. Don't worry about how these libraries work for now -- we'll get to them in more detail in later projects. This project will also familiarize you with the submission process for the projects that you will be completing as part of the Nanodegree program.
10 | 
11 | ## Software Requirements
12 | This project uses the following software and Python libraries:
13 | 
14 | - [Python](https://www.python.org/downloads/)
15 | - [NumPy](http://www.numpy.org/)
16 | - [pandas](http://pandas.pydata.org/)
17 | - [matplotlib](http://matplotlib.org/)
18 | 
19 | You will also need to have software installed to run and execute a [Jupyter Notebook](http://ipython.org/notebook.html).
20 | 
21 | If you do not have Python installed yet, it is highly recommended that you install the [Anaconda](http://continuum.io/downloads) distribution of Python, which already has the above packages and more included.
22 | 
23 | If you already have Python 2.7 installed on your computer, then you can install `numpy`, `pandas`, `matplotlib` and Jupyter Notebook (formerly known as "iPython") by using [pip](https://pip.pypa.io/en/stable/) on the command line. [This page](http://www.lfd.uci.edu/~gohlke/pythonlibs/) may also be of use for some packages for Windows users, if pip has trouble performing the installation. After installing pip, you can install all the packages with the following command:
24 | 
25 | `sudo pip install numpy pandas matplotlib jupyter`
26 | 
27 | ## Starting the Project
28 | 
29 | For this assignment, you can find the `titanic_survival_exploration` folder containing the necessary project files on the [Machine Learning projects GitHub](https://github.com/udacity/machine-learning), under the `projects` folder. You may download all of the files for projects we'll use in this Nanodegree program directly from this repo. Please make sure that you use the most recent version of project files when completing a project!
30 | 
31 | This project contains three files:
32 | 
33 | - `titanic_survival_exploration.ipynb`: This is the main file where you will be performing your work on the project.
34 | - `titanic_data.csv`: The project dataset. You'll load this data in the notebook.
35 | - `visuals.py`: This Python script provides supplementary visualizations for the project. Do not modify.
36 | 
37 | In the Terminal or Command Prompt, navigate to the folder containing the project files, and then use the command `jupyter notebook titanic_survival_exploration.ipynb` to open up a browser window or tab to work with your notebook. Alternatively, you can use the command `jupyter notebook` or `ipython notebook` and navigate to the notebook file in the browser window that opens. Follow the instructions in the notebook and answer each question presented to successfully complete the project. A **README** file has also been provided with the project files which may contain additional necessary information or instruction for the project. 
38 | 


--------------------------------------------------------------------------------
/projects/titanic_survival_exploration/visuals.py:
--------------------------------------------------------------------------------
###########################################
# Suppress matplotlib user warnings
# Necessary for newer version of matplotlib
import warnings
warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
#
# Display inline matplotlib plots with IPython
# The magic below runs at import time, so importing this module switches
# matplotlib to the 'inline' backend as a side effect.
# NOTE(review): get_ipython() presumably returns None outside an IPython
# session, in which case this line would raise on import -- confirm that the
# module is only ever imported from a notebook.
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
###########################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 15 | 
 16 | def filter_data(data, condition):
 17 |     """
 18 |     Remove elements that do not match the condition provided.
 19 |     Takes a data list as input and returns a filtered list.
 20 |     Conditions should be a list of strings of the following format:
 21 |       '<field> <op> <value>'
 22 |     where the following operations are valid: >, <, >=, <=, ==, !=
 23 |     
 24 |     Example: ["Sex == 'male'", 'Age < 18']
 25 |     """
 26 | 
 27 |     field, op, value = condition.split(" ")
 28 |     
 29 |     # convert value into number or strip excess quotes if string
 30 |     try:
 31 |         value = float(value)
 32 |     except:
 33 |         value = value.strip("\'\"")
 34 |     
 35 |     # get booleans for filtering
 36 |     if op == ">":
 37 |         matches = data[field] > value
 38 |     elif op == "<":
 39 |         matches = data[field] < value
 40 |     elif op == ">=":
 41 |         matches = data[field] >= value
 42 |     elif op == "<=":
 43 |         matches = data[field] <= value
 44 |     elif op == "==":
 45 |         matches = data[field] == value
 46 |     elif op == "!=":
 47 |         matches = data[field] != value
 48 |     else: # catch invalid operation codes
 49 |         raise Exception("Invalid comparison operator. Only >, <, >=, <=, ==, != allowed.")
 50 |     
 51 |     # filter data and outcomes
 52 |     data = data[matches].reset_index(drop = True)
 53 |     return data
 54 | 
 55 | def survival_stats(data, outcomes, key, filters = []):
 56 |     """
 57 |     Print out selected statistics regarding survival, given a feature of
 58 |     interest and any number of filters (including no filters)
 59 |     """
 60 |     
 61 |     # Check that the key exists
 62 |     if key not in data.columns.values :
 63 |         print("'{}' is not a feature of the Titanic data. Did you spell something wrong?".format(key))
 64 |         return False
 65 | 
 66 |     # Return the function before visualizing if 'Cabin' or 'Ticket'
 67 |     # is selected: too many unique categories to display
 68 |     if(key == 'Cabin' or key == 'PassengerId' or key == 'Ticket'):
 69 |         print("'{}' has too many unique categories to display! Try a different feature.".format(key))
 70 |         return False
 71 | 
 72 |     # Merge data and outcomes into single dataframe
 73 |     all_data = pd.concat([data, outcomes.to_frame()], axis = 1)
 74 |     
 75 |     # Apply filters to data
 76 |     for condition in filters:
 77 |         all_data = filter_data(all_data, condition)
 78 | 
 79 |     # Create outcomes DataFrame
 80 |     all_data = all_data[[key, 'Survived']]
 81 |     
 82 |     # Create plotting figure
 83 |     plt.figure(figsize=(8,6))
 84 | 
 85 |     # 'Numerical' features
 86 |     if(key == 'Age' or key == 'Fare'):
 87 |         
 88 |         # Remove NaN values from Age data
 89 |         all_data = all_data[~np.isnan(all_data[key])]
 90 |         
 91 |         # Divide the range of data into bins and count survival rates
 92 |         min_value = all_data[key].min()
 93 |         max_value = all_data[key].max()
 94 |         value_range = max_value - min_value
 95 | 
 96 |         # 'Fares' has larger range of values than 'Age' so create more bins
 97 |         if(key == 'Fare'):
 98 |             bins = np.arange(0, all_data['Fare'].max() + 20, 20)
 99 |         if(key == 'Age'):
100 |             bins = np.arange(0, all_data['Age'].max() + 10, 10)
101 |         
102 |         # Overlay each bin's survival rates
103 |         nonsurv_vals = all_data[all_data['Survived'] == 0][key].reset_index(drop = True)
104 |         surv_vals = all_data[all_data['Survived'] == 1][key].reset_index(drop = True)
105 |         plt.hist(nonsurv_vals, bins = bins, alpha = 0.6,
106 |                  color = 'red', label = 'Did not survive')
107 |         plt.hist(surv_vals, bins = bins, alpha = 0.6,
108 |                  color = 'green', label = 'Survived')
109 |     
110 |         # Add legend to plot
111 |         plt.xlim(0, bins.max())
112 |         plt.legend(framealpha = 0.8)
113 |     
114 |     # 'Categorical' features
115 |     else:
116 |        
117 |         # Set the various categories
118 |         if(key == 'Pclass'):
119 |             values = np.arange(1,4)
120 |         if(key == 'Parch' or key == 'SibSp'):
121 |             values = np.arange(0,np.max(data[key]) + 1)
122 |         if(key == 'Embarked'):
123 |             values = ['C', 'Q', 'S']
124 |         if(key == 'Sex'):
125 |             values = ['male', 'female']
126 | 
127 |         # Create DataFrame containing categories and count of each
128 |         frame = pd.DataFrame(index = np.arange(len(values)), columns=(key,'Survived','NSurvived'))
129 |         for i, value in enumerate(values):
130 |             frame.loc[i] = [value, \
131 |                    len(all_data[(all_data['Survived'] == 1) & (all_data[key] == value)]), \
132 |                    len(all_data[(all_data['Survived'] == 0) & (all_data[key] == value)])]
133 | 
134 |         # Set the width of each bar
135 |         bar_width = 0.4
136 | 
137 |         # Display each category's survival rates
138 |         for i in np.arange(len(frame)):
139 |             nonsurv_bar = plt.bar(i-bar_width, frame.loc[i]['NSurvived'], width = bar_width, color = 'r')
140 |             surv_bar = plt.bar(i, frame.loc[i]['Survived'], width = bar_width, color = 'g')
141 | 
142 |             plt.xticks(np.arange(len(frame)), values)
143 |             plt.legend((nonsurv_bar[0], surv_bar[0]),('Did not survive', 'Survived'), framealpha = 0.8)
144 | 
145 |     # Common attributes for plot formatting
146 |     plt.xlabel(key)
147 |     plt.ylabel('Number of Passengers')
148 |     plt.title('Passenger Survival Statistics With \'%s\' Feature'%(key))
149 |     plt.show()
150 | 
151 |     # Report number of passengers with missing values
152 |     if sum(pd.isnull(all_data[key])):
153 |         nan_outcomes = all_data[pd.isnull(all_data[key])]['Survived']
154 |         print("Passengers with missing '{}' values: {} ({} survived, {} did not survive)".format( \
155 |               key, len(nan_outcomes), sum(nan_outcomes == 1), sum(nan_outcomes == 0)))
156 | 
157 | 


--------------------------------------------------------------------------------