├── images ├── MLP.png ├── ds_ai.png ├── backprop.png ├── fwd_step.png ├── github.jpg ├── ndarray.png ├── pycharm.png ├── sigmoid.png ├── tf_logo.png ├── venn_ds.png ├── Perceptron.png ├── cluster_0.png ├── cluster_1.png ├── df_inside.png ├── df_outside.png ├── ds_ai_full.png ├── edit_mode.png ├── join-inner.png ├── join-left.png ├── join-outer.png ├── join-right.png ├── reference.png ├── bkwd_step_net.png ├── command_mode.png ├── fwd_step_net.png ├── gmail_small.png ├── iris_setosa.jpg ├── ml-wordle-436.jpg ├── mlp_details.png ├── overfitting.png ├── petal_sepal.jpg ├── scikit-learn.png ├── single_layer.png ├── storage_index.png ├── twitter_small.png ├── venn_diagram.png ├── df_inside_numpy.png ├── iris_versicolor.jpg ├── iris_virginica.jpg ├── linkedin_small.png ├── menubar_toolbar.png ├── multi-layers-1.png ├── multi-layers-2.png ├── storage_simple.png ├── tensorflow_head.png ├── Perceptron and MLP.png ├── cluster_comparison.png ├── keras-logo-small.jpg ├── logistic_function.png ├── modeling_data_flow.png ├── ndarray_with_details.png ├── keras-tensorflow-logo.jpg ├── ml_supervised_example.png ├── ml_unsupervised_example.png └── scikit-learn-cheatsheet.png ├── 2_alchemist ├── data │ ├── blooth_sales_data.xlsx │ ├── blooth_sales_data_clean.xlsx │ └── sampledf.json ├── helpers.py ├── createFakeHDF.py ├── plot_clustering.py ├── 0. Introducing Pandas.ipynb ├── 5. Level Up.ipynb └── 1. Data selection & Indexing.ipynb ├── requirements.txt ├── conda-environment.yml ├── 3_mage ├── utils │ ├── plot_linear_svc_regularization.py │ ├── __init__.py │ ├── plot_interactive_forest.py │ ├── plot_kneighbors_regularization.py │ ├── plot_2d_separator.py │ ├── plot_rbf_svm_parameters.py │ └── plot_interactive_tree.py ├── 5.1. Review of Scikit-learn API.ipynb └── 1.1. Introduction to Machine Learning.ipynb ├── LICENSE ├── .gitignore ├── 0_basic_chemicals ├── 01. Data Science What is What if.ipynb └── 03. Developer tools for Data Science.ipynb ├── 1_apprentice ├── 2.3. Scipy Challenge.ipynb ├── 2.2. Scipy Sparse_Matrices.ipynb ├── 3.4 Level Up.ipynb └── 1.5. 
Numpy Challenge.ipynb ├── README.md └── 4_archmage ├── intro_to_ann.csv ├── ann.py └── 2.2.1 Keras Backend.ipynb /images/MLP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/MLP.png -------------------------------------------------------------------------------- /images/ds_ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ds_ai.png -------------------------------------------------------------------------------- /images/backprop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/backprop.png -------------------------------------------------------------------------------- /images/fwd_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/fwd_step.png -------------------------------------------------------------------------------- /images/github.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/github.jpg -------------------------------------------------------------------------------- /images/ndarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ndarray.png -------------------------------------------------------------------------------- /images/pycharm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/pycharm.png -------------------------------------------------------------------------------- /images/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/sigmoid.png -------------------------------------------------------------------------------- /images/tf_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/tf_logo.png -------------------------------------------------------------------------------- /images/venn_ds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/venn_ds.png -------------------------------------------------------------------------------- /images/Perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/Perceptron.png -------------------------------------------------------------------------------- /images/cluster_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/cluster_0.png -------------------------------------------------------------------------------- /images/cluster_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/cluster_1.png -------------------------------------------------------------------------------- /images/df_inside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/df_inside.png -------------------------------------------------------------------------------- /images/df_outside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/df_outside.png -------------------------------------------------------------------------------- /images/ds_ai_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ds_ai_full.png -------------------------------------------------------------------------------- /images/edit_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/edit_mode.png -------------------------------------------------------------------------------- /images/join-inner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/join-inner.png -------------------------------------------------------------------------------- /images/join-left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/join-left.png -------------------------------------------------------------------------------- /images/join-outer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/join-outer.png -------------------------------------------------------------------------------- /images/join-right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/join-right.png -------------------------------------------------------------------------------- /images/reference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/reference.png -------------------------------------------------------------------------------- /images/bkwd_step_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/bkwd_step_net.png -------------------------------------------------------------------------------- /images/command_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/command_mode.png -------------------------------------------------------------------------------- /images/fwd_step_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/fwd_step_net.png 
-------------------------------------------------------------------------------- /images/gmail_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/gmail_small.png -------------------------------------------------------------------------------- /images/iris_setosa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/iris_setosa.jpg -------------------------------------------------------------------------------- /images/ml-wordle-436.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ml-wordle-436.jpg -------------------------------------------------------------------------------- /images/mlp_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/mlp_details.png -------------------------------------------------------------------------------- /images/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/overfitting.png -------------------------------------------------------------------------------- /images/petal_sepal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/petal_sepal.jpg -------------------------------------------------------------------------------- /images/scikit-learn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/scikit-learn.png -------------------------------------------------------------------------------- /images/single_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/single_layer.png -------------------------------------------------------------------------------- /images/storage_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/storage_index.png -------------------------------------------------------------------------------- /images/twitter_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/twitter_small.png -------------------------------------------------------------------------------- /images/venn_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/venn_diagram.png -------------------------------------------------------------------------------- /images/df_inside_numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/df_inside_numpy.png -------------------------------------------------------------------------------- /images/iris_versicolor.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/iris_versicolor.jpg -------------------------------------------------------------------------------- /images/iris_virginica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/iris_virginica.jpg -------------------------------------------------------------------------------- /images/linkedin_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/linkedin_small.png -------------------------------------------------------------------------------- /images/menubar_toolbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/menubar_toolbar.png -------------------------------------------------------------------------------- /images/multi-layers-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/multi-layers-1.png -------------------------------------------------------------------------------- /images/multi-layers-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/multi-layers-2.png -------------------------------------------------------------------------------- /images/storage_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/storage_simple.png -------------------------------------------------------------------------------- /images/tensorflow_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/tensorflow_head.png -------------------------------------------------------------------------------- /images/Perceptron and MLP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/Perceptron and MLP.png -------------------------------------------------------------------------------- /images/cluster_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/cluster_comparison.png -------------------------------------------------------------------------------- /images/keras-logo-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/keras-logo-small.jpg -------------------------------------------------------------------------------- /images/logistic_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/logistic_function.png -------------------------------------------------------------------------------- /images/modeling_data_flow.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/modeling_data_flow.png -------------------------------------------------------------------------------- /images/ndarray_with_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ndarray_with_details.png -------------------------------------------------------------------------------- /images/keras-tensorflow-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/keras-tensorflow-logo.jpg -------------------------------------------------------------------------------- /images/ml_supervised_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ml_supervised_example.png -------------------------------------------------------------------------------- /images/ml_unsupervised_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/ml_unsupervised_example.png -------------------------------------------------------------------------------- /images/scikit-learn-cheatsheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/images/scikit-learn-cheatsheet.png -------------------------------------------------------------------------------- /2_alchemist/data/blooth_sales_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/2_alchemist/data/blooth_sales_data.xlsx -------------------------------------------------------------------------------- /2_alchemist/data/blooth_sales_data_clean.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/develer-data-science/master/2_alchemist/data/blooth_sales_data_clean.xlsx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter~=1.0.0 2 | keras>=2.2.4 3 | matplotlib~=3.0.0 4 | bokeh>=0.12.16 5 | notebook~=5.7.0 6 | numpy~=1.15.2 7 | pandas~=0.23.4 8 | scikit-learn~=0.20.0 9 | scipy~=1.1.0 10 | tensorflow>=1.10.0 11 | Pillow==7.1.0 # needed by matplotlib to load PNG files 12 | pscript>=0.6.1 # custom JavaScript support for bokeh 13 | openpyxl>=2.5.8 # Excel writing support for pandas 14 | xlrd>=1.1.0 # Excel reading support for pandas 15 | -------------------------------------------------------------------------------- /conda-environment.yml: -------------------------------------------------------------------------------- 1 | name: develer-science 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - jupyter=1.0* 7 | - jupyterlab=0.34* 8 | - matplotlib=3.0* 9 | - notebook=5.7* 10 | - pandas=0.23* 11 | - scikit-learn=0.20* 12 | - certifi=2018* 13 | - bokeh=0.12* 14 | - pip: 15 | - keras>=2.2.4 16 | - tensorflow>=1.8 17 | - Pillow~=5.3 18 | - openpyxl>=2.5.8 19 | - pscript>=0.6.1 20 | - xlrd>=1.1.0 21 | 
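A quick sanity check that the pinned stack above resolved correctly (a minimal sketch; `sanity_check.py` is a hypothetical helper, not a file shipped with this repository):

    # sanity_check.py -- print installed versions to compare against the pins above
    import matplotlib
    import numpy
    import pandas
    import scipy
    import sklearn

    for mod in (numpy, scipy, pandas, sklearn, matplotlib):
        print(mod.__name__, mod.__version__)
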
-------------------------------------------------------------------------------- /2_alchemist/helpers.py: -------------------------------------------------------------------------------- 1 | # Helper to display pandas DataFrame output side by side 2 | 3 | from IPython.display import display_html 4 | 5 | def highlight(data): 6 | return ['background-color: yellow' for x in data] 7 | 8 | def display_side_by_side(subset, *args): 9 | html_str = '' 10 | for i, df in enumerate(args): 11 | if i: 12 | html_str += df.style.render() 13 | else: 14 | styled = df.style.apply(highlight, subset=subset)  # keep the styled object, or the highlight is lost 15 | html_str += styled.render() 16 | 17 | display_html(html_str.replace('table','table style="display:inline"'),raw=True) 18 | 19 | 20 | -------------------------------------------------------------------------------- /2_alchemist/data/sampledf.json: -------------------------------------------------------------------------------- 1 | {"0":{"0":79,"1":25,"2":37,"3":74,"4":79,"5":45,"6":12,"7":36,"8":55,"9":46},"1":{"0":19,"1":39,"2":64,"3":61,"4":60,"5":26,"6":29,"7":32,"8":53,"9":74},"2":{"0":21,"1":89,"2":31,"3":100,"4":83,"5":73,"6":18,"7":22,"8":89,"9":36},"3":{"0":99,"1":66,"2":69,"3":6,"4":85,"5":73,"6":98,"7":4,"8":13,"9":54},"4":{"0":35,"1":9,"2":61,"3":58,"4":16,"5":100,"6":62,"7":66,"8":84,"9":21},"5":{"0":59,"1":41,"2":97,"3":80,"4":5,"5":60,"6":68,"7":25,"8":87,"9":12},"6":{"0":44,"1":6,"2":5,"3":95,"4":16,"5":21,"6":92,"7":63,"8":74,"9":68},"7":{"0":25,"1":69,"2":11,"3":50,"4":69,"5":19,"6":29,"7":51,"8":3,"9":33},"8":{"0":75,"1":63,"2":76,"3":15,"4":5,"5":95,"6":74,"7":59,"8":2,"9":80},"9":{"0":58,"1":3,"2":57,"3":51,"4":20,"5":12,"6":96,"7":14,"8":64,"9":25}} -------------------------------------------------------------------------------- /3_mage/utils/plot_linear_svc_regularization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.svm import SVC 4 | from sklearn.datasets import make_blobs 5 | from .plot_2d_separator import plot_2d_separator 6 | 7 | 8 | def plot_linear_svc_regularization(): 9 | X, y = make_blobs(centers=2, random_state=4, n_samples=30) 10 | # a carefully hand-designed dataset 11 | y[7] = 0 12 | y[27] = 0 13 | 14 | fig, axes = plt.subplots(1, 3, figsize=(12, 4)) 15 | 16 | for ax, C in zip(axes, [1e-2, 1, 1e2]): 17 | ax.scatter(X[:, 0], X[:, 1], s=150, c=np.array(['red', 'blue'])[y]) 18 | 19 | svm = SVC(kernel='linear', C=C).fit(X, y) 20 | plot_2d_separator(svm, X, ax=ax, eps=.5) 21 | ax.set_title("C = %f" % C) 22 | -------------------------------------------------------------------------------- /3_mage/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .plot_2d_separator import plot_2d_separator 2 | from .plot_kneighbors_regularization import plot_kneighbors_regularization, \ 3 | plot_regression_datasets, make_dataset 4 | from .plot_linear_svc_regularization import plot_linear_svc_regularization 5 | from .plot_interactive_tree import plot_tree_interactive 6 | from .plot_interactive_forest import plot_forest_interactive 7 | from .plot_rbf_svm_parameters import plot_rbf_svm_parameters 8 | from .plot_rbf_svm_parameters import plot_svm_interactive 9 | 10 | __all__ = ['plot_2d_separator', 'plot_kneighbors_regularization', 11 | 'plot_linear_svc_regularization', 'plot_tree_interactive', 12 | 'plot_regression_datasets', 'make_dataset', 13 | "plot_forest_interactive", "plot_rbf_svm_parameters", 14 | "plot_svm_interactive"] 15 | 
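A minimal usage sketch for `display_side_by_side` from `helpers.py` above (run inside a Jupyter notebook; the frames and the highlighted column are made up for illustration, not taken from the course material):

    import pandas as pd
    from helpers import display_side_by_side

    left = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    right = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
    # Highlight column 'A' of the first frame, then render both frames inline.
    display_side_by_side(['A'], left, right)
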
-------------------------------------------------------------------------------- /2_alchemist/createFakeHDF.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pandas import HDFStore, DataFrame 3 | 4 | # Create (or open) an HDF5 file in append mode 5 | hdf = HDFStore('data/hdata.h5') 6 | 7 | df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C')) 8 | # Put the dataset in the store 9 | hdf.put('d1', df, format='table', data_columns=True) 10 | print(hdf['d1'].shape) 11 | 12 | hdf.append('d1', DataFrame(np.random.rand(5, 3), 13 | columns=('A', 'B', 'C')), 14 | format='table', data_columns=True) 15 | 16 | df = DataFrame(np.random.rand(1000, 3), columns=('A', 'B', 'C')) 17 | # Put the dataset in the store 18 | hdf.put('d2', df, format='table', data_columns=True) 19 | print(hdf['d2'].shape) 20 | 21 | hdf.append('d2', DataFrame(np.random.rand(5, 3), 22 | columns=('A', 'B', 'C')), 23 | format='table', data_columns=True) 24 | hdf.close() # close the file 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Valerio Maggio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /2_alchemist/plot_clustering.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | 4 | def plot_kmeans_clustering_results(c1, c2, c3, vqc1, vqc2, vqc3): 5 | 6 | # Setting plot limits 7 | x1, x2 = -10, 10 8 | y1, y2 = -10, 10 9 | 10 | fig = plt.figure() 11 | fig.subplots_adjust(hspace=0.1, wspace=0.1) 12 | 13 | ax1 = fig.add_subplot(121, aspect='equal') 14 | ax1.scatter(c1[:, 0], c1[:, 1], lw=0.5, color='#00CC00') 15 | ax1.scatter(c2[:, 0], c2[:, 1], lw=0.5, color='#028E9B') 16 | ax1.scatter(c3[:, 0], c3[:, 1], lw=0.5, color='#FF7800') 17 | ax1.xaxis.set_visible(False) 18 | ax1.yaxis.set_visible(False) 19 | ax1.set_xlim(x1, x2) 20 | ax1.set_ylim(y1, y2) 21 | ax1.text(-9, 8, 'Original') 22 | 23 | ax2 = fig.add_subplot(122, aspect='equal') 24 | ax2.scatter(vqc1[:, 0], vqc1[:, 1], lw=0.5, color='#00CC00') 25 | ax2.scatter(vqc2[:, 0], vqc2[:, 1], lw=0.5, color='#028E9B') 26 | ax2.scatter(vqc3[:, 0], vqc3[:, 1], lw=0.5, color='#FF7800') 27 | ax2.xaxis.set_visible(False) 28 | ax2.yaxis.set_visible(False) 29 | ax2.set_xlim(x1, x2) 30 | ax2.set_ylim(y1, y2) 31 | ax2.text(-9, 8, 'VQ identified') 32 | 33 | return fig -------------------------------------------------------------------------------- /3_mage/utils/plot_interactive_forest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.datasets import make_blobs 5 | from sklearn.ensemble import RandomForestClassifier 6 | 7 | 8 | X, y = make_blobs(centers=[[0, 0], [1, 1]], random_state=61526, n_samples=50) 9 | 10 | 11 | def plot_forest(max_depth=1): 12 | plt.figure() 13 | ax = plt.gca() 14 | h = 0.02 15 | 16 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 17 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 18 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 19 | 20 | if max_depth != 0: 21 | forest = RandomForestClassifier(n_estimators=20, max_depth=max_depth, 22 | random_state=1).fit(X, y) 23 | Z = forest.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] 24 | Z = Z.reshape(xx.shape) 25 | ax.contourf(xx, yy, Z, alpha=.4) 26 | ax.set_title("max_depth = %d" % max_depth) 27 | else: 28 | ax.set_title("data set") 29 | ax.scatter(X[:, 0], X[:, 1], c=np.array(['b', 'r'])[y], s=60) 30 | ax.set_xlim(x_min, x_max) 31 | ax.set_ylim(y_min, y_max) 32 | ax.set_xticks(()) 33 | ax.set_yticks(()) 34 | 35 | 36 | def plot_forest_interactive(): 37 | from IPython.html.widgets import interactive, IntSlider 38 | slider = IntSlider(min=0, max=8, step=1, value=0) 39 | return interactive(plot_forest, max_depth=slider) 40 | -------------------------------------------------------------------------------- /3_mage/utils/plot_kneighbors_regularization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.neighbors import KNeighborsRegressor 5 | 6 | 7 | def make_dataset(n_samples=100): 8 | rnd = np.random.RandomState(42) 9 | x = np.linspace(-3, 3, n_samples) 10 | y_no_noise = np.sin(4 * x) + x 11 | y = y_no_noise + rnd.normal(size=len(x)) 12 | return x, y 13 | 14 | 15 | def plot_regression_datasets(): 16 | fig, axes = plt.subplots(1, 3, figsize=(15, 5)) 17 | for n_samples, ax in zip([10, 100, 1000], axes): 18 | x, y = make_dataset(n_samples) 19 | ax.plot(x, y, 'o',
alpha=.6) 20 | 21 | 22 | def plot_kneighbors_regularization(): 23 | rnd = np.random.RandomState(42) 24 | x = np.linspace(-3, 3, 100) 25 | y_no_noise = np.sin(4 * x) + x 26 | y = y_no_noise + rnd.normal(size=len(x)) 27 | X = x[:, np.newaxis] 28 | fig, axes = plt.subplots(1, 3, figsize=(15, 5)) 29 | 30 | x_test = np.linspace(-3, 3, 1000) 31 | 32 | for n_neighbors, ax in zip([2, 5, 20], axes.ravel()): 33 | kneighbor_regression = KNeighborsRegressor(n_neighbors=n_neighbors) 34 | kneighbor_regression.fit(X, y) 35 | ax.plot(x, y_no_noise, label="true function") 36 | ax.plot(x, y, "o", label="data") 37 | ax.plot(x_test, kneighbor_regression.predict(x_test[:, np.newaxis]), 38 | label="prediction") 39 | ax.legend() 40 | ax.set_title("n_neighbors = %d" % n_neighbors) 41 | 42 | if __name__ == "__main__": 43 | plot_kneighbors_regularization() 44 | plt.show() 45 | -------------------------------------------------------------------------------- /3_mage/utils/plot_2d_separator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None): 6 | if eps is None: 7 | eps = X.std() / 2. 8 | x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps 9 | y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps 10 | xx = np.linspace(x_min, x_max, 100) 11 | yy = np.linspace(y_min, y_max, 100) 12 | 13 | X1, X2 = np.meshgrid(xx, yy) 14 | X_grid = np.c_[X1.ravel(), X2.ravel()] 15 | try: 16 | decision_values = classifier.decision_function(X_grid) 17 | levels = [0] 18 | fill_levels = [decision_values.min(), 0, decision_values.max()] 19 | except AttributeError: 20 | # no decision_function 21 | decision_values = classifier.predict_proba(X_grid)[:, 1] 22 | levels = [.5] 23 | fill_levels = [0, .5, 1] 24 | 25 | if ax is None: 26 | ax = plt.gca() 27 | if fill: 28 | ax.contourf(X1, X2, decision_values.reshape(X1.shape), 29 | levels=fill_levels, colors=['blue', 'red']) 30 | else: 31 | ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels, 32 | colors="black") 33 | ax.set_xlim(x_min, x_max) 34 | ax.set_ylim(y_min, y_max) 35 | ax.set_xticks(()) 36 | ax.set_yticks(()) 37 | 38 | 39 | if __name__ == '__main__': 40 | from sklearn.datasets import make_blobs 41 | from sklearn.linear_model import LogisticRegression 42 | X, y = make_blobs(centers=2, random_state=42) 43 | clf = LogisticRegression().fit(X, y) 44 | plot_2d_separator(clf, X, fill=True) 45 | plt.scatter(X[:, 0], X[:, 1], c=y) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /0_basic_chemicals/01. Data Science What is What if.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AI, Machine Learning, Data Science..." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "![DS AI](../images/ds_ai.png)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## ... to make it funny" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "![DS AI full](../images/ds_ai_full.png)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "----" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "# What do you need to become a Data Scientist?" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "![Data Science Venn Conway](../images/venn_ds.png)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## ... though Venn diagrams can go really wrong ..."
57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "![venn](../images/venn_diagram.png)" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3.6 (Develer Science)", 70 | "language": "python", 71 | "name": "develer-science" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.6.6" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /3_mage/utils/plot_rbf_svm_parameters.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.svm import SVC 4 | from sklearn.datasets import make_blobs 5 | from .plot_2d_separator import plot_2d_separator 6 | 7 | 8 | def make_handcrafted_dataset(): 9 | # a carefully hand-designed dataset 10 | X, y = make_blobs(centers=2, random_state=4, n_samples=30) 11 | y[np.array([7, 27])] = 0 12 | mask = np.ones(len(X), dtype=bool)  # the builtin bool; np.bool is deprecated 13 | mask[np.array([0, 1, 5, 26])] = 0 14 | X, y = X[mask], y[mask] 15 | return X, y 16 | 17 | 18 | def plot_rbf_svm_parameters(): 19 | X, y = make_handcrafted_dataset() 20 | 21 | fig, axes = plt.subplots(1, 4, figsize=(16, 4))  # one panel per C value below 22 | for ax, C in zip(axes, [1e0, 5, 10, 100]): 23 | ax.scatter(X[:, 0], X[:, 1], s=150, c=np.array(['red', 'blue'])[y]) 24 | 25 | svm = SVC(kernel='rbf', C=C).fit(X, y) 26 | plot_2d_separator(svm, X, ax=ax, eps=.5) 27 | ax.set_title("C = %f" % C) 28 | 29 | fig, axes = plt.subplots(1, 4, figsize=(15, 3)) 30 | for ax, gamma in zip(axes, [0.1, .5, 1, 10]): 31 | ax.scatter(X[:, 0], X[:, 1], s=150, c=np.array(['red', 'blue'])[y]) 32 | svm = SVC(gamma=gamma, kernel='rbf', C=1).fit(X, y) 33 | plot_2d_separator(svm, X, ax=ax, eps=.5) 34 | ax.set_title("gamma = %f" % gamma) 35 | 36 | 37 | def plot_svm(log_C, log_gamma): 38 | X, y = make_handcrafted_dataset() 39 | C = 10. ** log_C 40 | gamma = 10.
** log_gamma 41 | svm = SVC(kernel='rbf', C=C, gamma=gamma).fit(X, y) 42 | ax = plt.gca() 43 | plot_2d_separator(svm, X, ax=ax, eps=.5) 44 | # plot data 45 | ax.scatter(X[:, 0], X[:, 1], s=150, c=np.array(['red', 'blue'])[y]) 46 | # plot support vectors 47 | sv = svm.support_vectors_ 48 | ax.scatter(sv[:, 0], sv[:, 1], s=230, facecolors='none', zorder=10, linewidth=3) 49 | ax.set_title("C = %.4f gamma = %.4f" % (C, gamma)) 50 | 51 | 52 | def plot_svm_interactive(): 53 | from IPython.html.widgets import interactive, FloatSlider 54 | C_slider = FloatSlider(min=-3, max=3, step=.1, value=0, readout=False) 55 | gamma_slider = FloatSlider(min=-2, max=2, step=.1, value=0, readout=False) 56 | return interactive(plot_svm, log_C=C_slider, log_gamma=gamma_slider) 57 | -------------------------------------------------------------------------------- /3_mage/utils/plot_interactive_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from sklearn.datasets import make_blobs 5 | from sklearn.tree import DecisionTreeClassifier 6 | 7 | from sklearn.externals.six import StringIO # doctest: +SKIP 8 | from sklearn.tree import export_graphviz 9 | from scipy import ndimage 10 | try: 11 | from scipy.misc import imread 12 | except ImportError: 13 | from scipy.ndimage import imread 14 | 15 | import re 16 | 17 | X, y = make_blobs(centers=[[0, 0], [1, 1]], random_state=61526, n_samples=50) 18 | 19 | 20 | def tree_image(tree, fout=None): 21 | try: 22 | import pydot 23 | except ImportError: 24 | # make a hacky white plot 25 | x = np.ones((10, 10)) 26 | x[0, 0] = 0 27 | return x 28 | dot_data = StringIO() 29 | export_graphviz(tree, out_file=dot_data) 30 | data = re.sub(r"gini = 0\.[0-9]+\\n", "", dot_data.getvalue()) 31 | data = re.sub(r"samples = [0-9]+\\n", "", data) 32 | data = re.sub(r"\\nsamples = [0-9]+", "", data) 33 | 34 | graph = pydot.graph_from_dot_data(data) 35 | if fout is None: 36 | fout = "tmp.png" 37 | graph.write_png(fout) 38 | return imread(fout) 39 | 40 | 41 | def plot_tree(max_depth=1): 42 | fig, ax = plt.subplots(1, 2, figsize=(15, 7)) 43 | h = 0.02 44 | 45 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 46 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 47 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 48 | 49 | if max_depth != 0: 50 | tree = DecisionTreeClassifier(max_depth=max_depth, random_state=1).fit(X, y) 51 | Z = tree.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] 52 | Z = Z.reshape(xx.shape) 53 | faces = tree.tree_.apply(np.c_[xx.ravel(), yy.ravel()].astype(np.float32)) 54 | faces = faces.reshape(xx.shape) 55 | border = ndimage.laplace(faces) != 0 56 | ax[0].contourf(xx, yy, Z, alpha=.4) 57 | ax[0].scatter(xx[border], yy[border], marker='.', s=1) 58 | ax[0].set_title("max_depth = %d" % max_depth) 59 | ax[1].imshow(tree_image(tree)) 60 | ax[1].axis("off") 61 | else: 62 | ax[0].set_title("data set") 63 | ax[1].set_visible(False) 64 | ax[0].scatter(X[:, 0], X[:, 1], c=np.array(['b', 'r'])[y], s=60) 65 | ax[0].set_xlim(x_min, x_max) 66 | ax[0].set_ylim(y_min, y_max) 67 | ax[0].set_xticks(()) 68 | ax[0].set_yticks(()) 69 | 70 | 71 | def plot_tree_interactive(): 72 | from IPython.html.widgets import interactive, IntSlider 73 | slider = IntSlider(min=0, max=8, step=1, value=0) 74 | return interactive(plot_tree, max_depth=slider) 75 | -------------------------------------------------------------------------------- /0_basic_chemicals/03. 
Developer tools for Data Science.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Developer Tools and IDEs for Data Science\n", 8 | "\n", 9 | "**Note**: This is not intended to be a comprehensive guide to tools and IDEs for Data Science.\n", 10 | "The aim is simply to point out some interesting tools that are worth exploring." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Jupyter Notebooks \n", 18 | "\n", 19 | "\"Jupyter" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## nteract\n", 27 | "\n", 28 | "
\n", 29 | " \n", 30 | "
\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "" 44 | ], 45 | "text/plain": [ 46 | "" 47 | ] 48 | }, 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "from IPython.display import Video\n", 56 | "\n", 57 | "Video(\"https://nteract.github.io/assets/images/video/nteract_app_demo@2x.mp4\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Atom + HydroGen" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "\"Atom" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## PyCharm IDE" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "\n", 87 | "\n", 88 | "\n", 89 | "### Scientific Mode\n", 90 | "\n", 91 | "[PyCharm Scientific Mode](https://www.jetbrains.com/help/pycharm/matplotlib-tutorial.html)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Others worthwile mentioning..." 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "* **Spyder**: [https://www.spyder-ide.org]()\n", 106 | "* **Rodeo**: [https://rodeo.yhat.com]()\n", 107 | "* **VS Code**: [https://code.visualstudio.com]()" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3.6 (Develer Science)", 114 | "language": "python", 115 | "name": "develer-science" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.6.6" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /2_alchemist/0. Introducing Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas\n", 8 | "\n", 9 | "Pandas is the Swiss-Multipurpose Knife for Data Analysis in Python. With Pandas dealing with data-analysis is easy and simple but there are some things you need to get your head around first as Data-Frames and Data-Series. \n", 10 | "\n", 11 | "The tutorial provides a compact introduction to Pandas for beginners for I/O, data visualisation, statistical data analysis and aggregation within Jupiter notebooks." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Content at a glance\n", 19 | "\n", 20 | "#### A Practical Start: Reading and Writing Data Across Multiple Formats \n", 21 | "\n", 22 | "* CSV\n", 23 | "* Excel\n", 24 | "* JSON\n", 25 | "* Clipboard\n", 26 | " \n", 27 | "* data\n", 28 | " * .info\n", 29 | " * .describe\n", 30 | "\n", 31 | "#### DataSeries & DataFrames / NumPy\n", 32 | "\n", 33 | "* Ode to NumPy\n", 34 | "* Data-Series\n", 35 | "* Data-Frames\n", 36 | "\n", 37 | "#### Data selection & Indexing\n", 38 | "\n", 39 | "* Data-Series: \n", 40 | " * Slicing\n", 41 | " * Access by label\n", 42 | " * Index\n", 43 | "* Data-Frames: \n", 44 | " * Slicing\n", 45 | " * Access by label\n", 46 | " * Peek into joining data\n", 47 | "* Returns a copy / inplace\n", 48 | "* Boolean indexing\n", 49 | "\n", 50 | "#### Operations\n", 51 | " \n", 52 | " * add/subtract\n", 53 | " * multiply\n", 54 | " * mention Index but don't go deep\n", 55 | "\n", 56 | "#### Data Visualisation\n", 57 | "\n", 58 | " * plot your data directly in your notebook\n", 59 | " \n", 60 | "#### Anti Patterns\n", 61 | "\n", 62 | " * a collection of (anti-)patterns when using Pandas" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "---" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Brief Introduction to Pandas\n", 77 | "\n", 78 | "Pandas builds on top of two main data structures: **Data Frame** and **Series**" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Data Frame _from the outside_\n", 86 | "\n", 87 | "" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Data Frame _from the inside_\n", 95 | "\n", 96 | "" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Data Frame vs Numpy Array" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "#### Numpy Array\n", 111 | "\n", 112 | "" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "#### Pandas Data Frame" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3.6 (Develer Science)", 133 | "language": "python", 134 | "name": "develer-science" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.6.6" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /1_apprentice/2.3. Scipy Challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SciPy Challenge\n", 8 | "\n", 9 | "## SciPy at a Glance\n", 10 | "\n", 11 | "The SciPy framework builds on top of the low-level NumPy framework for multidimensional arrays, and provides a large number of higher-level scientific algorithms.
Some of the topics that SciPy covers are:\n", 12 | "\n", 13 | "* Special functions ([scipy.special](http://docs.scipy.org/doc/scipy/reference/special.html))\n", 14 | "* Integration ([scipy.integrate](http://docs.scipy.org/doc/scipy/reference/integrate.html))\n", 15 | "* Optimization ([scipy.optimize](http://docs.scipy.org/doc/scipy/reference/optimize.html))\n", 16 | "* Interpolation ([scipy.interpolate](http://docs.scipy.org/doc/scipy/reference/interpolate.html))\n", 17 | "* Fourier Transforms ([scipy.fftpack](http://docs.scipy.org/doc/scipy/reference/fftpack.html))\n", 18 | "* Signal Processing ([scipy.signal](http://docs.scipy.org/doc/scipy/reference/signal.html))\n", 19 | "* Linear Algebra ([scipy.linalg](http://docs.scipy.org/doc/scipy/reference/linalg.html))\n", 20 | "* Sparse Eigenvalue Problems ([scipy.sparse](http://docs.scipy.org/doc/scipy/reference/sparse.html))\n", 21 | "* Statistics ([scipy.stats](http://docs.scipy.org/doc/scipy/reference/stats.html))\n", 22 | "* Multi-dimensional image processing ([scipy.ndimage](http://docs.scipy.org/doc/scipy/reference/ndimage.html))\n", 23 | "* File IO ([scipy.io](http://docs.scipy.org/doc/scipy/reference/io.html))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Sparse Matrices\n", 31 | "\n", 32 | "**Sparse Matrices** are very useful in some situations. \n", 33 | "\n", 34 | "For example, in some machine learning tasks, especially those associated\n", 35 | "with textual analysis, the data may be mostly zeros. \n", 36 | "\n", 37 | "Storing all these zeros is very inefficient. \n", 38 | "\n", 39 | "We can create and manipulate sparse matrices using the `scipy.sparse` module." 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "There exist different implementations of sparse matrices, each designed to be efficient in different scenarios:\n", 47 | "\n", 48 | "- CSR: Compressed Sparse Rows\n", 49 | "- CSC: Compressed Sparse Columns\n", 50 | "- DOK: Dictionary of Keys\n", 51 | "- LIL: List of Lists\n", 52 | "- BSR: Block Sparse Row" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Ex 1.1\n", 60 | "\n", 61 | "Create a big numpy **dense** matrix filled with random numbers in \n", 62 | "`[0, 1)`.\n", 63 | "Generate a random number within this range and substitute all the elements in the matrix **less than** this number with a zero.\n", 64 | "\n", 65 | "Save the resulting matrix as a `DOK` sparse matrix." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Ex 1.2\n", 82 | "\n", 83 | "Repeat the previous exercise, but this time use a `CSR` sparse matrix." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Ex 1.3\n", 100 | "\n", 101 | "Transform the previously generated sparse matrix back to a full dense `numpy.array`."
102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": true 109 | }, 110 | "outputs": [], 111 | "source": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Ex 1.4 \n", 118 | "\n", 119 | "Generate two sparse matrices and sum them together, choosing the most appropriate internal representation (e.g. `LIL`, `CSR`, `DOK`...).\n", 120 | "\n", 121 | "#### Hint: Oh c'mon.. :)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3.6 (Develer Science)", 137 | "language": "python", 138 | "name": "develer-science" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.6" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /3_mage/5.1. Review of Scikit-learn API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A recap on Scikit-learn's estimator interface" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "slideshow": { 14 | "slide_type": "subslide" 15 | } 16 | }, 17 | "source": [ 18 | "Scikit-learn strives to have a uniform interface across all methods. Given a scikit-learn *estimator*\n", 19 | "object named `model`, the following methods are available (not all for each model):" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "slideshow": { 26 | "slide_type": "subslide" 27 | } 28 | }, 29 | "source": [ 30 | "- Available in **all Estimators**\n", 31 | " + `model.fit()` : fit training data. For supervised learning applications,\n", 32 | " this accepts two arguments: the data `X` and the labels `y` (e.g. `model.fit(X, y)`).\n", 33 | " For unsupervised learning applications, ``fit`` takes only a single argument,\n", 34 | " the data `X` (e.g. `model.fit(X)`)." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "subslide" 42 | } 43 | }, 44 | "source": [ 45 | "- Available in **supervised estimators**\n", 46 | " + `model.predict()` : given a trained model, predict the label of a new set of data.\n", 47 | " This method accepts one argument, the new data `X_new` (e.g.
`model.predict(X_new)`),\n", 48 | " and returns the learned label for each object in the array.\n", 49 | " + `model.predict_proba()` : For classification problems, some estimators also provide\n", 50 | " this method, which returns the probability that a new observation has each categorical label.\n", 51 | " In this case, the label with the highest probability is returned by `model.predict()`.\n", 52 | " " 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "slideshow": { 59 | "slide_type": "subslide" 60 | } 61 | }, 62 | "source": [ 63 | "- Available in **supervised estimators** (cont.)\n", 64 | " \n", 65 | " + `model.decision_function()` : For classification problems, some estimators provide an uncertainty estimate that is not a probability. For binary classification, a decision_function >= 0 means the positive class will be predicted, while < 0 means the negative class.\n", 66 | " + `model.score()` : for classification or regression problems, most (all?) estimators implement\n", 67 | " a score method. Scores are between 0 and 1, with a larger score indicating a better fit." 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "slideshow": { 74 | "slide_type": "subslide" 75 | } 76 | }, 77 | "source": [ 78 | "- Available in **supervised estimators** (cont.)\n", 79 | "\n", 80 | " + `model.transform()` : For feature selection algorithms, this will reduce the dataset to the selected features. For some classification and regression models such as some linear models and random forests, this method reduces the dataset to the most informative features. These classification and regression models can therefore also be used as feature selection methods." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "subslide" 88 | } 89 | }, 90 | "source": [ 91 | "- Available in **unsupervised estimators**\n", 92 | " + `model.transform()` : given an unsupervised model, transform new data into the new basis.\n", 93 | " This also accepts one argument `X_new`, and returns the new representation of the data based\n", 94 | " on the unsupervised model.\n", 95 | " + `model.fit_transform()` : some estimators implement this method,\n", 96 | " which more efficiently performs a fit and a transform on the same input data." 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "slideshow": { 103 | "slide_type": "subslide" 104 | } 105 | }, 106 | "source": [ 107 | "- Available in **unsupervised estimators** (cont.)\n", 108 | "\n", 109 | " + `model.predict()` : for clustering algorithms, the predict method will produce cluster labels for new data points. Not all clustering methods have this functionality.\n", 110 | " + `model.predict_proba()` : Gaussian mixture models (GMMs) provide the probability for each point to be generated by a given mixture component.\n", 111 | " + `model.score()` : Density models like KDE and GMMs provide the likelihood of the data under the model."
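, "\n", "A minimal sketch of this uniform interface for a supervised estimator (hypothetical arrays `X`, `y`, `X_new`, `y_new`; not part of the original notebook):\n", "\n", "```python\n", "from sklearn.linear_model import LogisticRegression\n", "\n", "model = LogisticRegression()\n", "model.fit(X, y)                      # supervised fit: data + labels\n", "y_pred = model.predict(X_new)        # one predicted label per new sample\n", "probas = model.predict_proba(X_new)  # per-class probabilities\n", "acc = model.score(X_new, y_new)      # mean accuracy of the predictions\n", "```"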
112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "slideshow": { 118 | "slide_type": "subslide" 119 | } 120 | }, 121 | "source": [ 122 | "Apart from ``fit``, the two most important functions are arguably ``predict``, which produces a target variable (a ``y``), and ``transform``, which produces a new representation of the data (an ``X``).\n", 123 | "The following table shows which of the two applies to each class of models:\n", 124 | "\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "\n", 132 | "\n", 133 | "\n", 134 | "\n", 135 | "\n", 136 | "\n", 137 | "\n", 138 | "
| ``model.predict`` | ``model.transform`` |
| ----------------- | ------------------------ |
| Classification    | Preprocessing            |
| Regression        | Dimensionality Reduction |
| Clustering        | Feature Extraction       |
|                   | Feature selection        |
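As a small, hedged illustration of the two columns above (using the bundled iris data only to show the call signatures; any classifier and any transformer would do):

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

X, y = load_iris(return_X_y=True)

# The ``predict`` column: a classifier maps X to a target y
clf = LogisticRegression().fit(X, y)
print(clf.predict(X[:3]))           # predicted class labels

# The ``transform`` column: PCA maps X to a new representation of X
pca = PCA(n_components=2).fit(X)
print(pca.transform(X[:3]).shape)   # (3, 2)
```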
\n", 139 | "\n", 140 | "\n" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3.6 (Develer Science)", 147 | "language": "python", 148 | "name": "develer-science" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.6.6" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 2 165 | } 166 | -------------------------------------------------------------------------------- /3_mage/1.1. Introduction to Machine Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Introduction to Machine Learning in Python" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "subslide" 19 | } 20 | }, 21 | "source": [ 22 | "## What is Machine Learning?" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "subslide" 30 | } 31 | }, 32 | "source": [ 33 | "### Machine Learning at Glance" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "-" 41 | } 42 | }, 43 | "source": [ 44 | "" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "slideshow": { 51 | "slide_type": "subslide" 52 | } 53 | }, 54 | "source": [ 55 | "> Machine learning teaches machines how to carry out tasks by themselves. It is that simple.\n", 56 | "The complexity comes with the details.\n", 57 | "\n", 58 | "_W. Richert & L.P. Coelho, 2013\n", 59 | "Building Machine Learning Systems with Python_" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "subslide" 67 | } 68 | }, 69 | "source": [ 70 | "Machine learning is the process to automatically **extract knowledge** from data, usually with the goal of making **predictions** on _new_, _unseen_ data. \n", 71 | "\n", 72 | "A classical example is a _spam filter_, for which the user keeps labeling incoming mails as either spam or not spam. \n", 73 | "\n", 74 | "A machine learning algorithm then \"learns\" what distinguishes spam from normal emails, and can predict for new emails whether they are spam or not." 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": { 80 | "slideshow": { 81 | "slide_type": "subslide" 82 | } 83 | }, 84 | "source": [ 85 | "Central to machine learning is the concept of **making decision automatically** from data, **without the user specifying explicit rules** how this decision should be made.\n", 86 | "\n", 87 | "For the case of emails, the user doesn't provide a list of words or characteristics that make an email spam. Instead, the user provides examples of spam and non-spam emails." 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "subslide" 95 | } 96 | }, 97 | "source": [ 98 | "The second central concept is **generalization**. \n", 99 | "\n", 100 | "The goal of a machine learning algorithm is to predict on new, previously unseen data. We are not interested in marking an email as spam or not, that the human already labeled. 
Instead, we want to make the user's life easier by making an automatic decision for new incoming mail." 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": { 106 | "slideshow": { 107 | "slide_type": "subslide" 108 | } 109 | }, 110 | "source": [ 111 | "There are two kinds of machine learning we will talk about in these notebooks: \n", 112 | "\n", 113 | "* **Supervised learning;** \n", 114 | "* **Unsupervised learning.**" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "source": [ 125 | "### Supervised Learning\n", 126 | "\n", 127 | "In **Supervised Learning**, we have a dataset consisting of both input features and a desired output, such as in the spam / no-spam example.\n", 128 | "\n", 129 | "The task is to construct a model (or program) which is able to predict the desired output of an unseen object\n", 130 | "given the set of features." 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": { 136 | "slideshow": { 137 | "slide_type": "subslide" 138 | } 139 | }, 140 | "source": [ 141 | "" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "slideshow": { 148 | "slide_type": "subslide" 149 | } 150 | }, 151 | "source": [ 152 | "Supervised learning is further broken down into two categories, **classification** and **regression**.\n", 153 | "\n", 154 | "In classification, the label is discrete (a.k.a. _Categorical Data_, i.e. _Integer values_), such as \"spam\" or \"no spam\". \n", 155 | "\n", 156 | "In other words, it provides a clear-cut distinction between categories. \n", 157 | "\n", 158 | "In regression, the label is continuous, i.e. _Float output_." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": { 164 | "slideshow": { 165 | "slide_type": "subslide" 166 | } 167 | }, 168 | "source": [ 169 | "### Other Examples\n", 170 | "\n", 171 | "Some more complicated examples are:\n", 172 | "\n", 173 | "- given a multicolor image of an object through a telescope, determine\n", 174 | " whether that object is a star, a quasar, or a galaxy.\n", 175 | "- given a photograph of a person, identify the person in the photo.\n", 176 | "- given a list of movies a person has watched and their personal rating\n", 177 | " of the movie, recommend a list of movies they would like.\n", 178 | "- given a person's age, education, and position, infer their salary." 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "slideshow": { 185 | "slide_type": "subslide" 186 | } 187 | }, 188 | "source": [ 189 | "What these tasks have in common is that there are one or more unknown\n", 190 | "quantities associated with the object which need to be determined from other\n", 191 | "observed quantities." 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "slideshow": { 198 | "slide_type": "subslide" 199 | } 200 | }, 201 | "source": [ 202 | "### For example\n", 203 | "\n", 204 | "* In astronomy, the task of determining whether an object is a star, a galaxy, or a quasar is a **classification problem**: the label is from three distinct categories. \n", 205 | "\n", 206 | "* On the other hand, we might wish to estimate the age of an object based on such observations: this would be a **regression problem**, because the label (age) is a continuous quantity."
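A minimal sketch of the contrast, with made-up one-dimensional data (decision trees are used here only because they expose both flavours through the same API):

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

X = np.arange(10).reshape(-1, 1)       # one feature, ten samples

y_class = (X.ravel() > 4).astype(int)  # discrete labels   -> classification
y_regr = 2.5 * X.ravel() + 1.0         # continuous target -> regression

clf = DecisionTreeClassifier().fit(X, y_class)
reg = DecisionTreeRegressor().fit(X, y_regr)

print(clf.predict([[7]]))  # array([1])    -- a class label
print(reg.predict([[7]]))  # array([18.5]) -- a float
```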
207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "slideshow": { 213 | "slide_type": "slide" 214 | } 215 | }, 216 | "source": [ 217 | "### Unsupervised Learning\n", 218 | "\n", 219 | "In **Unsupervised Learning** there is no desired output associated with the data.\n", 220 | "\n", 221 | "Instead, we are interested in extracting some form of knowledge or model from the given data.\n", 222 | "\n", 223 | "In a sense, you can think of unsupervised learning as a means of discovering labels from the data itself.\n", 224 | "\n", 225 | "Unsupervised learning comprises tasks such as *dimensionality reduction*, *clustering*, and\n", 226 | "*density estimation*. " 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": { 232 | "slideshow": { 233 | "slide_type": "subslide" 234 | } 235 | }, 236 | "source": [ 237 | "" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": { 243 | "slideshow": { 244 | "slide_type": "fragment" 245 | } 246 | }, 247 | "source": [ 248 | "Unsupervised learning is often harder to understand and to evaluate.\n", 249 | "\n", 250 | "Sometimes the two may even be combined: e.g. Unsupervised learning can be used to find useful\n", 251 | "features in heterogeneous data, and then these features can be used within a supervised\n", 252 | "framework." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "slideshow": { 259 | "slide_type": "subslide" 260 | } 261 | }, 262 | "source": [ 263 | "### Other Examples\n", 264 | "\n", 265 | "Some more involved unsupervised learning problems are:\n", 266 | "\n", 267 | "- given detailed observations of distant galaxies, determine which features or combinations of\n", 268 | " features summarize best the information.\n", 269 | "- given a mixture of two sound sources (for example, a person talking over some music),\n", 270 | " separate the two (this is called the [blind source separation](http://en.wikipedia.org/wiki/Blind_signal_separation) problem).\n", 271 | "- given a large collection of news articles, find recurring topics inside these articles.\n", 272 | "- given a collection of images, cluster similar images together (for example to group them when visualizing a collection)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": { 278 | "slideshow": { 279 | "slide_type": "slide" 280 | } 281 | }, 282 | "source": [ 283 | "# Scikit-learn at a Glance" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "" 291 | ] 292 | } 293 | ], 294 | "metadata": { 295 | "celltoolbar": "Slideshow", 296 | "kernelspec": { 297 | "display_name": "Python 3.6 (Develer Science)", 298 | "language": "python", 299 | "name": "develer-science" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.6.6" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 2 316 | } 317 | -------------------------------------------------------------------------------- /2_alchemist/5. 
Level Up.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# StarWars" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will create the data.\n", 17 | "```\n", 18 | "raw_data = {\"name\": ['Darth Vader', 'Princess Leia','Luke Skywalker','Yoda'],\n", 19 | " \"class\": ['Empire','Rebels','Rebels','Rebels'],\n", 20 | " \"gender\": ['M', 'F', 'M', 'J'],\n", 21 | " \"hp\": [45, 39, 44, 45],\n", 22 | " \"like\": ['yes', 'no','yes','no'] \n", 23 | " }\n", 24 | "```\n", 25 | "\n", 26 | "### Step 1. Import the necessary libraries" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Step 2. Create a data dictionary" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Step 3. Create a DataFrame object starting from the data dictionary, and print its head" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Step 4. Oops... it seems the DataFrame columns are in alphabetical order. Reorder the columns as name, class, gender, hp, like" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Step 5. Add another column called actor, and insert what you have in mind." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Step 6. Present the type of each column" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "# Filtering and Sorting Data" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "This time we are going to pull data directly from the internet.\n", 130 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 131 | "\n", 132 | "### Step 1. Import the necessary libraries" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 
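(Spoiler warning if you want to solve this yourself.) A hint for this step: `pandas.read_csv` can fetch a URL directly, and the file above is tab-separated, so one possible sketch, which also anticipates the variable name asked for in Step 3, is:

```python
import pandas as pd

url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
chipo = pd.read_csv(url, sep='\t')  # tab-separated values
chipo.head()
```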
" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Step 3. Assign it to a variable called chipo." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### Step 4. How many products cost more than $10.00?" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### Step 5. What is the price of each item? \n", 188 | "###### print a data frame with only two columns item_name and item_price" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Step 6. Sort by the name of the item" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [], 214 | "source": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "### Step 7. What was the quantity of the most expensive item ordered?" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### Step 8. How many times were a Veggie Salad Bowl ordered?" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [], 246 | "source": [] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### Step 9. How many times people orderd more than one Canned Soda?" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "collapsed": false 260 | }, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "# Exercises - GroupBy" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "### Introduction:\n", 276 | "\n", 277 | "GroupBy can be summarizes as Split-Apply-Combine.\n", 278 | "\n", 279 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 280 | "\n", 281 | "Check out this Diagram:\n", 282 | "\n", 283 | "![Group-by Diagram](https://i.imgur.com/yjNkiwL.png?1) \n", 284 | "\n", 285 | "\n", 286 | "### Step 1. Import the necessary libraries" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": false 294 | }, 295 | "outputs": [], 296 | "source": [] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### Step 3. 
Assign it to a variable called drinks." 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "collapsed": false 317 | }, 318 | "outputs": [], 319 | "source": [] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "### Step 4. Which continent drinks more beer on average?" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [], 335 | "source": [] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "### Step 5. For each continent print the statistics for wine consumption." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "collapsed": false 349 | }, 350 | "outputs": [], 351 | "source": [] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "### Step 6. Print the mean alcohol consumption per continent for every column" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": false 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "### Step 7. Print the median alcohol consumption per continent for every column" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [], 383 | "source": [] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "### Step 8. Print the mean, min and max values for spirit consumption.\n", 390 | "#### This time output a DataFrame" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": { 397 | "collapsed": false 398 | }, 399 | "outputs": [], 400 | "source": [] 401 | } 402 | ], 403 | "metadata": { 404 | "anaconda-cloud": {}, 405 | "kernelspec": { 406 | "display_name": "Python 3.6 (Develer Science)", 407 | "language": "python", 408 | "name": "develer-science" 409 | }, 410 | "language_info": { 411 | "codemirror_mode": { 412 | "name": "ipython", 413 | "version": 3 414 | }, 415 | "file_extension": ".py", 416 | "mimetype": "text/x-python", 417 | "name": "python", 418 | "nbconvert_exporter": "python", 419 | "pygments_lexer": "ipython3", 420 | "version": "3.6.6" 421 | } 422 | }, 423 | "nbformat": 4, 424 | "nbformat_minor": 2 425 | } 426 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Develer turns to Data Science 2 | 3 | ## Lecture notes for the "Data Science, the Pythonic way" @ [Develer](https://www.develer.com/) 4 | 5 | 6 | 7 | ### Author: Valerio Maggio 8 | 9 | #### _PostDoc Data Scientist @ FBK/MPBA_ 10 | 11 | #### Contacts: 12 |
- Twitter: @leriomaggio
- LinkedIn: valeriomaggio
- Email: valeriomaggio_at_gmail_dot_com
31 | 32 | # Materials: 33 | 34 | ![github](./images/github.jpg) 35 | 36 | ```shell 37 | git clone https://github.com/leriomaggio/develer-data-science.git 38 | ``` 39 | 40 | # Outline at a glance: 41 | (from _apprentice_ to _doctor strange_) 42 | 43 | - **Level I**) _Apprentice_: **Pythonic tools for Data Science** 44 | 45 | * _Dev Tools_ for Data Scientist and Jupyter notebooks 46 | * Numerical computation in Python: `numpy` 47 | * Working with data: `pandas` 48 | 49 | 50 | - **Level II**) _Alchemist_: **Data Visualisation** 51 | 52 | * Basic principles of data visualisation 53 | * Introduction to `matplotlib` 54 | * interactive data visualisation using `bokeh` 55 | 56 | 57 | - **Level III**) _Mage_: **Crash course on Machine Learning** 58 | 59 | * What is _Machine Learning_ 60 | * Introduction to `sklearn` 61 | * _Supervised_ and _**Un**supervised_ Machine learning 62 | * Robust Machine Learning: _selection bias and cross-validation_ 63 | 64 | 65 | - **Level IV**) _Arch-Mage_ : **Deep Learning & Pythonic perspectives** 66 | * What is _Deep Learning_ 67 | * Deep Learning frameworks 68 | * Introduction to Keras 69 | 70 | ### Description 71 | 72 | The course will be organised in **four** different parts, 73 | mostly covering the basics (plus some more advanced topics) 74 | related to Machine Learning and Data Science. 75 | 76 | We will start by introducing the basics of data science in Python, 77 | and the (development) tools and frameworks to be used. 78 | Then we will start working with real data (in different formats) 79 | to have a very general feeling of what does it _mean_ to be 80 | a _data scientist_. There will also be a section specifically 81 | focused on basic principles (and tools) of 82 | data visualisation. 83 | Finally, more advanced concepts will be introduced. 84 | In particular, a general introduction to Machine Learning models 85 | and settings (i.e. _supervised_ and _unsupervised_) will be 86 | provided, along with a glimpse of Deep learning models and 87 | frameworks. 88 | 89 | All these parts will be presented always considering the 90 | perspective of the developer and practitioner who wants to 91 | learn (and understand) _Data Science_ in a very practical way. 92 | For this aim, the materials will contain lots of 93 | exercises and challenges along the way to test your 94 | skills. 95 | 96 | --- 97 | 98 | # Technical Requirements 99 | 100 | This tutorial requires the following packages: 101 | 102 | - Python version 3.6 103 | - Python 3.4+ should be fine as well 104 | - likely Python 2.7 would be also fine, but *who knows*? :P 105 | - `numpy`: http://www.numpy.org/ 106 | - `scipy`: http://www.scipy.org/ 107 | - `matplotlib`: http://matplotlib.org/ 108 | - `pandas`: http://pandas.pydata.org 109 | - `scikit-learn` : http://scikit-learn.org 110 | - `jupyter` & `notebook`: http://jupyter.org 111 | 112 | Plus - for the last Deep learning section: 113 | - `keras`: http://keras.io 114 | - `tensorflow`: https://www.tensorflow.org 115 | - (optional) `torch`: http://pytorch.org 116 | 117 | The easiest way to get (most of) these is to use an all-in-one installer 118 | such as [Anaconda](https://www.anaconda.com/download/) from Continuum, 119 | which is available for multiple computer platforms, namely Linux, 120 | Windows, and OSX. 
121 | 122 | --- 123 | 124 | ### Python Version 125 | 126 | I'm currently running this tutorial with **Python 3** on **Anaconda** 127 | 128 | 129 | ```shell 130 | $ python --version 131 | Python 3.6.6 132 | ``` 133 | 134 | --- 135 | 136 | # Accessing the materials 137 | 138 | If you want to access the materials, you have several options: 139 | 140 | ## Jupyter Notebook 141 | 142 | Most of the materials in this course is provided as a collection of 143 | Jupyter Notebooks. 144 | 145 | In case you don't know **what is** a Jupyter notebook, here is a good 146 | reference for a quick introduction: 147 | [Jupyter Notebook Beginner Guide](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html). 148 | 149 | On the other hand, if you also want to know (_and you should_) **what is NOT** 150 | a Jupyter notebook - *spoiler alert:* **it is NOT an IDE** - 151 | here is a very nice reference: 152 | 153 | → [I Don't like Notebooks,](https://twitter.com/joelgrus/status/1033035196428378113) 154 | by _Joel Grus_ @ JupyterCon 2018. 155 | 156 | If you **already have all the environment setup** on your machine, 157 | all you need to do is to run the Jupyter notebook server: 158 | 159 | ```shell 160 | $ jupyter notebook 161 | ``` 162 | 163 | Alternatively, I suggest you to try the new **Jupyter Lab** environment: 164 | ```shell 165 | $ jupyter lab 166 | ``` 167 | 168 | **NOTE**: Before running Jupyter server, it is mandatory to enable 169 | the (Python) virtual environment. 170 | 171 | Please refer to the section [Setting the Environment](#setup) for 172 | detailed instructions on how to install all the required 173 | packages and libraries. 174 | 175 | 176 | ## Binder 177 | 178 | (Consider this option only if your WiFi is stable) 179 | 180 | If you don't want the hassle of setting up all the environment and 181 | libraries on your machine, or simply you want to avoid doing 182 | "_too much computation_" on your hardware setup, 183 | I strongly suggest you to use the **Binder** service. 184 | 185 | The primary goal of Binder is to turn a GitHub repo into a collection of 186 | interactive Jupyter notebooks 187 | 188 | To start using Binder, just click on the button below: 189 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/leriomaggio/develer-data-science/master) 190 | 191 | ## Google Colaboratory 192 | 193 | [Colaboratory](https://colab.research.google.com/) is a free Jupyter 194 | notebook environment that 195 | requires no setup and runs entirely in the Google cloud. 196 | Moreover, **GPU** and **TPU** runtime environments are available, 197 | and completely for free. 198 | (This last option will be worthwhile mentioning in the very 199 | last part of the course, when we will talk 200 | about Deep Learning networks). 201 | 202 | [Here](https://colab.research.google.com/notebooks/welcome.ipynb) 203 | is an overview of the main features offered by Colaboratory. 204 | 205 | To start using Colaboratory, just click on the button below: 206 | [![Colab](https://img.shields.io/badge/launch-colaboratory-yellow.svg)](https://colab.research.google.com/) 207 | 208 | --- 209 | 210 | 211 | # Setting the Environment 212 | 213 | In this repository, files to install the required packages are provided. 214 | The first step to setup the environment is to create a 215 | Python [Virtual Environment](https://docs.python.org/3.6/tutorial/venv.html). 
216 | 217 | Whether you are using [Anaconda](https://www.anaconda.com/download/) 218 | Python Distribution or the **Standard 219 | Python framework** (from [python.org](https://www.python.org/downloads/)), 220 | below are reported the instructions for the two cases, respectively. 221 | 222 | ## (a) Conda Environment 223 | 224 | This repository includes a `conda-environment.yml` file that is necessary 225 | to re-create the Conda virtual environment. 226 | 227 | To re-create the virtual environment: 228 | 229 | ```shell 230 | $ conda env create -f conda-environment.yml 231 | ``` 232 | 233 | Then, to **activate** the virtual environment: 234 | 235 | ```shell 236 | $ conda activate develer-science 237 | ``` 238 | 239 | ## (b) `pyenv` & `virtualenv` 240 | 241 | Alternatively, if you don't want to install (yet) another Python 242 | distribution on your machine, or you prefer not to use the full-stack Anaconda 243 | Python, I strongly suggest giving the new `pyenv` project a try. 244 | 245 | ### 1. Setup `pyenv` 246 | 247 | `pyenv` is a new package that lets you easily switch between multiple 248 | versions of Python. 249 | It is simple, unobtrusive, and follows the UNIX tradition of single-purpose 250 | tools that do one thing well. 251 | 252 | To **setup** `pyenv`, please follow the instructions reported on the 253 | [GitHub Repository](https://github.com/pyenv/pyenv) of the project, 254 | according to the specific platform and operating system. 255 | 256 | There exists a `pyenv` plugin named `pyenv-virtualenv` which comes with various 257 | features to help `pyenv` users manage virtual environments created by 258 | `virtualenv` or Anaconda. 259 | 260 | ### 2. Installing `pyenv-virtualenv` 261 | 262 | I would recommend installing `pyenv-virtualenv` as reported in 263 | the official 264 | [documentation](https://github.com/pyenv/pyenv-virtualenv/blob/master/README.md). 265 | 266 | ### 3. Setting up the virtual environment 267 | 268 | Once `pyenv` and `pyenv-virtualenv` have been correctly installed and 269 | configured, these are the instructions to 270 | set up the virtual environment for this tutorial: 271 | 272 | ```shell 273 | $ pyenv install 3.6.6 # downloads and enables Python 3.6 274 | $ pyenv virtualenv 3.6.6 develer-science # create virtual env using Py3.6 275 | $ pyenv activate develer-science # activate the environment 276 | $ pip install -r requirements.txt # install requirements 277 | 278 | ``` 279 | 280 | ### Installing Jupyter Kernel (Optional) 281 | 282 | All the notebooks in this tutorial have been saved using a Jupyter Kernel 283 | defined on the created virtual environment, named "Python 3.6 (Develer Science)". 284 | 285 | In case you get a _non-existent kernel_ warning when you open the 286 | notebooks on your machine, you need to create the corresponding 287 | `IPython` kernel: 288 | 289 | ```shell 290 | $ python -m ipykernel install --user --name develer-science --display-name "Python 3.6 (Develer Science)" 291 | ``` 292 | 293 | --- 294 | 295 | ## Test if everything is up&running 296 | 297 | ### 1. Check import 298 | 299 | 300 | ```Python 301 | >>> import numpy as np 302 | >>> import scipy as sp 303 | >>> import pandas as pd 304 | >>> import matplotlib.pyplot as plt 305 | >>> import sklearn 306 | >>> import keras 307 | Using TensorFlow backend. 308 | ``` 309 | 310 | ### 2. 
Check installed Versions 311 | 312 | 313 | ```Python 314 | >>> import numpy 315 | >>> print('numpy:', numpy.__version__) 316 | >>> import scipy 317 | >>> print('scipy:', scipy.__version__) 318 | >>> import matplotlib 319 | >>> print('matplotlib:', matplotlib.__version__) 320 | >>> import sklearn 321 | >>> print('scikit-learn:', sklearn.__version__) 322 | ``` 323 | ``` 324 | numpy: 1.15.2 325 | scipy: 1.1.0 326 | matplotlib: 3.0.0 327 | scikit-learn: 0.20.0 328 | ``` 329 | 330 |
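The version check above skips two of the packages imported in step 1; assuming `pandas` and `keras` were installed as per the requirements, a small addition would be:

```Python
>>> import pandas
>>> print('pandas:', pandas.__version__)
>>> import keras
>>> print('keras:', keras.__version__)
```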
331 |

If everything worked all the way down here, you're ready to start!

332 | -------------------------------------------------------------------------------- /4_archmage/intro_to_ann.csv: -------------------------------------------------------------------------------- 1 | Feature1,Feature2,Target 2.067788388,0.258133225,1 0.993994008,-0.609144512,1 -0.690315436,0.749920622,0 1.023582376,0.52900308,0 0.700747058,-0.496724018,1 0.955062941,0.371061016,0 -0.051023466,0.009786883,1 2.111668915,0.29146667,1 1.173019389,-0.101473076,0 -0.57794707,1.277303087,0 0.909597624,0.036308672,0 2.077734242,0.629147857,1 -0.962290964,0.827233623,0 0.947003591,-0.290186106,1 0.383209976,-0.309917567,1 -0.150287574,0.317294355,1 0.080361123,0.502094351,1 1.200125191,-0.527865179,1 -0.922386507,0.465574823,0 0.512394856,-0.033492128,1 0.660176708,0.354198518,0 0.919878779,-0.59377197,1 0.185474117,-0.214731663,1 -0.963168026,0.294003942,0 1.426699651,-0.099509079,1 0.976457825,-0.449373622,1 1.75471374,-0.261642816,1 1.168186512,-0.672740552,1 0.27774654,-0.607361346,1 0.672725148,0.666333709,0 0.002033057,0.376967935,1 0.314327033,-0.243225802,1 0.931967471,0.173111872,0 -0.409800131,0.597131669,0 -0.476941175,1.019581533,0 0.838882632,-0.071573271,0 1.033284866,-0.525249709,1 0.326116205,-0.577021121,1 0.85947284,1.340802524,0 1.551221655,-0.528586441,1 0.101201434,0.540632498,0 -0.09049836,0.749596127,1 2.028777695,0.151478964,1 0.580198862,1.033262412,0 0.192071709,-0.121336928,1 -1.462223951,0.07106442,0 1.906780301,0.506260992,1 0.695029196,-0.599016144,1 1.001086685,-0.70844148,1 1.110090768,-0.339190684,1 0.710397938,-0.460291185,1 2.042904712,-0.090700237,1 0.456990999,-0.563120595,1 0.087652349,0.095485193,1 1.332034469,-0.563468501,1 0.360617086,0.455561453,0 1.579667988,0.251685059,1 0.597723548,0.688549389,0 -0.805950346,1.010476996,0 0.271306147,0.832912579,0 1.899341837,0.044502462,1 1.527602184,-0.735397326,1 1.639754245,-0.27615701,1 -0.037638919,0.923068888,0 0.176032053,0.076067456,1 2.148428077,0.031181293,1 0.045108737,0.218221448,1 0.724493125,0.363354865,0 -0.664756351,0.643436386,0 -0.801725803,0.999480546,0 0.861801719,0.844010631,0 1.727336522,0.145706011,1 1.704197081,0.113234553,1 -0.288162751,0.584628604,1 0.327757328,0.535062913,1 0.180931746,1.191129451,0 1.090762095,-0.099058367,0 0.152277456,0.337631885,1 -0.873448576,0.616061304,0 1.000549409,0.326569244,0 -0.384538159,0.632005344,0 -0.26699789,0.155658861,1 -0.083148995,0.434163485,1 0.682670623,0.418051246,0 1.230411746,-0.810815948,1 0.164731882,1.030431426,0 -1.176887533,0.440652126,0 0.222984654,0.280817027,1 1.840889076,0.239281488,1 0.07449561,-0.282217468,1 -0.024120207,0.304116449,1 0.148422149,0.027105547,1 -1.031412466,0.380071744,0 -0.943866831,0.600079978,0 1.040815444,-0.442568799,1 1.497320582,-0.390044387,1 0.262257211,0.096298493,1 1.835052164,-0.103412045,1 0.212307547,0.256849647,1 -0.214058501,1.330767658,0 0.072106929,-0.279012748,1 0.510387152,0.89046706,0 0.859342773,-0.430765764,1 1.231053667,0.281765888,0 2.095878754,-0.124457184,1 -0.048682973,0.848149869,0 -0.069962169,0.732770866,0 0.921461668,0.537777084,0 -0.072085224,0.445207272,1 0.691680107,-0.670960694,1 1.996373123,-0.06278932,1 -0.029574955,-0.143949774,1 -1.180878055,0.287093293,0 -0.809065601,0.981521134,0 0.792498292,0.754776331,0 0.570914979,0.418499076,0 0.959679592,-0.614855395,1 0.683776074,0.718920684,0 0.268748478,-0.539059384,1 0.829946449,0.810127371,0 0.602300611,1.060983364,0 1.089894854,-0.258045447,0 1.215326407,-0.108394387,1 0.363525509,1.059532137,0 0.87411916,0.790742277,0 
-0.343226394,-0.150687513,1 -0.963432794,0.169237163,0 0.126423731,0.257156827,1 -1.266957539,0.122578452,0 1.809965093,0.521589108,1 -0.228845381,0.864591676,0 1.037129806,0.052176264,0 1.53870046,-0.054829392,1 1.843084808,0.119603176,1 0.182608949,0.360479486,1 -0.783997593,0.898602849,0 -0.927885151,0.846814488,0 -0.869158721,0.860426049,0 1.432638854,-0.684939943,1 0.585717127,0.931606566,0 1.756197562,0.520718884,1 1.591918487,0.03892334,1 0.910273683,1.120535967,0 0.043718854,-0.324789656,1 -0.20580902,0.603084516,0 0.79754503,-0.671824307,1 0.132794235,0.301552639,1 -0.088050173,0.956326244,0 -0.910303685,-0.140268609,0 1.273308231,-0.369064664,1 1.832106199,0.641188872,1 -0.073000565,1.023659911,0 0.097588494,-0.365188283,1 1.15143737,-0.378877952,1 1.006185066,0.586229182,0 -1.059323432,0.265637226,0 0.437426846,-0.184497995,1 0.24070156,0.771780065,0 1.826147629,-0.005370137,1 -0.698250117,0.518885048,0 0.033997867,1.002559399,0 -0.365756211,0.467188778,1 2.072602655,0.840845991,1 1.282207264,-0.3852855,1 0.294091067,-0.106090673,1 -0.684048777,0.498068262,0 0.569431434,-0.166176935,1 1.240374044,-0.338748665,1 0.296825382,-0.070172696,1 -1.002931362,0.137418862,0 0.772972777,0.39039837,0 0.199129842,1.169529783,0 0.272673306,1.177485462,0 1.300638002,-0.454605085,1 1.564634514,0.44804985,1 -0.040331609,1.026401238,0 1.629887767,0.244857792,1 1.143086991,-0.604189955,1 0.396320491,-0.39621675,1 -0.626659052,1.014021465,0 1.721056871,0.646281907,1 -0.604186387,0.703327277,0 0.057670035,1.272818815,0 0.454523688,-0.401695138,1 -0.670639988,0.612491482,0 0.328273511,1.160241489,0 0.510588023,1.016852601,0 0.789854793,0.132560638,0 0.500701227,0.528972528,0 0.961569333,0.454935146,0 1.80512166,0.544117663,1 0.030248855,1.07059265,0 1.73386192,0.379948472,1 -0.881264897,0.33091525,0 0.645125709,0.010547475,0 -0.567454227,1.065282884,0 0.275296535,-0.175275933,1 -0.633857944,0.464939873,0 -1.056542614,0.685542647,0 0.228879606,1.134429351,0 -0.053634126,0.322480786,1 -0.118192714,0.309408119,1 0.409426898,-0.434405407,1 1.229937385,-0.210990997,0 1.981178869,0.257237744,1 1.625934204,-0.617482966,1 -0.472291616,0.878888915,0 2.192799683,0.328102442,1 0.960354135,0.077361355,0 -0.790501055,1.31421158,0 2.237470292,0.06894621,1 1.414529152,-0.518533021,1 0.610237507,0.683805557,0 1.233649984,-0.556688892,1 0.586796523,-0.251126721,1 0.81754217,0.666592737,0 0.580221038,0.932275998,0 0.058137766,0.896289048,0 0.579846614,-0.567698327,1 1.929656615,0.156016477,1 0.992711133,-0.027168436,0 2.027815527,0.363122856,1 1.338652267,-0.823465756,1 1.598191518,-0.411064707,1 0.976142984,-0.190358151,0 1.61250291,-0.521032611,1 1.35546992,-0.490609874,1 0.499861695,1.058450639,0 1.015369625,0.207315117,0 1.018118245,0.456516401,0 1.096461863,-0.280727167,1 -1.194040968,0.416441978,0 0.568309164,0.647458129,0 0.960588057,0.208865115,0 1.449178024,-0.459902987,1 1.88199633,0.466290375,1 0.631996864,-0.309905342,1 0.654989783,-0.253093095,1 0.84516718,0.285090431,0 0.401198088,0.436221932,0 0.527232031,0.768876824,0 0.372940088,-0.3860018,1 2.066393868,-0.539216929,1 1.430582326,-0.388926174,1 0.189907716,-0.056403166,1 0.882697896,-0.734363957,1 -1.190098679,0.637042186,0 -0.894856639,0.556642259,0 0.70215789,-0.187835487,1 1.950487011,-0.345075672,1 0.022418039,1.004134807,0 0.849971596,-0.21483451,1 2.036410764,-0.025556022,1 0.275048593,-0.408398856,1 -0.108810254,0.159667885,1 0.938494136,0.64097432,0 -0.236808252,0.341747012,1 0.835324588,0.327895824,0 0.833630821,0.38695039,0 
0.602538337,-0.450144939,1 -1.316428825,0.754775796,0 1.844066074,0.193577285,1 1.064502653,-0.496454037,1 0.642658768,-0.020278558,1 1.016500344,-0.411243574,1 1.975868019,-0.0481966,1 0.993444732,-0.573538352,1 0.261224684,1.209148214,0 1.207647248,-0.681878027,1 1.029184593,0.662369558,0 -0.896113151,0.767599977,0 0.447930908,-0.476552598,1 1.193378751,-0.671368037,1 0.986163944,0.389985835,0 1.515764334,0.093743109,1 0.869459733,0.054546438,0 1.063058924,-0.003285827,1 0.094687292,-0.154667249,1 -0.215654363,1.093575329,0 -1.106925474,0.335575538,0 1.284327882,0.263986996,0 -1.337557833,-0.00412942,0 0.340293431,0.213608836,1 1.058099207,-0.020903818,0 -0.360133801,1.078035323,0 -0.644516105,0.488370431,0 -0.906095366,0.215727927,0 -0.652783156,1.173715873,0 1.022420252,0.192338596,0 -0.780675441,0.879523891,0 0.916308838,0.419944074,0 0.087696458,1.06618628,0 -0.495034594,0.721509893,0 0.604990594,-0.164890834,1 1.155923352,-0.549577539,1 0.071604266,1.697739637,0 -0.994669061,-0.007707493,0 1.480666468,-0.397497896,1 0.202685948,0.755654894,0 0.760454103,-0.180150242,0 0.029265726,0.06831265,1 -0.96321318,0.368704483,0 -1.070563993,0.553634924,0 0.046489389,0.953371901,0 0.486832393,-0.538941402,1 0.22261155,-0.15254824,1 2.179258079,0.541416792,1 -0.64634876,0.06402454,0 0.506319646,-0.86308181,1 2.114109853,-0.398015027,1 -0.03509274,0.108265231,1 -0.029109978,0.427875674,1 -0.140841702,0.661982894,0 0.266868529,-0.14631097,1 -0.998676417,0.116940821,0 0.082773572,0.961283817,0 0.941346049,-0.384150223,1 1.501332258,-0.275121139,1 0.29313361,0.37612275,1 0.539404176,0.643377656,0 0.189764846,0.00193534,1 1.556984609,-0.165952231,1 -0.24062961,0.071192873,1 0.919671265,0.383063909,0 -0.672636342,1.001715405,0 1.09642227,0.907604584,0 1.130676385,0.057702229,0 1.380338971,-0.376954849,1 0.090890973,0.118510932,1 1.858138806,0.239015185,1 0.179356967,-0.183583258,1 0.482296078,-0.651576504,1 0.761994902,0.633107619,0 2.473241953,7.86E-05,1 -0.245505241,0.336720392,0 -0.349850263,0.356718546,0 0.221741674,0.790938927,0 0.453347673,0.734276866,0 0.701948022,1.279234222,0 0.313783222,-0.62045118,1 -0.73809991,0.409950052,0 -0.650429008,0.41313687,0 0.81779875,-0.780491191,1 1.295782411,-0.615319879,1 -0.882350817,0.7161834,0 1.225622536,-0.199088478,1 1.701879096,0.109120994,1 -0.845272933,0.584193311,0 0.159783489,0.90737903,0 0.20528482,0.854610183,0 1.167600439,-0.072608591,1 -1.002638614,0.325598778,0 -0.771454661,0.289385589,0 1.657852572,-0.432481714,1 -1.140596214,0.368647611,0 0.05651217,-0.27020989,1 1.222078237,-0.107769216,1 0.166799331,-0.092400646,1 1.382139915,-0.54972713,1 -0.060441546,1.072743677,0 0.734895361,0.442755242,0 0.803896949,-0.493066015,1 1.953519644,-0.175476112,1 2.414351092,0.333630678,1 0.775111425,-0.812628459,1 -0.348238275,0.963577238,0 0.366964433,-0.310046727,1 -0.790850087,0.103205044,0 0.172920477,0.768141966,0 -0.028795956,0.686002968,0 0.796633375,0.482423152,0 0.849063798,0.672392408,0 0.361955413,0.991183959,0 0.119040405,-0.367741577,1 -0.492589106,0.625064565,0 -0.54233366,0.469779538,0 1.763243475,0.309898796,1 -1.153543233,-0.07626472,0 1.071246403,-0.310178222,0 0.906786599,0.546905393,0 2.090023124,0.414629671,1 2.083480948,0.249232463,1 1.186357975,-0.187071486,0 0.738270466,-0.307318366,1 1.891660982,-0.294302439,1 -0.77720761,0.938230073,0 0.405869823,0.838221554,0 -0.701489744,0.255676765,0 0.387436461,0.895997608,0 -0.426901469,0.796977399,0 1.252474088,0.419778642,0 -1.322590514,0.450941153,0 0.247652562,-0.108573295,1 
-0.850472738,0.854716727,0 -0.507554218,1.177745763,0 0.54236405,1.049010618,0 -0.125318758,0.557509852,1 1.370866032,-0.322147727,1 1.825410945,-0.379217248,1 1.562090065,0.210769453,1 0.434462285,0.950519364,0 0.131676796,0.935932583,0 1.229664001,0.725615425,0 0.312860044,0.989189572,0 1.90131256,0.15072306,1 0.193500004,1.050598819,0 0.03392665,0.407114286,1 1.651426238,-0.379391344,1 2.053219604,-0.039639925,1 0.243898894,-0.03794715,1 -1.113157386,0.744162073,0 -1.331684771,0.319815034,0 0.127646667,0.956519535,0 0.998491626,-0.49700126,1 0.662523004,0.055986284,0 -0.777262489,0.055346478,0 -0.731865243,0.305513652,0 -0.640417258,0.809781443,0 -0.76055756,0.491641159,0 1.151879338,-0.336581332,1 0.800560366,-0.542805923,1 0.884866314,0.583140372,0 0.925878139,0.740087536,0 0.874841419,0.430962575,0 -0.198712663,0.913528351,0 2.103667833,0.397014915,1 2.059901907,-0.231619581,1 0.473996884,-0.27048088,1 -0.57451854,0.584002067,0 -0.701419019,0.397124136,0 0.711614355,0.782885753,0 -0.496588268,1.302124802,0 0.066449298,0.784290559,0 0.538144645,-0.130769101,1 1.755682505,0.025561113,1 -0.849798595,0.931951784,0 0.333367885,-0.132341271,1 0.705602088,-0.229055609,1 -0.873354454,-0.49267613,0 -0.825327122,0.67304811,0 1.517665755,-0.523883147,1 0.120672904,-0.07052083,1 0.725710646,0.762302198,0 0.344673,0.208553298,1 0.263766538,0.049840726,1 -0.88392615,0.497464642,0 -1.156119607,0.170227029,0 1.248965737,0.369726326,0 0.952682479,-0.36811615,1 1.727901537,0.0372861,1 -0.767097925,0.709731654,0 1.165707539,0.769217807,0 0.049487533,0.720903576,0 -0.539098522,0.824388711,0 -0.982058703,0.288854587,0 1.528935488,-0.112802146,1 -0.672473504,0.686282233,0 0.895096305,0.665784299,0 -0.00760403,0.924168238,0 -0.240290739,1.021991518,0 -0.283467781,1.045292529,0 -1.026508891,0.459402069,0 1.588325794,-0.338967375,1 -0.348265559,1.213028067,0 0.77874607,-0.653521284,1 1.806149473,0.021203959,1 -0.129694598,0.725221931,0 -0.373456638,0.592067642,0 0.990046473,-0.464129679,1 1.772713799,-0.601498909,1 0.631039539,1.099329971,0 1.856709577,-0.190336881,1 0.018943698,0.21532497,1 0.172846724,0.604169382,0 0.557979808,0.757427667,0 0.080418897,0.119262443,1 -0.897248067,0.438920507,0 0.261996093,0.918375619,0 0.678953635,-0.435800274,1 1.210765492,-0.599573281,1 0.557261634,0.939216926,0 0.134711277,1.06457682,0 1.607493338,0.190921234,1 1.420577116,-0.148027678,1 1.744704015,-0.140667505,1 -0.215743207,1.230288577,0 0.750635451,-0.116643372,1 1.778321698,-0.54794534,1 0.957379288,0.795259767,0 -0.10590001,-0.197994179,1 1.851059402,-0.071373547,1 0.338727554,-0.211027921,1 1.960801446,-0.089686612,1 -0.17839517,0.727372153,1 -0.380811187,0.995282564,0 -1.215691519,0.453117521,0 -0.283985656,0.10845028,1 0.859746592,-0.59355665,1 -0.235539753,0.951239001,0 0.304995074,-0.57115376,1 -------------------------------------------------------------------------------- /4_archmage/ann.py: -------------------------------------------------------------------------------- 1 | """Python Implementation of Multi-Layer Perceptron""" 2 | 3 | import numpy as np 4 | from numpy.random import seed 5 | 6 | # ================= 7 | # Utility functions 8 | # ================= 9 | 10 | import random 11 | random.seed(123) 12 | 13 | # calculate a random number where: a <= rand < b 14 | def rand(a, b): 15 | return (b-a)*random.random() + a 16 | 17 | # Make a matrix 18 | def makeMatrix(I, J, fill=0.0): 19 | return np.zeros([I,J]) 20 | 21 | # our sigmoid function 22 | def sigmoid(x): 23 | #return math.tanh(x) 24 | return 
1/(1+np.exp(-x)) 25 | 26 | # derivative of our sigmoid function, in terms of the output (i.e. y) 27 | def dsigmoid(y): 28 | return y - y**2 29 | 30 | # ================== 31 | 32 | 33 | class MLP: 34 | """Multi Layer Perceptron 35 | 36 | Parameters 37 | ------------ 38 | ni : int 39 | Number of Input neurons 40 | n_h : int 41 | Number of Hidden neurons 42 | n_o : int 43 | Number of Output neurons 44 | 45 | Attributes 46 | ----------- 47 | ni : int 48 | Number of Input neurons 49 | n_h : int 50 | Number of Hidden neurons 51 | n_o : int 52 | Number of Output neurons 53 | 54 | ai : 1d-array (size: n_i) 55 | Activations for Input layer 56 | ah : 1d-array (size: n_h) 57 | Activations for Hidden layer 58 | ao : 1d-array (size: n_o) 59 | Activations for Output layer 60 | 61 | wi : 2d-array (shape n_i x n_h_) 62 | Weight matrix between Input and Hidden Layer. 63 | wo : 2d-array (shape n_h x n_o) 64 | Weight matrix between Hidden and Output Layer. 65 | 66 | """ 67 | 68 | def __init__(self, ni, nh, no): 69 | # number of input, hidden, and output nodes 70 | self.ni = ni + 1 # +1 for bias node 71 | self.nh = nh 72 | self.no = no 73 | 74 | # activations for nodes 75 | self.ai = [1.0]*self.ni 76 | self.ah = [1.0]*self.nh 77 | self.ao = [1.0]*self.no 78 | 79 | # create weights 80 | self.wi = makeMatrix(self.ni, self.nh) 81 | self.wo = makeMatrix(self.nh, self.no) 82 | 83 | # set them to random vaules 84 | for i in range(self.ni): 85 | for j in range(self.nh): 86 | self.wi[i][j] = rand(-0.2, 0.2) 87 | for j in range(self.nh): 88 | for k in range(self.no): 89 | self.wo[j][k] = rand(-2.0, 2.0) 90 | 91 | # last change in weights for momentum 92 | self.ci = makeMatrix(self.ni, self.nh) 93 | self.co = makeMatrix(self.nh, self.no) 94 | 95 | 96 | def backPropagate(self, targets, N, M): 97 | 98 | if len(targets) != self.no: 99 | print(targets) 100 | raise ValueError('wrong number of target values') 101 | 102 | # calculate error terms for output 103 | output_deltas = np.zeros(self.no) 104 | for k in range(self.no): 105 | error = targets[k]-self.ao[k] 106 | output_deltas[k] = dsigmoid(self.ao[k]) * error 107 | 108 | # calculate error terms for hidden 109 | hidden_deltas = np.zeros(self.nh) 110 | for j in range(self.nh): 111 | error = 0.0 112 | for k in range(self.no): 113 | error += output_deltas[k]*self.wo[j][k] 114 | hidden_deltas[j] = dsigmoid(self.ah[j]) * error 115 | 116 | # update output weights 117 | for j in range(self.nh): 118 | for k in range(self.no): 119 | change = output_deltas[k] * self.ah[j] 120 | self.wo[j][k] += N*change + M*self.co[j][k] 121 | self.co[j][k] = change 122 | 123 | # update input weights 124 | for i in range(self.ni): 125 | for j in range(self.nh): 126 | change = hidden_deltas[j]*self.ai[i] 127 | self.wi[i][j] += N*change + M*self.ci[i][j] 128 | self.ci[i][j] = change 129 | 130 | # calculate error 131 | error = 0.0 132 | for k in range(len(targets)): 133 | error += 0.5*(targets[k]-self.ao[k])**2 134 | return error 135 | 136 | 137 | def test(self, patterns): 138 | self.predict = np.empty([len(patterns), self.no]) 139 | for i, p in enumerate(patterns): 140 | self.predict[i] = self.activate(p) 141 | #self.predict[i] = self.activate(p[0]) 142 | 143 | def activate(self, inputs): 144 | 145 | if len(inputs) != self.ni-1: 146 | print(inputs) 147 | raise ValueError('wrong number of inputs') 148 | 149 | # input activations 150 | for i in range(self.ni-1): 151 | self.ai[i] = inputs[i] 152 | 153 | # hidden activations 154 | for j in range(self.nh): 155 | sum_h = 0.0 156 | for i in range(self.ni): 
157 | sum_h += self.ai[i] * self.wi[i][j] 158 | self.ah[j] = sigmoid(sum_h) 159 | 160 | # output activations 161 | for k in range(self.no): 162 | sum_o = 0.0 163 | for j in range(self.nh): 164 | sum_o += self.ah[j] * self.wo[j][k] 165 | self.ao[k] = sigmoid(sum_o) 166 | 167 | return self.ao[:] 168 | 169 | 170 | def train(self, patterns, iterations=1000, N=0.5, M=0.1): 171 | # N: learning rate 172 | # M: momentum factor 173 | patterns = list(patterns) 174 | for i in range(iterations): 175 | error = 0.0 176 | for p in patterns: 177 | inputs = p[0] 178 | targets = p[1] 179 | self.activate(inputs) 180 | error += self.backPropagate([targets], N, M) 181 | if i % 5 == 0: 182 | print('error in interation %d : %-.5f' % (i,error)) 183 | print('Final training error: %-.5f' % error) 184 | 185 | 186 | class Perceptron(object): 187 | """Perceptron classifier. 188 | 189 | Parameters 190 | ------------ 191 | eta : float 192 | Learning rate (between 0.0 and 1.0) 193 | n_iter : int 194 | Passes over the training dataset. 195 | 196 | Attributes 197 | ----------- 198 | w_ : 1d-array 199 | Weights after fitting. 200 | errors_ : list 201 | Number of misclassifications in every epoch. 202 | 203 | """ 204 | def __init__(self, eta=0.01, n_iter=10): 205 | self.eta = eta 206 | self.n_iter = n_iter 207 | 208 | def fit(self, X, y): 209 | """Fit training data. 210 | 211 | Parameters 212 | ---------- 213 | X : {array-like}, shape = [n_samples, n_features] 214 | Training vectors, where n_samples is the number of samples and 215 | n_features is the number of features. 216 | y : array-like, shape = [n_samples] 217 | Target values. 218 | 219 | Returns 220 | ------- 221 | self : object 222 | 223 | """ 224 | self.w_ = np.zeros(1 + X.shape[1]) 225 | self.errors_ = [] 226 | 227 | for _ in range(self.n_iter): 228 | errors = 0 229 | for xi, target in zip(X, y): 230 | update = self.eta * (target - self.predict(xi)) 231 | self.w_[1:] += update * xi 232 | self.w_[0] += update 233 | errors += int(update != 0.0) 234 | self.errors_.append(errors) 235 | return self 236 | 237 | def net_input(self, X): 238 | """Calculate net input""" 239 | return np.dot(X, self.w_[1:]) + self.w_[0] 240 | 241 | def predict(self, X): 242 | """Return class label after unit step""" 243 | return np.where(self.net_input(X) >= 0.0, 1, -1) 244 | 245 | 246 | class AdalineGD: 247 | """ADAptive LInear NEuron classifier. 248 | 249 | Parameters 250 | ------------ 251 | eta : float 252 | Learning rate (between 0.0 and 1.0) 253 | n_iter : int 254 | Passes over the training dataset. 255 | 256 | Attributes 257 | ----------- 258 | w_ : 1d-array 259 | Weights after fitting. 260 | errors_ : list 261 | Number of misclassifications in every epoch. 262 | 263 | """ 264 | def __init__(self, eta=0.01, n_iter=50): 265 | self.eta = eta 266 | self.n_iter = n_iter 267 | 268 | def fit(self, X, y): 269 | """ Fit training data. 270 | 271 | Parameters 272 | ---------- 273 | X : {array-like}, shape = [n_samples, n_features] 274 | Training vectors, where n_samples is the number of samples and 275 | n_features is the number of features. 276 | y : array-like, shape = [n_samples] 277 | Target values. 
278 | 279 | Returns 280 | ------- 281 | self : object 282 | """ 283 | self.w_ = np.zeros(1 + X.shape[1]) 284 | self.cost_ = [] 285 | 286 | for i in range(self.n_iter): 287 | output = self.net_input(X) 288 | errors = (y - output) 289 | self.w_[1:] += self.eta * X.T.dot(errors) 290 | self.w_[0] += self.eta * errors.sum() 291 | cost = (errors**2).sum() / 2.0 292 | self.cost_.append(cost) 293 | return self 294 | 295 | def net_input(self, X): 296 | """Calculate net input""" 297 | return np.dot(X, self.w_[1:]) + self.w_[0] 298 | 299 | def activation(self, X): 300 | """Compute linear activation""" 301 | return self.net_input(X) 302 | 303 | def predict(self, X): 304 | """Return class label after unit step""" 305 | return np.where(self.activation(X) >= 0.0, 1, -1) 306 | 307 | class AdalineSGD(object): 308 | """ADAptive LInear NEuron classifier. 309 | 310 | Parameters 311 | ------------ 312 | eta : float 313 | Learning rate (between 0.0 and 1.0) 314 | n_iter : int 315 | Passes over the training dataset. 316 | 317 | Attributes 318 | ----------- 319 | w_ : 1d-array 320 | Weights after fitting. 321 | errors_ : list 322 | Number of misclassifications in every epoch. 323 | shuffle : bool (default: True) 324 | Shuffles training data every epoch if True to prevent cycles. 325 | random_state : int (default: None) 326 | Set random state for shuffling and initializing the weights. 327 | 328 | """ 329 | def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None): 330 | self.eta = eta 331 | self.n_iter = n_iter 332 | self.w_initialized = False 333 | self.shuffle = shuffle 334 | if random_state: 335 | seed(random_state) 336 | 337 | def fit(self, X, y): 338 | """ Fit training data. 339 | 340 | Parameters 341 | ---------- 342 | X : {array-like}, shape = [n_samples, n_features] 343 | Training vectors, where n_samples is the number of samples and 344 | n_features is the number of features. 345 | y : array-like, shape = [n_samples] 346 | Target values. 
347 | 348 | Returns 349 | ------- 350 | self : object 351 | 352 | """ 353 | self._initialize_weights(X.shape[1]) 354 | self.cost_ = [] 355 | for i in range(self.n_iter): 356 | if self.shuffle: 357 | X, y = self._shuffle(X, y) 358 | cost = [] 359 | for xi, target in zip(X, y): 360 | cost.append(self._update_weights(xi, target)) 361 | avg_cost = sum(cost)/len(y) 362 | self.cost_.append(avg_cost) 363 | return self 364 | 365 | def partial_fit(self, X, y): 366 | """Fit training data without reinitializing the weights""" 367 | if not self.w_initialized: 368 | self._initialize_weights(X.shape[1]) 369 | if y.ravel().shape[0] > 1: 370 | for xi, target in zip(X, y): 371 | self._update_weights(xi, target) 372 | else: 373 | self._update_weights(X, y) 374 | return self 375 | 376 | def _shuffle(self, X, y): 377 | """Shuffle training data""" 378 | r = np.random.permutation(len(y)) 379 | return X[r], y[r] 380 | 381 | def _initialize_weights(self, m): 382 | """Initialize weights to zeros""" 383 | self.w_ = np.zeros(1 + m) 384 | self.w_initialized = True 385 | 386 | def _update_weights(self, xi, target): 387 | """Apply Adaline learning rule to update the weights""" 388 | output = self.net_input(xi) 389 | error = (target - output) 390 | self.w_[1:] += self.eta * xi.dot(error) 391 | self.w_[0] += self.eta * error 392 | cost = 0.5 * error**2 393 | return cost 394 | 395 | def net_input(self, X): 396 | """Calculate net input""" 397 | return np.dot(X, self.w_[1:]) + self.w_[0] 398 | 399 | def activation(self, X): 400 | """Compute linear activation""" 401 | return self.net_input(X) 402 | 403 | def predict(self, X): 404 | """Return class label after unit step""" 405 | return np.where(self.activation(X) >= 0.0, 1, -1) 406 | -------------------------------------------------------------------------------- /1_apprentice/2.2. Scipy Sparse_Matrices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Scipy Sparse Matrices" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "subslide" 19 | } 20 | }, 21 | "source": [ 22 | "**Sparse Matrices** are very nice in some situations. \n", 23 | "\n", 24 | "For example, in some machine learning tasks, especially those associated\n", 25 | "with textual analysis, the data may be mostly zeros. \n", 26 | "\n", 27 | "Storing all these zeros is very inefficient. 
\n", 28 | "\n", 29 | "We can create and manipulate sparse matrices as follows:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": { 36 | "collapsed": true, 37 | "slideshow": { 38 | "slide_type": "skip" 39 | } 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import numpy as np\n", 44 | "\n", 45 | "np.random.seed(42)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "subslide" 54 | } 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]\n", 62 | " [0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]\n", 63 | " [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497]\n", 64 | " [0.18340451 0.30424224 0.52475643 0.43194502 0.29122914]\n", 65 | " [0.61185289 0.13949386 0.29214465 0.36636184 0.45606998]\n", 66 | " [0.78517596 0.19967378 0.51423444 0.59241457 0.04645041]\n", 67 | " [0.60754485 0.17052412 0.06505159 0.94888554 0.96563203]\n", 68 | " [0.80839735 0.30461377 0.09767211 0.68423303 0.44015249]\n", 69 | " [0.12203823 0.49517691 0.03438852 0.9093204 0.25877998]\n", 70 | " [0.66252228 0.31171108 0.52006802 0.54671028 0.18485446]]\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# Create a random array with a lot of zeros\n", 76 | "X = np.random.random((10, 5))\n", 77 | "print(X)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": { 84 | "slideshow": { 85 | "slide_type": "subslide" 86 | } 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "[[0. 0.95071431 0.73199394 0. 0. ]\n", 94 | " [0. 0. 0.86617615 0. 0.70807258]\n", 95 | " [0. 0.96990985 0.83244264 0. 0. ]\n", 96 | " [0. 0. 0. 0. 0. ]\n", 97 | " [0. 0. 0. 0. 0. ]\n", 98 | " [0.78517596 0. 0. 0. 0. ]\n", 99 | " [0. 0. 0. 0.94888554 0.96563203]\n", 100 | " [0.80839735 0. 0. 0. 0. ]\n", 101 | " [0. 0. 0. 0.9093204 0. ]\n", 102 | " [0. 0. 0. 0. 0. ]]\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "X[X < 0.7] = 0\n", 108 | "print(X)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 4, 114 | "metadata": { 115 | "slideshow": { 116 | "slide_type": "subslide" 117 | } 118 | }, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | " (0, 1)\t0.9507143064099162\n", 125 | " (0, 2)\t0.7319939418114051\n", 126 | " (1, 2)\t0.8661761457749352\n", 127 | " (1, 4)\t0.7080725777960455\n", 128 | " (2, 1)\t0.9699098521619943\n", 129 | " (2, 2)\t0.8324426408004217\n", 130 | " (5, 0)\t0.7851759613930136\n", 131 | " (6, 3)\t0.9488855372533332\n", 132 | " (6, 4)\t0.9656320330745594\n", 133 | " (7, 0)\t0.8083973481164611\n", 134 | " (8, 3)\t0.9093204020787821\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "from scipy import sparse\n", 140 | "\n", 141 | "# turn X into a csr (Compressed-Sparse-Row) matrix\n", 142 | "X_csr = sparse.csr_matrix(X)\n", 143 | "print(X_csr)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 5, 149 | "metadata": { 150 | "slideshow": { 151 | "slide_type": "subslide" 152 | } 153 | }, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "[[0. 0.95071431 0.73199394 0. 0. ]\n", 160 | " [0. 0. 0.86617615 0. 0.70807258]\n", 161 | " [0. 0.96990985 0.83244264 0. 0. ]\n", 162 | " [0. 0. 0. 0. 0. ]\n", 163 | " [0. 0. 0. 0. 0. ]\n", 164 | " [0.78517596 0. 
0. 0. 0. ]\n", 165 | " [0. 0. 0. 0.94888554 0.96563203]\n", 166 | " [0.80839735 0. 0. 0. 0. ]\n", 167 | " [0. 0. 0. 0.9093204 0. ]\n", 168 | " [0. 0. 0. 0. 0. ]]\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "# convert the sparse matrix to a dense array\n", 174 | "print(X_csr.toarray())" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 6, 180 | "metadata": { 181 | "slideshow": { 182 | "slide_type": "subslide" 183 | } 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "True" 190 | ] 191 | }, 192 | "execution_count": 6, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "# Sparse matrices support linear algebra:\n", 199 | "y = np.random.random(X_csr.shape[1])\n", 200 | "z1 = X_csr.dot(y)\n", 201 | "z2 = X.dot(y)\n", 202 | "np.allclose(z1, z2)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": { 208 | "slideshow": { 209 | "slide_type": "subslide" 210 | } 211 | }, 212 | "source": [ 213 | "* The CSR representation can be very efficient for computations, but it is not as good for adding elements. \n", 214 | "\n", 215 | "* For that, the **LIL** (List of Lists) representation is better:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 7, 221 | "metadata": { 222 | "slideshow": { 223 | "slide_type": "fragment" 224 | } 225 | }, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | " (0, 2)\t2.0\n", 232 | " (0, 3)\t3.0\n", 233 | " (0, 4)\t4.0\n", 234 | " (1, 0)\t1.0\n", 235 | " (1, 1)\t2.0\n", 236 | " (1, 4)\t5.0\n", 237 | " (2, 0)\t2.0\n", 238 | " (2, 2)\t4.0\n", 239 | " (2, 4)\t6.0\n", 240 | " (3, 0)\t3.0\n", 241 | " (3, 3)\t6.0\n", 242 | " (3, 4)\t7.0\n", 243 | " (4, 0)\t4.0\n", 244 | " (4, 4)\t8.0\n", 245 | "[[0. 0. 2. 3. 4.]\n", 246 | " [1. 2. 0. 0. 5.]\n", 247 | " [2. 0. 4. 0. 6.]\n", 248 | " [3. 0. 0. 6. 7.]\n", 249 | " [4. 0. 0. 0. 
8.]]\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "# Create an empty LIL matrix and add some items\n", 255 | "X_lil = sparse.lil_matrix((5, 5))\n", 256 | "\n", 257 | "for i, j in np.random.randint(0, 5, (15, 2)):\n", 258 | " X_lil[i, j] = i + j\n", 259 | "\n", 260 | "print(X_lil)\n", 261 | "print(X_lil.toarray())" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": { 267 | "slideshow": { 268 | "slide_type": "subslide" 269 | } 270 | }, 271 | "source": [ 272 | "* Often, once an LIL matrix is created, it is useful to convert it to a CSR format \n", 273 | " * **Note**: many scikit-learn algorithms require CSR or CSC format" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 8, 279 | "metadata": { 280 | "slideshow": { 281 | "slide_type": "fragment" 282 | } 283 | }, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | " (0, 2)\t2.0\n", 290 | " (0, 3)\t3.0\n", 291 | " (0, 4)\t4.0\n", 292 | " (1, 0)\t1.0\n", 293 | " (1, 1)\t2.0\n", 294 | " (1, 4)\t5.0\n", 295 | " (2, 0)\t2.0\n", 296 | " (2, 2)\t4.0\n", 297 | " (2, 4)\t6.0\n", 298 | " (3, 0)\t3.0\n", 299 | " (3, 3)\t6.0\n", 300 | " (3, 4)\t7.0\n", 301 | " (4, 0)\t4.0\n", 302 | " (4, 4)\t8.0\n" 303 | ] 304 | } 305 | ], 306 | "source": [ 307 | "X_csr = X_lil.tocsr()\n", 308 | "print(X_csr)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": { 314 | "slideshow": { 315 | "slide_type": "subslide" 316 | } 317 | }, 318 | "source": [ 319 | "There are several other sparse formats that can be useful for various problems:\n", 320 | "\n", 321 | "- `CSC` (compressed sparse column)\n", 322 | "- `BSR` (block sparse row)\n", 323 | "- `COO` (coordinate)\n", 324 | "- `DIA` (diagonal)\n", 325 | "- `DOK` (dictionary of keys)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": { 331 | "slideshow": { 332 | "slide_type": "slide" 333 | } 334 | }, 335 | "source": [ 336 | "## CSC - Compressed Sparse Column\n", 337 | "\n", 338 | "**Advantages of the CSC format**\n", 339 | "\n", 340 | " * efficient arithmetic operations CSC + CSC, CSC * CSC, etc.\n", 341 | " * efficient column slicing\n", 342 | " * fast matrix vector products (CSR, BSR may be faster)\n", 343 | "\n", 344 | "**Disadvantages of the CSC format**\n", 345 | "\n", 346 | " * slow row slicing operations (consider CSR)\n", 347 | " * changes to the sparsity structure are expensive (consider LIL or DOK)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": { 353 | "slideshow": { 354 | "slide_type": "subslide" 355 | } 356 | }, 357 | "source": [ 358 | "### BSR - Block Sparse Row\n", 359 | "\n", 360 | "The Block Compressed Row (`BSR`) format is very similar to the Compressed Sparse Row (`CSR`) format. \n", 361 | "\n", 362 | "BSR is appropriate for sparse matrices with *dense sub matrices* like the example below. \n", 363 | "\n", 364 | "Block matrices often arise in *vector-valued* finite element discretizations. \n", 365 | "\n", 366 | "In such cases, BSR is **considerably more efficient** than CSR and CSC for many sparse arithmetic operations." 
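The trade-offs above are easy to see in code. This is a minimal sketch (not part of the original notebook) contrasting column slicing in CSC with row slicing in CSR, reusing the same thresholded random matrix as before:

```python
import numpy as np
from scipy import sparse

np.random.seed(42)
X = np.random.random((10, 5))
X[X < 0.7] = 0  # sparsify the array

X_csr = sparse.csr_matrix(X)
X_csc = X_csr.tocsc()  # conversion between the two compressed formats is cheap

# CSC stores data column by column, so column slicing is efficient...
col = X_csc[:, 2]
# ...while CSR stores data row by row and favours row slicing
row = X_csr[2, :]

print(col.toarray().ravel())
print(row.toarray().ravel())
```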
367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 9, 372 | "metadata": { 373 | "slideshow": { 374 | "slide_type": "subslide" 375 | } 376 | }, 377 | "outputs": [ 378 | { 379 | "data": { 380 | "text/plain": [ 381 | "array([[1, 1, 0, 0, 2, 2],\n", 382 | " [1, 1, 0, 0, 2, 2],\n", 383 | " [0, 0, 0, 0, 3, 3],\n", 384 | " [0, 0, 0, 0, 3, 3],\n", 385 | " [4, 4, 5, 5, 6, 6],\n", 386 | " [4, 4, 5, 5, 6, 6]])" 387 | ] 388 | }, 389 | "execution_count": 9, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "from scipy.sparse import bsr_matrix\n", 396 | "\n", 397 | "indptr = np.array([0, 2, 3, 6])\n", 398 | "indices = np.array([0, 2, 2, 0, 1, 2])\n", 399 | "data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)\n", 400 | "bsr_matrix((data, indices, indptr), shape=(6, 6)).toarray()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": { 406 | "slideshow": { 407 | "slide_type": "slide" 408 | } 409 | }, 410 | "source": [ 411 | "## COO - Coordinate Sparse Matrix\n", 412 | "\n", 413 | "**Advantages of the COO format**\n", 414 | "\n", 415 | " * facilitates fast conversion among sparse formats\n", 416 | " * permits duplicate entries (see example)\n", 417 | " * very fast conversion to and from CSR/CSC formats\n", 418 | "\n", 419 | "**Disadvantages of the COO format**\n", 420 | "\n", 421 | " * does not directly support arithmetic operations and slicing\n", 422 | " \n", 423 | "**Intended Usage**\n", 424 | "\n", 425 | " * COO is a fast format for constructing sparse matrices\n", 426 | " * Once a matrix has been constructed, convert to CSR or CSC format for fast arithmetic and matrix vector\n", 427 | " operations\n", 428 | " * By default when converting to CSR or CSC format, duplicate (i,j) entries will be summed together. \n", 429 | " This facilitates efficient construction of finite element matrices and the like.\n" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": { 435 | "slideshow": { 436 | "slide_type": "slide" 437 | } 438 | }, 439 | "source": [ 440 | "## DOK - Dictionary of Keys\n", 441 | "\n", 442 | "A DOK matrix is based on a dictionary mapping `(row, column)` index pairs to values. Like the other sparse formats, it can be used in arithmetic operations: it supports addition, subtraction, multiplication, division, and matrix power.\n", 443 | "\n", 444 | "DOK allows efficient O(1) access to individual elements and is convenient for building a sparse matrix incrementally. Duplicates are not allowed. Once constructed, it can be efficiently converted to a `coo_matrix`."
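A minimal sketch (not part of the original notebook) of the COO behaviour described above, showing that duplicate `(i, j)` entries are summed when converting to CSR:

```python
import numpy as np
from scipy import sparse

# the (0, 0) coordinate appears twice on purpose
row = np.array([0, 0, 1, 2])
col = np.array([0, 0, 1, 2])
data = np.array([1.0, 2.0, 3.0, 4.0])

A_coo = sparse.coo_matrix((data, (row, col)), shape=(3, 3))

# converting to CSR sums the duplicates: entry (0, 0) becomes 1.0 + 2.0 = 3.0
print(A_coo.tocsr().toarray())
```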
445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 10, 450 | "metadata": { 451 | "slideshow": { 452 | "slide_type": "subslide" 453 | } 454 | }, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/plain": [ 459 | "array([[0., 1., 2., 3., 4.],\n", 460 | " [0., 2., 3., 4., 5.],\n", 461 | " [0., 0., 4., 5., 6.],\n", 462 | " [0., 0., 0., 6., 7.],\n", 463 | " [0., 0., 0., 0., 8.]], dtype=float32)" 464 | ] 465 | }, 466 | "execution_count": 10, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "from scipy.sparse import dok_matrix\n", 473 | "S = dok_matrix((5, 5), dtype=np.float32)\n", 474 | "for i in range(5):\n", 475 | " for j in range(i, 5):\n", 476 | " S[i,j] = i+j\n", 477 | " \n", 478 | "S.toarray()" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": { 484 | "slideshow": { 485 | "slide_type": "subslide" 486 | } 487 | }, 488 | "source": [ 489 | "The ``scipy.sparse`` submodule also has a lot of functions for sparse matrices\n", 490 | "including linear algebra, sparse solvers, graph algorithms, and much more." 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3.6 (Develer Science)", 504 | "language": "python", 505 | "name": "develer-science" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.6.6" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 2 522 | } 523 | -------------------------------------------------------------------------------- /1_apprentice/3.4 Level Up.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Level Up: Final Challenge on Data Viz" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## matplotlib - 2D and 3D plotting in Python" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# This line configures matplotlib to show figures embedded in the notebook, \n", 24 | "# instead of opening a new window for each figure.\n", 25 | "%matplotlib notebook" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Brief Recap to Matplotlib" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Matplotlib is an excellent 2D and 3D graphics library for generating scientific figures. Some of the many advantages of this library include:\n", 40 | "\n", 41 | "* Easy to get started\n", 42 | "* Support for $\\LaTeX$ formatted labels and texts\n", 43 | "* Great control of every element in a figure, including figure size and DPI. 
\n", 44 | "* High-quality output in many formats, including PNG, PDF, SVG, EPS, and PGF.\n", 45 | "* GUI for interactively exploring figures *and* support for headless generation of figure files (useful for batch jobs).\n", 46 | "\n", 47 | "One of the of the key features of matplotlib that I would like to emphasize, and that I think makes matplotlib highly suitable for generating figures for scientific publications is that all aspects of the figure can be controlled *programmatically*. This is important for reproducibility and convenient when one needs to regenerate the figure with updated data or change its appearance. \n", 48 | "\n", 49 | "More information at the Matplotlib web page: http://matplotlib.org/\n", 50 | "\n", 51 | "**To get started**:\n", 52 | "import the `matplotlib.pyplot` module under the name `plt` (the tidy way):" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "import matplotlib.pyplot as plt # de-facto convention" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "# 1. Matlab-like API - Basic Plotting\n", 71 | "\n", 72 | "The easiest way to get started with plotting using matplotlib is often to use the MATLAB-like API provided by matplotlib. \n", 73 | "\n", 74 | "It is designed to be compatible with MATLAB's plotting functions, so it is easy to get started with if you are familiar with MATLAB." 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Ex. 1.1\n", 82 | "\n", 83 | "Generate two arrays, `x` and `y` so that values of `y` are any function of `x` (e.g. $y = x^2)$. Plot the resulting values.\n", 84 | "\n", 85 | "#### Hint: Take a look at `plot` to generage plot. Take also a look to `xlabel` and `ylabel`. When you're done, call `show()` to actually display the chart." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## Ex 1.1.1\n", 102 | "\n", 103 | "Plot the `sin` function" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Ex 1.2\n", 120 | "\n", 121 | "With the same two `x` and `y` arrays of the previous exercise, generate two plots inverting axis. If you fancy, play a bit with options before moving forward (e.g. set two different colors and different tickers..)\n", 122 | "\n", 123 | "#### Hint: Take a look at `subplot` + plot options. 
" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Ex 1.3 \n", 140 | "\n", 141 | "Generate two plots of any arbitrary `x`, and `y` values whose labels are the corresponding functions written using LaTeX formulas.\n", 142 | "\n", 143 | "Example:\n", 144 | "\n", 145 | "```python\n", 146 | "fig, ax = plt.subplots()\n", 147 | "\n", 148 | "ax.plot(.....\n", 149 | "```\n", 150 | "Put `legend` inside the chart.\n", 151 | "\n", 152 | "#### Hint: Take a look at `legend`, axix and `label` plot option" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Colors \n", 169 | "\n", 170 | "With matplotlib, you can define the colors of lines and other graphical elements in a number of ways. \n", 171 | "\n", 172 | "First of all, we can use the MATLAB-like syntax where `'b'` means blue, `'g'` means green, etc. The MATLAB API for selecting line styles are also supported: where, for example, 'b.-' means a blue line with dots\n", 173 | "\n", 174 | "Example:\n", 175 | "\n", 176 | "```python\n", 177 | "\n", 178 | "# MATLAB style line color and style \n", 179 | "ax.plot(x, x**2, 'b.-') # blue line with dots\n", 180 | "ax.plot(x, x**3, 'g--') # green dashed line\n", 181 | "```" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Ex 1.4\n", 189 | "\n", 190 | "Define colors of previous plots by their names or RGB hex codes and optionally provide an alpha value using the `color` and `alpha` keyword arguments" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Line and marker styles\n", 207 | "\n", 208 | "To change the line width, we can use the `linewidth` or `lw` keyword argument. The line style can be selected using the `linestyle` or `ls` keyword arguments" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Ex 1.5\n", 216 | "\n", 217 | "Generate three plots with different line widths" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": true 225 | }, 226 | "outputs": [], 227 | "source": [] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Ex 1.6\n", 234 | "\n", 235 | "Generate two plots for ($x$, $x^2$) and ($x$, $e(x)$) using normal and logarithmic scales.\n", 236 | "\n", 237 | "#### Hint: Take a look at `set_yscale`" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "# 2 .Charts\n", 254 | "\n", 255 | "Matplotlib supports different chart types. For example: *Scatter plots*, *Bar plots*, *histograms*." 
256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## Ex 2.1\n", 263 | "\n", 264 | "Generate a `numpy.array` of $10^5$ random numbers. Plot the histogram of this array.\n", 265 | "\n", 266 | "#### Hint: Take a look at `plt.hist`. Also, look at the values returned by this function." 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": true 274 | }, 275 | "outputs": [], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "The `plt.hist` docstring has more information on other customization options available. " 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## Ex 2.2\n", 290 | "\n", 291 | "Plot the histogram of three (or more) different normal distributions.\n", 292 | "\n", 293 | "#### Hint: Take a look at the `histtype` and `alpha` parameters of `plt.hist`. \n", 294 | "\n", 295 | "#### To generate normal distributions, take a look at `np.random.normal`" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "## Ex 2.3\n", 312 | "\n", 313 | "Generate a scatter plot of an array of random numbers against itself plus some random perturbation: $y = x + c * \text{gaussian noise}$.\n", 314 | "Select colors and markers that you fancy.\n", 315 | "\n", 316 | "#### Hint: See `plt.scatter`." 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "## Ex 2.4\n", 333 | "\n", 334 | "Generate a random number distribution and create a box plot.\n", 335 | "\n", 336 | "#### Hint: See `plt.boxplot`" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "## Ex 2.5\n", 353 | "\n", 354 | "Given \n", 355 | "```python\n", 356 | "\n", 357 | "mean = [0, 0]\n", 358 | "cov = [[1, 1], [1, 2]]\n", 359 | "```\n", 360 | "Get `x` and `y` drawn from a multivariate Gaussian distribution and plot a 2D Histogram, along with a corresponding colorbar.\n", 361 | "\n", 362 | "#### Hint: See `plt.hist2d` and `plt.colorbar`\n", 363 | "\n" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "collapsed": true 371 | }, 372 | "outputs": [], 373 | "source": [] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "# 3. Challenge: Handwritten Digits\n", 380 | "\n", 381 | "For an example of where matplotlib might be useful, let’s look at an interesting visualization of some hand-written digits data. 
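One possible solution sketch for Ex 2.5, using the given `mean` and `cov` (the bin count and colormap are arbitrary choices):

```python
import numpy as np
import matplotlib.pyplot as plt

mean = [0, 0]
cov = [[1, 1], [1, 2]]

# draw (x, y) pairs from a multivariate Gaussian distribution
x, y = np.random.multivariate_normal(mean, cov, 10000).T

plt.hist2d(x, y, bins=30, cmap='Blues')
plt.colorbar(label='counts in bin')
plt.show()
```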
\n", 382 | "\n", 383 | "This data is included in scikit-learn, and consists of nearly $2000$ $8\\times8$ thumbnails showing various hand-written digits.\n", 384 | "\n", 385 | "For now, let’s start by downloading the digits data and visualizing several of the example images with plt.imshow()" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "## Ex 3.1\n", 393 | "\n", 394 | "Download the digits dataset from `sklearn`\n", 395 | "\n", 396 | "#### Hint: Take a look at `sklearn.datasets`" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "## Ex 3.2\n", 413 | "\n", 414 | "Generate an $8\\times8$ grid of images with a `figsize` of `(6, 6)` and show images from the dataset.\n", 415 | "\n", 416 | "#### Hint: Take a look at `plt.imshow`" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | "collapsed": true 424 | }, 425 | "outputs": [], 426 | "source": [] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "## Ex 3.3 (Manifold Learing Preview)\n", 433 | "\n", 434 | "Provided the following two-dimensional manifold learning projection of digits data:\n", 435 | "\n", 436 | "```python\n", 437 | "\n", 438 | "# project the digits into 2 dimensions using IsoMap from sklearn.manifold \n", 439 | "import Isomap\n", 440 | "iso = Isomap(n_components=2)\n", 441 | "projection = iso.fit_transform(digits.data)\n", 442 | "```\n", 443 | "Use a discrete colormap to view the results in a scatter plot, setting the ticks and clim to improve the aesthetics of the resulting colorbar\n" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": { 450 | "collapsed": true 451 | }, 452 | "outputs": [], 453 | "source": [] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "# There's more: 3D Plotting\n", 460 | "\n", 461 | "To use 3D graphics in matplotlib, we first need to create an instance of the `Axes3D` class. 3D axes can be added to a matplotlib figure canvas in exactly the same way as 2D axes; or, more conveniently, by passing a `projection='3d'` keyword argument to the `add_axes` or `add_subplot` methods.\n", 462 | "\n", 463 | "Start with \n", 464 | "\n", 465 | "`from mpl_toolkits.mplot3d.axes3d import Axes3D`" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": { 472 | "collapsed": true 473 | }, 474 | "outputs": [], 475 | "source": [] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "# Bokeh\n", 482 | "\n", 483 | "Bokeh is a Python interactive visualization library that targets modern web browsers for presentation. Its goal is to provide elegant, concise construction of novel graphics in the style of D3.js, and to extend this capability with high-performance interactivity over very large or streaming datasets. 
Bokeh can help anyone who would like to quickly and easily create interactive plots, dashboards, and data applications.\n", 484 | "\n", 485 | "More on: [http://bokeh.pydata.org/en/latest/](http://bokeh.pydata.org/en/latest/)\n" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "### Using Bokeh\n", 493 | "\n", 494 | "Bokeh's APIs are slightly different from those of Matplotlib.\n", 495 | "For example, to make a line plot you create a `figure` (from `bokeh.plotting`) and call its `line()` method:\n", 496 | "\n", 497 | "```python\n", 498 | "# MISSING - PLEASE FILL HERE\n", 499 | "\n", 500 | "from bokeh.plotting import figure, show\n", 501 | "\n", 502 | "# prepare some data\n", 503 | "x = list(range(1, 6))\n", 504 | "y = [6, 7, 2, 4, 5]\n", 505 | "\n", 506 | "# create a new plot with axis label and title\n", 507 | "p = figure(title=\"Sample Line Example\", x_axis_label='x', y_axis_label='y')\n", 508 | "\n", 509 | "# add a line renderer with legend and line thickness\n", 510 | "p.line(x, y, legend='Temp.', line_width=2)\n", 511 | "\n", 512 | "# show the result\n", 513 | "show(p)\n", 514 | "``` \n", 515 | "\n", 516 | "Try this snippet out and fill in the missing part - see `bokeh.io.output_notebook`" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": { 523 | "collapsed": true 524 | }, 525 | "outputs": [], 526 | "source": [] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "## Scatter Plot\n", 533 | "\n", 534 | "Generate a Scatter plot using the Bokeh library" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "collapsed": true 542 | }, 543 | "outputs": [], 544 | "source": [] 545 | } 546 | ], 547 | "metadata": { 548 | "kernelspec": { 549 | "display_name": "Python 3.6 (Develer Science)", 550 | "language": "python", 551 | "name": "develer-science" 552 | }, 553 | "language_info": { 554 | "codemirror_mode": { 555 | "name": "ipython", 556 | "version": 3 557 | }, 558 | "file_extension": ".py", 559 | "mimetype": "text/x-python", 560 | "name": "python", 561 | "nbconvert_exporter": "python", 562 | "pygments_lexer": "ipython3", 563 | "version": "3.6.6" 564 | } 565 | }, 566 | "nbformat": 4, 567 | "nbformat_minor": 2 568 | } 569 | -------------------------------------------------------------------------------- /1_apprentice/1.5. Numpy Challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Numpy Basics Challenge" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# To import numpy:\n", 19 | "import numpy as np # np is de-facto standard convention" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Brief Recap on Numpy Arrays\n", 27 | "\n", 28 | "NumPy's main object is the **homogeneous** ***multidimensional array***. It is a table of elements (usually numbers), all of the same type. \n", 29 | "\n", 30 | "In Numpy dimensions are called **axes**. \n", 31 | "\n", 32 | "The number of axes is called **rank**. \n", 33 | "\n", 34 | "The most important attributes of an ndarray object are:\n", 35 | "\n", 36 | "* **ndarray.ndim** - the number of axes (dimensions) of the array. \n", 37 | "* **ndarray.shape** - the dimensions of the array. 
For a matrix with n rows and m columns, shape will be (n,m). \n", 38 | "* **ndarray.size** - the total number of elements of the array. \n", 39 | "* **ndarray.dtype** - numpy.int32, numpy.int16, and numpy.float64 are some examples. \n", 40 | "* **ndarray.itemsize** - the size in bytes of elements of the array. For example, elements of type float64 have itemsize 8 (=64/8) " 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Warm Up: Creating `numpy` arrays\n", 48 | "\n", 49 | "There are a number of ways to initialize new numpy arrays, for example from Python lists or tuples!" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "array([1, 2, 3, 4])" 61 | ] 62 | }, 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "np.array([1, 2, 3, 4])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Ex 1. Create an array containing integers from $2$ to $2^6$\n", 77 | "\n", 78 | "#### Hint: use the Python `range` function" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## 1.1 Print `ndarray` attributes and properties\n", 95 | "(e.g. `type`, `dtype`, `shape`...) using the previous one" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Ex 2. Create a 3x3 matrix and fill it with integer numbers" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Ex 3: Create a matrix of any size and fill it with random numbers\n", 128 | "\n", 129 | "### Hint: take a look at `numpy.random.rand`" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": true 137 | }, 138 | "outputs": [], 139 | "source": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# 1. Using _array-generating_ functions\n", 146 | "\n", 147 | "For larger arrays it is impractical to initialize the data manually, using explicit Python lists. \n", 148 | "\n", 149 | "Instead we can use one of the many **functions** in `numpy` that generate arrays of different forms. \n", 150 | "\n", 151 | "So far, you have already used one of them: `numpy.random.rand`.\n", 152 | "\n", 153 | "Some of the most common are: \n", 154 | "\n", 155 | "* `np.arange`; \n", 156 | "* `np.linspace`; \n", 157 | "* `np.logspace`; \n", 158 | "* `np.ones`;\n", 159 | "* `np.zeros`;\n", 160 | "\n", 161 | "The following challenges will require you to use one (or many) of these functions." 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Ex 1.1\n", 169 | "\n", 170 | "Create an array of floating-point numbers in an arbitrary range (randomly generated), and using a decimal step (e.g. 
`0.2`) \n", 171 | "\n", 172 | "**Note**: You CAN use **three** numpy functions in this exercise. Guess the difference!" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Ex 1.2\n", 189 | "\n", 190 | "Create a matrix of any shape full of zeros" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Ex 1.3\n", 207 | "\n", 208 | "Create an array of ones and put it as the main diagonal of a matrix \n", 209 | "\n", 210 | "#### Hint: Take a look at `np.diag` and `np.eye`" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "## Ex 1.4\n", 227 | "\n", 228 | "Create an arbitrary array of numbers and put it as the first upper off-diagonal of a new matrix\n", 229 | "\n", 230 | "#### Hint: Look at the parameters of `np.diag`" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "outputs": [], 240 | "source": [] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "# 2. File I/O\n", 247 | "\n", 248 | "## Comma-separated values (CSV)\n", 249 | "\n", 250 | "A very common file format for data files is comma-separated values (CSV), or related formats such as TSV (tab-separated values). To read data from such files into Numpy arrays we can use the `numpy.genfromtxt` function. For example:" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 2, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "1800 1 1 -6.1 -6.1 -6.1 1\n", 263 | "1800 1 2 -15.4 -15.4 -15.4 1\n", 264 | "1800 1 3 -15.0 -15.0 -15.0 1\n", 265 | "1800 1 4 -19.3 -19.3 -19.3 1\n", 266 | "1800 1 5 -16.8 -16.8 -16.8 1\n", 267 | "1800 1 6 -11.4 -11.4 -11.4 1\n", 268 | "1800 1 7 -7.6 -7.6 -7.6 1\n", 269 | "1800 1 8 -7.1 -7.1 -7.1 1\n", 270 | "1800 1 9 -10.1 -10.1 -10.1 1\n", 271 | "1800 1 10 -9.5 -9.5 -9.5 1\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "!head files/stockholm_td_adj.dat" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "# Ex 2.1\n", 284 | "\n", 285 | "Generate a numpy array with data extracted from the csv file.\n", 286 | "\n", 287 | "#### Hint: Take a look at `np.genfromtxt`" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": true 295 | }, 296 | "outputs": [], 297 | "source": [] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## Ex 2.2 \n", 304 | "\n", 305 | "Analyse the properties of the resulting `np.ndarray` loaded from the file."
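A minimal sketch (not part of the original notebook) of the `np.genfromtxt` usage hinted at in Ex 2.1, loading the whitespace-separated temperature file shown above:

```python
import numpy as np

# by default genfromtxt splits on any whitespace,
# which matches the stockholm_td_adj.dat layout shown above
data = np.genfromtxt('files/stockholm_td_adj.dat')
print(data.shape, data.dtype)

# for a true comma-separated file you would pass delimiter=','
# data = np.genfromtxt('some_file.csv', delimiter=',')
```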
306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "## Ex 2.3\n", 322 | "\n", 323 | "Create a $100 \times 100$ matrix of random numbers, `reshape` it as a $10000 \times 1$ array and save it in a new file in the `data` folder.\n", 324 | "\n", 325 | "#### Hint: `np.save` might be useful :). The other hint is in the text !-)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "## Ex 2.4\n", 342 | "\n", 343 | "Load back the previously saved array and print out the first 20 elements\n", 344 | "\n", 345 | "#### Note: We are anticipating indexing a bit here, but you may guess it if you know a bit about Python lists" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "collapsed": true 353 | }, 354 | "outputs": [], 355 | "source": [] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "### Ex 2.4.1\n", 362 | "\n", 363 | "To make it a little bit harder, `reshape` the array back into a \n", 364 | "$100 \times 100$ matrix and print out the first row and the first column." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "collapsed": true 372 | }, 373 | "outputs": [], 374 | "source": [] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "# 3. Indexing" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "Index slicing is the technical name for the syntax `M[lower:upper:step]` to extract part of an array" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "## Ex 3.1 \n", 395 | "\n", 396 | "Generate a three-dimensional array of any size containing random numbers drawn from a uniform distribution (_guess the numpy function in `np.random`_). Then print out separately the first entry along each of the three axes (i.e. `x, y, z`) \n", 397 | "\n", 398 | "\n", 399 | "#### Hint: Slicing with numpy arrays works quite like Python lists" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "collapsed": true 407 | }, 408 | "outputs": [], 409 | "source": [] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "## Ex 3.2\n", 416 | "\n", 417 | "Create a vector and print out its elements in reverse order\n", 418 | "\n", 419 | "#### Hint: Use slicing for this exercise" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "collapsed": true 427 | }, 428 | "outputs": [], 429 | "source": [] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "## Ex 3.3\n", 436 | "\n", 437 | "Generate a $7 \times 7$ matrix and replace all the elements in odd rows and even columns with `1`.\n", 438 | "\n", 439 | "#### Hint: Use slicing to solve this exercise!\n", 440 | "\n", 441 | "#### Note: Take a look at the original matrix afterwards."
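One possible solution sketch for Ex 3.3, under one reading of "odd rows and even columns" (rows 1, 3, 5 and columns 0, 2, 4, 6):

```python
import numpy as np

A = np.random.random((7, 7))

# step-2 slices: odd rows start at index 1, even columns start at index 0
A[1::2, ::2] = 1
print(A)
```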
442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": { 448 | "collapsed": true 449 | }, 450 | "outputs": [], 451 | "source": [] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "## Ex 3.4 \n", 458 | "\n", 459 | "Generate a `10 x 10` matrix of numbers `A`. Then, generate a numpy array of integers in the range `1-9`. Pick `5` random values (with no repetition) from this array and use these values to extract rows from the original matrix `A`." 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "## Ex 3.5 \n", 476 | "\n", 477 | "Repeat the previous exercise but this time extract columns from `A`" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "## Ex 3.6\n", 494 | "\n", 495 | "Generate an array of numbers from `0` to `20` with step `0.5`. \n", 496 | "Extract all the values greater than a randomly generated number in the same range.\n", 497 | "\n", 498 | "#### Hint: Try to write the condition as an expression and save it to a variable. Then, use this variable in square brackets to index.... this is when the magic happens!" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": { 505 | "collapsed": true 506 | }, 507 | "outputs": [], 508 | "source": [] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "# 4. Basic Arithmetic\n", 515 | "\n", 516 | "Vectorizing code is the key to writing efficient numerical calculations with Python/Numpy. That means that as much as possible of a program should be formulated in terms of matrix and vector operations, like matrix-matrix multiplication.\n", 517 | "\n", 518 | "## Scalar-array operations\n", 519 | "\n", 520 | "We can use the usual arithmetic operators to multiply, add, subtract, and divide arrays with scalar numbers." 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "## Ex 4.1 \n", 528 | "\n", 529 | "Generate a matrix of any size. Then multiply each element by `2` and subtract `1`.\n", 530 | "\n", 531 | "#### Hint: The most intuitive way to implement this is the right one!"
532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": { 538 | "collapsed": true 539 | }, 540 | "outputs": [], 541 | "source": [] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "## Ex 4.2\n", 548 | "\n", 549 | "Generate two square matrices of random numbers and multiply them element-wise.\n", 550 | "\n", 551 | "#### Hint: The clues from the previous exercise also apply here!-)" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": null, 557 | "metadata": { 558 | "collapsed": true 559 | }, 560 | "outputs": [], 561 | "source": [] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "## Ex 4.3\n", 568 | "\n", 569 | "Compute the `dot` product of two randomly generated matrices of compatible shapes.\n", 570 | "\n", 571 | "#### Hint: Since we are using Python 3 here, you should be aware of the `@` operator.. :)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": { 578 | "collapsed": true 579 | }, 580 | "outputs": [], 581 | "source": [] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "## Ex 4.4\n", 588 | "\n", 589 | "Generate a matrix of any size and create its transpose.\n", 590 | "\n", 591 | "#### Hint: Guess the difference between `np.transpose` and `array.T`" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "collapsed": true 599 | }, 600 | "outputs": [], 601 | "source": [] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "## Ex 4.5\n", 608 | "\n", 609 | "Generate an `ndarray` of any size and shape filled with random numbers and calculate some statistics (`mean`, `sum` along an axis, `min`, `max`)\n", 610 | "\n", 611 | "\n", 612 | "#### Hint: Clues are in the text of the exercise !-)" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": { 619 | "collapsed": true 620 | }, 621 | "outputs": [], 622 | "source": [] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "# 5. Stacking and repeating arrays\n", 629 | "\n", 630 | "Using the functions `repeat`, `tile`, `vstack`, `hstack`, and `concatenate` we can create larger vectors and matrices from smaller ones\n", 631 | "\n" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "## Ex 5.1\n", 639 | "\n", 640 | "Generate two random matrices of the same size and concatenate them on the `y`-axis.\n", 641 | "\n", 642 | "#### Hint: You CAN use **two** functions to solve this challenge. Guess the differences." 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "metadata": { 649 | "collapsed": true 650 | }, 651 | "outputs": [], 652 | "source": [] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": {}, 657 | "source": [ 658 | "## Ex 5.2\n", 659 | "\n", 660 | "Generate two random matrices of the same size and concatenate them on the `x`-axis.\n", 661 | "\n", 662 | "#### Hint: You CAN use **two** functions to solve this challenge. Guess the differences."
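A minimal sketch (not part of the original notebook) of the stacking functions named above, showing the `vstack`/`hstack` shorthands next to their `concatenate` equivalents:

```python
import numpy as np

a = np.random.random((2, 3))
b = np.random.random((2, 3))

# stack vertically (along axis 0, the y-axis)...
v1 = np.vstack((a, b))
v2 = np.concatenate((a, b), axis=0)

# ...or horizontally (along axis 1, the x-axis)
h1 = np.hstack((a, b))
h2 = np.concatenate((a, b), axis=1)

print(v1.shape, h1.shape)  # (4, 3) (2, 6)
```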
663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": { 669 | "collapsed": true 670 | }, 671 | "outputs": [], 672 | "source": [] 673 | }, 674 | { 675 | "cell_type": "markdown", 676 | "metadata": {}, 677 | "source": [ 678 | "# So, why is it useful then?\n", 679 | "\n", 680 | "So far, the `numpy.ndarray` looks very much like a Python **list** (or **nested list**). \n", 681 | "\n", 682 | "*Why not simply use Python lists for computations instead of creating a new array type?*\n", 683 | "\n", 684 | "There are several reasons:\n", 685 | "\n", 686 | "* Python lists are very general. \n", 687 | " - They can contain any kind of object. \n", 688 | " - They are dynamically typed. \n", 689 | " - They do not support mathematical functions such as matrix and dot multiplications, etc. \n", 690 | " - Implementing such functions for Python lists would not be very efficient because of the dynamic typing.\n", 691 | " \n", 692 | " \n", 693 | "* Numpy arrays are **statically typed** and **homogeneous**. \n", 694 | " - The type of the elements is determined when the array is created.\n", 695 | " \n", 696 | " \n", 697 | "* Numpy arrays are memory efficient.\n", 698 | " - Because of the static typing, mathematical functions such as multiplication and addition of `numpy` arrays can be implemented efficiently in a compiled language (C and Fortran are used)." 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "### Benchmark\n", 706 | "\n", 707 | "Initialise a range of 1000 numbers as a Python `list` and as a `numpy.array`. Square all the elements and time this operation!\n", 708 | "\n", 709 | "### Hint: Use the `%timeit` IPython magic to measure timing" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": null, 715 | "metadata": { 716 | "collapsed": true 717 | }, 718 | "outputs": [], 719 | "source": [] 720 | } 721 | ], 722 | "metadata": { 723 | "kernelspec": { 724 | "display_name": "Python 3.6 (Develer Science)", 725 | "language": "python", 726 | "name": "develer-science" 727 | }, 728 | "language_info": { 729 | "codemirror_mode": { 730 | "name": "ipython", 731 | "version": 3 732 | }, 733 | "file_extension": ".py", 734 | "mimetype": "text/x-python", 735 | "name": "python", 736 | "nbconvert_exporter": "python", 737 | "pygments_lexer": "ipython3", 738 | "version": "3.6.6" 739 | } 740 | }, 741 | "nbformat": 4, 742 | "nbformat_minor": 2 743 | } 744 | -------------------------------------------------------------------------------- /2_alchemist/1. 
Data selection & Indexing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "%run 'helpers.py'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Data selection & Indexing" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Series" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "series = pd.Series([3, 62, 75, 83, 47, 43, 39, 16, 19, 2])" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "0 3\n", 55 | "1 62\n", 56 | "2 75\n", 57 | "3 83\n", 58 | "4 47\n", 59 | "5 43\n", 60 | "6 39\n", 61 | "7 16\n", 62 | "8 19\n", 63 | "9 2\n", 64 | "dtype: int64" 65 | ] 66 | }, 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "series" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Access by Position / Slice" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "3" 92 | ] 93 | }, 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "series[0]" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "3 83\n", 112 | "4 47\n", 113 | "5 43\n", 114 | "dtype: int64" 115 | ] 116 | }, 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "series[3:6]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "3 83\n", 135 | "4 47\n", 136 | "5 43\n", 137 | "dtype: int64" 138 | ] 139 | }, 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "# series[3:6]\n", 147 | "series.iloc[3:6]\n", 148 | "# note [] not ()!" 
149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Access by label" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "10" 167 | ] 168 | }, 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "len(series)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# set alpha label as new index for the series\n", 185 | "series.index = [x for x in \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"][:len(series)]" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 10, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "A 3\n", 197 | "B 62\n", 198 | "C 75\n", 199 | "D 83\n", 200 | "E 47\n", 201 | "F 43\n", 202 | "G 39\n", 203 | "H 16\n", 204 | "I 19\n", 205 | "J 2\n", 206 | "dtype: int64" 207 | ] 208 | }, 209 | "execution_count": 10, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "series" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 11, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "D 83\n", 227 | "E 47\n", 228 | "F 43\n", 229 | "dtype: int64" 230 | ] 231 | }, 232 | "execution_count": 11, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "series[3:6]\n", 239 | "# position, pythonic" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 12, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "D 83\n", 251 | "E 47\n", 252 | "F 43\n", 253 | "dtype: int64" 254 | ] 255 | }, 256 | "execution_count": 12, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "series['D':'F']\n", 263 | "# by label: slice includes end! 
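A minimal sketch (not part of the original notebook) contrasting the two slicing semantics just demonstrated: positional slicing excludes the end point, while label-based slicing includes it:

```python
import pandas as pd

s = pd.Series([10, 20, 30, 40], index=list('ABCD'))

print(s.iloc[1:3])     # positional: rows 1 and 2 -> 'B', 'C' (end excluded)
print(s.loc['B':'C'])  # label-based: 'C' IS included
```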
" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 13, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "ename": "SyntaxError", 273 | "evalue": "invalid syntax (, line 1)", 274 | "output_type": "error", 275 | "traceback": [ 276 | "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m series[['D':'F', 'I':'J']]\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "series[['D':'F', 'I':'J']]\n", 282 | "# cannot combine multiple ranges" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "pd.concat([series['D':'F'], series['I':'J']])\n", 292 | "# concat to combine multiple ranges" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "# set alpha label as new index for the series\n", 302 | "series.index = [x for x in \"GATTACAXYZ\"][:len(series)]" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "series" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "series.loc['G']" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "series.loc['A']" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "scrolled": true 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "series.loc['G':'A']\n", 341 | "# non-unique values breaks slicing" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "series.loc['X':'Z']\n", 351 | "# while unique values are still slicable in a non-unique index" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "## DataFrames, 2D Data" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "df = pd.read_json('./data/sampledf.json')" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "from IPython import display" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "# visualisation of below - for presentation\n", 386 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice[:, 2]))\n", 387 | "\n", 388 | "# column\n", 389 | "df[2]" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "df[2:4]" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "# visualisation of below - for presentation\n", 408 | "display.display_html(df.style.apply(highlight, \n", 409 | " subset=pd.IndexSlice[range(2, 4), :]))\n", 410 | "\n", 411 | "# column\n", 412 | "df[2]" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | 
"metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "df.iloc[2:, 2]" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "# visualisation of below - for presentation\n", 431 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice[range(2, 4), range(2, 4)]))\n", 432 | "\n", 433 | "\n", 434 | "# segment\n", 435 | "df.iloc[2, :]" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "# visualisation of below - for presentation\n", 445 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice[:, range(2, 4)]))\n", 446 | "\n", 447 | "# column slice\n", 448 | "df.iloc[:, 2:4]" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "df" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "df.index = [\"R{:02d}\".format(i) for i in range(len(df))]" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "df.columns = [\"C{:02d}\".format(i) for i in range(len(df.columns))]" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "df" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [ 493 | "# visualisation of below - for presentation\n", 494 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice[:, 'C05']))\n", 495 | "\n", 496 | "df['C05']" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "# visualisation of below - for presentation\n", 506 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice['R02':'R05', :]))\n", 507 | "\n", 508 | "\n", 509 | "df['R02':'R05']" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "df[['C04', 'C05']]" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# visualisation of below - for presentation\n", 528 | "display.display_html(df.style.apply(highlight, subset=pd.IndexSlice['R02':'R05', 'C04':'C05']))\n", 529 | "\n", 530 | "\n", 531 | "# segment\n", 532 | "df.loc['R02':'R05', 'C04':'C05']" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "df.loc['R02':'R05', 'C04':'C05']" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "### Excercise" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [ 557 | "sales_data = pd.read_excel('./data/blooth_sales_data_clean.xlsx')\n", 558 | "sales_data.head(5)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [ 567 | "sales_data.info()" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 
572 | "metadata": {}, 573 | "source": [ 574 | "Select columns two to four (three columns in total)" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "# Your code here\n" 584 | ] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": {}, 589 | "source": [ 590 | "Select the columns *birthday and name* (together)" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [ 599 | "# Your code here\n" 600 | ] 601 | }, 602 | { 603 | "cell_type": "markdown", 604 | "metadata": {}, 605 | "source": [ 606 | "Select the rows 2 to 4 (three rows)" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# Your code here" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "Select the rows 55, 77" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "# Your code here\n" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "## Boolean Index" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": {}, 644 | "source": [ 645 | "A boolean index is an array of true/false values: [1, 0, 1, 1, 0, 0, 1, …]\n", 646 | "\n", 647 | "! though the index name it's not one of the Pandas Index Types." 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "df['C04']" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": {}, 663 | "outputs": [], 664 | "source": [ 665 | "df['C04'] > 60" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": null, 671 | "metadata": {}, 672 | "outputs": [], 673 | "source": [ 674 | "df[df['C04'] > 60]" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "df[(df['C04'] < 60) | (df['C04'] > 80)] # multiple OR" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [ 692 | "df[(df['C04'] < 60) & (df['C04'] % 2 == 0)] # multiple AND" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": {}, 698 | "source": [ 699 | "### Excercise" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [ 708 | "sales_data = pd.read_excel('./data/blooth_sales_data_clean.xlsx')\n", 709 | "sales_data.head(5)" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": null, 715 | "metadata": {}, 716 | "outputs": [], 717 | "source": [ 718 | "sales_data.info(5)" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": {}, 724 | "source": [ 725 | "Find all rows with exactly 50 units" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [ 734 | "# Your code here\n" 735 | ] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": {}, 740 | "source": [ 741 | "Find all rows with exactly 50 playstations" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 
746 | "execution_count": null, 747 | "metadata": {}, 748 | "outputs": [], 749 | "source": [ 750 | "# Your code here\n" 751 | ] 752 | }, 753 | { 754 | "cell_type": "markdown", 755 | "metadata": {}, 756 | "source": [ 757 | "## filter" 758 | ] 759 | }, 760 | { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "Filter by label or index" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": {}, 771 | "outputs": [], 772 | "source": [ 773 | "df.columns" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": null, 779 | "metadata": { 780 | "scrolled": true 781 | }, 782 | "outputs": [], 783 | "source": [ 784 | "df.filter(like='R0', axis=0) # , axis=1 per default" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": null, 790 | "metadata": { 791 | "scrolled": true 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "df.filter(regex='.0[2-4]', axis=0)" 796 | ] 797 | }, 798 | { 799 | "cell_type": "markdown", 800 | "metadata": {}, 801 | "source": [ 802 | "### Transpose with .T" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": null, 808 | "metadata": {}, 809 | "outputs": [], 810 | "source": [ 811 | "df.iloc[2:3]" 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "execution_count": null, 817 | "metadata": {}, 818 | "outputs": [], 819 | "source": [ 820 | "df.iloc[2:3].T" 821 | ] 822 | }, 823 | { 824 | "cell_type": "markdown", 825 | "metadata": {}, 826 | "source": [ 827 | "### Formatting with Styler" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": null, 833 | "metadata": {}, 834 | "outputs": [], 835 | "source": [ 836 | "df = pd.read_json('./data/sampledf.json')\n", 837 | "df" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": null, 843 | "metadata": {}, 844 | "outputs": [], 845 | "source": [ 846 | "df.style.highlight_min()" 847 | ] 848 | }, 849 | { 850 | "cell_type": "code", 851 | "execution_count": null, 852 | "metadata": {}, 853 | "outputs": [], 854 | "source": [ 855 | "def odd_or_even(data):\n", 856 | " return [('background-color: green; color:white;' if x%2==0 else 'background-color: orange') \n", 857 | " for x in data]\n", 858 | "df.style.apply(odd_or_even)" 859 | ] 860 | } 861 | ], 862 | "metadata": { 863 | "kernelspec": { 864 | "display_name": "Python 3.6 (Develer Science)", 865 | "language": "python", 866 | "name": "develer-science" 867 | }, 868 | "language_info": { 869 | "codemirror_mode": { 870 | "name": "ipython", 871 | "version": 3 872 | }, 873 | "file_extension": ".py", 874 | "mimetype": "text/x-python", 875 | "name": "python", 876 | "nbconvert_exporter": "python", 877 | "pygments_lexer": "ipython3", 878 | "version": "3.6.6" 879 | }, 880 | "varInspector": { 881 | "cols": { 882 | "lenName": 16, 883 | "lenType": 16, 884 | "lenVar": 40 885 | }, 886 | "kernels_config": { 887 | "python": { 888 | "delete_cmd_postfix": "", 889 | "delete_cmd_prefix": "del ", 890 | "library": "var_list.py", 891 | "varRefreshCmd": "print(var_dic_list())" 892 | }, 893 | "r": { 894 | "delete_cmd_postfix": ") ", 895 | "delete_cmd_prefix": "rm(", 896 | "library": "var_list.r", 897 | "varRefreshCmd": "cat(var_dic_list()) " 898 | } 899 | }, 900 | "types_to_exclude": [ 901 | "module", 902 | "function", 903 | "builtin_function_or_method", 904 | "instance", 905 | "_Feature" 906 | ], 907 | "window_display": false 908 | } 909 | }, 910 | "nbformat": 4, 911 | "nbformat_minor": 2 912 | } 913 | 
-------------------------------------------------------------------------------- /4_archmage/2.2.1 Keras Backend.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Keras Backend\n", 8 | "\n", 9 | "In this notebook we will be using the [Keras backend module](http://keras.io/backend/), which provides an abstraction over both Theano and Tensorflow." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Let's try to re-implement the Logistic Regression Model using the `keras.backend` APIs.\n", 17 | "\n", 18 | "The following code will look like very similar to what we would write in Theano or Tensorflow (with the *only difference* that it may run on both the two backends)." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stderr", 28 | "output_type": "stream", 29 | "text": [ 30 | "Using TensorFlow backend.\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "import keras.backend as K\n", 36 | "import numpy as np\n", 37 | "import matplotlib.pyplot as plt\n", 38 | "\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from kaggle_data import load_data, preprocess_data, preprocess_labels" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "9 classes\n", 61 | "93 dims\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "X_train, labels = load_data('../data/kaggle_ottogroup/train.csv', train=True)\n", 67 | "X_train, scaler = preprocess_data(X_train)\n", 68 | "Y_train, encoder = preprocess_labels(labels)\n", 69 | "\n", 70 | "X_test, ids = load_data('../data/kaggle_ottogroup/test.csv', train=False)\n", 71 | "\n", 72 | "X_test, _ = preprocess_data(X_test, scaler)\n", 73 | "\n", 74 | "nb_classes = Y_train.shape[1]\n", 75 | "print(nb_classes, 'classes')\n", 76 | "\n", 77 | "dims = X_train.shape[1]\n", 78 | "print(dims, 'dims')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "feats = dims\n", 88 | "training_steps = 25" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "x = K.placeholder(dtype=\"float\", shape=X_train.shape) \n", 98 | "target = K.placeholder(dtype=\"float\", shape=Y_train.shape)\n", 99 | "\n", 100 | "# Set model weights\n", 101 | "W = K.variable(np.random.rand(dims, nb_classes))\n", 102 | "b = K.variable(np.random.rand(nb_classes))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Define model and loss\n", 112 | "y = K.dot(x, W) + b\n", 113 | "loss = K.categorical_crossentropy(y, target)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "activation = K.softmax(y) # Softmax" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 8, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "lr = K.constant(0.01)\n", 132 | "grads = K.gradients(loss, [W,b])\n", 133 | "updates = [(W, 
W-lr*grads[0]), (b, b-lr*grads[1])]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "train = K.function(inputs=[x, target], outputs=[loss], updates=updates)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 10, 148 | "metadata": { 149 | "scrolled": false 150 | }, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "Loss: [1296.8262 -427.89313 -16.26692 ... 1968.651 -704.3656 1619.4004 ]\n", 157 | "Loss: [-28785862. -25194672. -28507862. ... -26947364. -22272130. -31571466.]\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "# Training\n", 163 | "loss_history = []\n", 164 | "for epoch in range(training_steps):\n", 165 | " current_loss = train([X_train, Y_train])[0]\n", 166 | " loss_history.append(current_loss)\n", 167 | " if epoch % 20 == 0:\n", 168 | " print(\"Loss: {}\".format(current_loss))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 11, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "loss_history = [np.mean(lh) for lh in loss_history]" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 12, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAERCAYAAABsNEDqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xl0VfX57/H3I1CJyGVQihBEQS1VIASJCIrVyjyoyIWfilSsA7KUiu0ChYuXOiyVSh1qJ+SnVhQULRX0J1Qmp7qEQgJhkiqD0IZwCxVDQaUSfO4f5ySEeBLOSXLOPsPntVYWZ+/zPXs/O5vkyf5+97O/5u6IiIhE64SgAxARkdSixCEiIjFR4hARkZgocYiISEyUOEREJCZKHCIiEpO0TRxm9pyZ7TGzjVG0fcLMCsNfn5hZSSJiFBFJRZaudRxm9gPgIPCCu3eK4XM/Abq6+01xC05EJIWl7RWHu78P7Ku4zszOMrO3zKzAzP5iZt+P8NHrgJcTEqSISAqqH3QACTYTGOvuW8zsQuB3wOVlb5rZGUA74O2A4hMRSXoZkzjM7GTgIuCPZla2+sRKza4F5rn7kUTGJiKSSjImcRDqlitx99xq2lwL3JGgeEREUlLajnFU5u7/Bj41sxEAFtKl7H0z6wA0A1YEFKKISEpI28RhZi8TSgIdzKzIzG4GrgduNrN1wCbgqgofuQ6Y6+l6m5mISB1J29txRUQkPtL2ikNEROIjLQfHTz31VD/zzDODDkNEJGUUFBT8y91bRNM2LRPHmWeeSX5+ftBhiIikDDPbGW1bdVWJiEhMlDhERCQmShwiIhKTtBzjkPR1+PBhioqKOHToUNChiKSkhg0b0qZNGxo0aFDjbShxSEopKiqicePGnHnmmVR45piIRMHd+eyzzygqKqJdu3Y13k6gXVVmNsDMPjazrWY2KcL7J5rZK+H3/2pmZ8YrlgVrd3HxtLdpN2khF097mwVrd8VrV1ILhw4d4pRTTlHSEKkBM+OUU06p9RV7YInDzOoBvwUGAucB15nZeZWa3Qx87u5nA08Av4hHLAvW7mLyaxvYVfIVDuwq+YrJr21Q8khSShoiNVcXPz9BXnF0B7a6+3Z3/xqYy7HPjiK8PCv8eh7Q2+LwW2P64o/56vCxT1L/6vARpi/+uK53JSKS8oJMHNnAPyosF4XXRWzj7qXAfuCUSBszszFmlm9m+Xv37o0pkOKSr2JaL5nt5JNPrvU2iouLGT58eJXvl5SU8Lvf/S7q9pXdeOONtGvXjtzcXLp06cLy5ctrFW9dmzFjBi+88EKttrFhwwZyc3PJzc2lefPm5cfbp0+fmLbTv39/Dhw4UG2bKVOm8M4779Qm3OPq1asXhYWFcd1HXQlycDzSlUPlJy5G0ya00n0moRn+yMvLi+nJja2bZrErQpJo3TQrls1IElqwdhfTF39McclXtG6axcT+HRjatfLfJ4nXunVr5s2bV+X7ZYnj9ttvj6p9JNOnT2f48OG88847jBkzhi1bttQqZoDS0lLq16/9r42xY8fWehudO3cu/0V74403MmTIkIjJ9XgxL168+Lj7euihh2oeaBoK8oqjCDi9wnIboLiqNmZWH2hCpXnE68LE/h3IalDvmHVZDeoxsX+Hut6VJFAix6527txJ7969ycnJoXfv3vz9738HYNu2bfTo0YMLLriAqVOnll+t7Nixg06dOgGwadMmunfvTm5uLjk5OWzZsoVJkyaxbds2cnNzmThx4jHtjxw5woQJE+jcuTM5OTn8+te/rja2nj17smvX0WMuKCjg0ksvpVu3bvTv35/du3cDsHr1anJycujZsycTJ04s39/zzz/PiBEjuOKKK+jXrx8QSkoXXHABOTk5/PznPwfgiy++YPDgwXTp0oVOnTrxyiuvADBp0iTOO+88cnJymDBhAgD33Xcfv/zlLwEoLCykR48e5OTkcPXVV/P5558DcNlll3HPPffQvXt3vve97/GXv/wl6vOxbNky+vTpw7XXXkvXrl0BuOKKK+jWrRsdO3bkmWeeKW/bpk0bSkpK2Lp1K506deLmm2+mY8eODBw4sHwQedSoUSxYsKC8/X333UfXrl3Jycnhk08+AWDPnj307t2b888/n9tvv53s7GxKSkqOiau0tJSmTZvy05/+lPPPP5++ffvy2Wef
lb8/d+5cunfvTocOHfjwww+B0P+hSy65hK5du9KtWzf++te/ArBr1y569epFbm4unTp1Km//5z//mZ49e3L++edzzTXX8MUXX0T9fYtWkIljNXCOmbUzs+8Qmn3vjUpt3gBGh18PB96Ox3wZQ7tm88iwzmQ3zcKA7KZZPDKsc1L8ZSo1l8ixq3HjxnHDDTewfv16rr/+eu68804Axo8fz/jx41m9ejWtW7eO+NkZM2Ywfvx4CgsLyc/Pp02bNkybNo2zzjqLwsJCpk+ffkz7mTNn8umnn7J27dry/VXnrbfeYujQoUCoDuYnP/kJ8+bNo6CggJtuuokpU6YA8OMf/5gZM2awYsUK6tU79g+pFStWMGvWLN5++22WLFnCli1bWLVqFYWFhRQUFPD+++/z1ltv0bp1a9atW8fGjRsZMGAA+/btY/78+WzatIn169dz7733fiu+G264gV/84hesX7+ezp07c//995e/V1payqpVq3jyySePWR+NlStX8uijj7JhwwYAZs2aRUFBAatXr+bxxx8vT1AVffzxx9x1111s2rSJrKys8mRRWcuWLVm7di233HILjz/+OABTp05lwIABrFmzhkGDBlFcXPnv4JD9+/fTo0cP1qxZQ8+ePXnwwQfL33N3Vq1axfTp03nggQcAaNWqFUuXLmXt2rXMmTOn/P/W7NmzueKKKygsLGTdunXk5OSwZ88epk2bxvLly1mzZg05OTn86le/iun7Fo3AuqrcvdTMxgGLgXrAc+6+ycweAPLd/Q3gWeBFM9tK6Erj2njFM7RrthJFmknk2NWKFSt47bXXAPjRj37E3XffXb6+7JfPyJEjy//irqhnz5489NBDFBUVMWzYMM4555xq97Vs2TLGjh1b3v3SvHnziO0mTpzI3XffzZ49e1i5ciUQ+sW4ceNG+vbtC4SuXlq1akVJSQkHDhzgoosuKo/1zTffLN9W3759y/ezZMkSlixZUv6X/MGDB9myZQuXXHIJEyZM4J577mHIkCFccskllJaW0rBhQ2655RYGDx7MkCFDjolx//79lJSUcOmllwIwevRoRowYUf7+sGHDAOjWrRs7duyo9vtSWc+ePWnbtm358hNPPMEbb4T+Ni0qKmLbtm3k5eUd85mzzz6bzp07H3efFeNatGgRAB988EF5Eh4yZAiNGzeO+Nn69euXH+OoUaMYOXJkxO2W7fs///kP48aNY926ddSvX59t27YBcMEFF3Dbbbdx6NAhhg4dSpcuXVi2bBkfffRR+Xn8+uuv6dWrVxTfrdgEWgDo7ouARZXWTa3w+hAwovLnkkGy9p3LUUGOXcVy89/IkSO58MILWbhwIf379+eZZ56hffv2VbZ396i2P336dIYNG8ZTTz3F6NGjKSgowN3p2LEjK1YcO0NypL++K2rUqNEx+588eTK33Xbbt9oVFBSwaNEiJk+eTL9+/Zg6dSqrVq1i+fLlzJ07l9/85je8/fbbx429zIknnghAvXr1KC0tjfpzlWNetmwZ77//PitXriQrK4tevXpFrGUo29/x9hkprmg7Qyqfu4rLkbb72GOPcfrppzN79mwOHz5c3t15+eWX8+6777Jw4UKuv/56Jk+ezEknncSAAQN48cUXo4qlpvSsqhpQ3UdqSOTY1UUXXcTcuXMBmDNnTvlfeT169OBPf/oTQPn7lW3fvp327dtz5513cuWVV7J+/XoaN25c5Z0+/fr1Y8aMGeW/WPbtq3rY74QTTmD8+PF88803LF68mA4dOrB3797yxHH48GE2bdpEs2bNaNy4cfmVSVWxQugupOeee46DBw8Cob72PXv2UFxczEknncSoUaOYMGECa9as4eDBg+zfv59Bgwbx5JNPfuuuoSZNmtCsWbPy8YsXX3yx/OqjLu3fv5/mzZuTlZXFpk2bWL16dZ3vo1evXrz66qsALFq0qMrzd/jw4fKr05deeum4VwT79++nVatWmBmzZs0qT1A7d+7ktNNOY8yYMdx4442sXbuWiy66iPfee4/t27cDoXGnurgpojI9cqQGqus711VH8ig7F3V9Zfjll1/Spk2b8uWf/exnPPXUU9x0001Mnz6dFi1a8Ic//AGAJ598klGjRvHYY48xePBgmjRp8q3tvfLKK8yePZsGDRpw2mmnMXXqVJo3b87FF19Mp06dGDhwIHfccUd5+1tuuYVPPvmEnJwcGjRowK233sq4ceOqjNfMuPfee3n00Ufp378/8+bN484772T//v2UlpZy11130bFjR5599lluvfVWGjVqxGWXXRYxVgglrs2bN9OzZ08gdHvy7Nmz2bp1KxMnTuSEE06gQYMG/P73v+fAgQNcddVVHDp0CHfniSee+Nb2Zs2axdixY/nyyy9p3759+feuLg0ePJiZM2fSpUsXvv/973PhhRfW+T7uv/9+Ro4cyZw5c7j88stp2bLlMVc9ZZo0acKaNWt4+OGHad68eflNBFUZN24cw4cP5+WXX6ZPnz7lVyXLly/n8ccfp0GDBuXnoGXLljz77LNcc801fP311wA8/PDDx+3+jFVazjmel5fn8ZzIqd2khRHvCTbg02mD47Zfgc2bN3PuuecGHUbUvvzyS7KysjAz5s6dy8svv8zrr78edFgRHTx4sLwbZNq0aezevTsuA6vp6tChQ9SvX5/69evzwQcfcNddd31rQrnS0lJOPfXUb91tlWiRfo7MrMDd86r4yDF0xVEDqvuQaBUUFDBu3DjcnaZNm/Lcc88FHVKVFi5cyCOPPEJpaSlnnHEGzz//fNAhpZQdO3Zw3XXXceTIEU488USefvrpoEOKG11x1EDZGEfF7qqsBvV0C28CpNoVh0gy0hVHAOLVdy7RifauIhH5trq4WFDiqCHVfQSjYcOGfPbZZ3q0ukgNlM3H0bBhw1ptR4lDUkqbNm0oKioi1gdZikhI2QyAtaHEkUAqGqy9Bg0a1GrmMhGpPSWOBKk8oF5WNAgoeYhISlHleIJosigRSRdKHAmiyaJEJF0ocSRIVcWBKhoUkVSjxJEgmixKRNKFBscTREWDIpIulDgSSEWDIpIOlDiSnGo/RCTZKHEkMdV+iEgy0uB4ElPth4gkIyWOJKbaDxFJRkocSUy1HyKSjAJJHGbW3MyWmtmW8L/Nqmh3xMwKw19vJDrOoKn2Q0SSUVBXHJOA5e5+DrA8vBzJV+6eG/66MnHhJYehXbN5ZFhnsptmYUB20yzNMigigQtk6lgz+xi4zN13m1kr4F13/9af0WZ20N1PjnX78Z46VkQk3aTC1LEt3X03QDh5fLeKdg3NLB8oBaa5+4KERZjCVPshIvEUt8RhZsuA0yK8NSWGzbR192Izaw+8bWYb3H1bFfsbA4wBaNu2bczxpgvVfohIvMUtcbh7n6reM7N/mlmrCl1Ve6rYRnH43+1m9i7QFYiYONx9JjATQl1VtQw/ZVVX+6HEISJ1IajB8TeA0eHXo4HXKzcws2ZmdmL49anAxcBHCYswRan2Q0TiLajEMQ3oa2ZbgL7hZcwsz8yeCbc5F8g3s3XAO4TGOJQ4jkO
1HyISb4EMjrv7Z0DvCOvzgVvCrz8EOic4tJQ3sX+HY8Y4QLUfIlK39JDDNKN5P0Qk3pQ40pDm/RCReFLiENV9iEhMlDgynOo+RCRWejpuhtOcHyISKyWODKe6DxGJlRJHhlPdh4jESokjw2nODxGJlQbHM5zqPkQkVkocoroPEYmJuqpERCQmuuKQGlHRoEjmUuKQmKloUCSzqatKYqaiQZHMpsQhMVPRoEhmU+KQmKloUCSzKXFIzFQ0KJLZNDguMVPRoEhmU+KQGlHRoEjmUuKQhFHth0h6UOKQhFDth0j60OC4JIRqP0TSRyCJw8xGmNkmM/vGzPKqaTfAzD42s61mNimRMUrdUu2HSPoI6opjIzAMeL+qBmZWD/gtMBA4D7jOzM5LTHhS11T7IZI+Akkc7r7Z3Y/XR9Ed2Oru2939a2AucFX8o5N4UO2HSPpI5jGObOAfFZaLwusiMrMxZpZvZvl79+6Ne3ASm6Fds3lkWGeym2ZhQHbTLB4Z1lkD4yIpKG53VZnZMuC0CG9NcffXo9lEhHVeVWN3nwnMBMjLy6uynQRHtR8i6SFuicPd+9RyE0XA6RWW2wDFtdympBDVfYgkp2Su41gNnGNm7YBdwLXAyGBDkkRR3YdI8grqdtyrzawI6AksNLPF4fWtzWwRgLuXAuOAxcBm4FV33xREvJJ4qvsQSV6BXHG4+3xgfoT1xcCgCsuLgEUJDE2ShOo+RJJXMt9VJRlMdR8iyUuJQ5KS6j5EklcyD45LBtOcHyLJS4lDkpbqPkSSkxKHpBXVfojEnxKHpA3VfogkhgbHJW2o9kMkMZQ4JG2o9kMkMZQ4JG2o9kMkMZQ4JG2o9kMkMTQ4LmlDtR8iiaHEIWlFtR8i8aeuKhERiYmuOCTjqWhQJDZKHJLRVDQoEjt1VUlGU9GgSOyUOCSjqWhQJHZKHJLRVDQoEjslDsloKhoUiZ0GxyWjqWhQJHZKHJLxVDQoEptAEoeZjQDuA84Furt7fhXtdgAHgCNAqbvnJSpGkaqo7kMyXVBXHBuBYcDTUbT9obv/K87xiERFdR8iAQ2Ou/tmd9eN8pJyVPchEmXiCHctHXddHDiwxMwKzGxMdQ3NbIyZ5ZtZ/t69exMQmmQi1X2IRH/FMTnKdeXMbJmZbYzwdVUM8V3s7ucDA4E7zOwHVTV095nunufueS1atIhhFyLRU92HyHHGOMxsIDAIyDazpyq89b+A0uo+6+59ahucuxeH/91jZvOB7sD7td2uSE1N7N/hmDEOUN2HZJ7jDY4XA/nAlUBBhfUHgJ/GKygAM2sEnODuB8Kv+wEPxHOfIsejug8RMHc/fiOzBu5+OPy6GXC6u6+v8U7NrgZ+DbQASoBCd+9vZq2BZ9x9kJm1B+aHP1IfeMndH4pm+3l5eZ6fH/EOXxERicDMCqIteYj2dtylZnZluH0hsNfM3nP3n9UkQHefz9GkUHF9MaGuMdx9O9ClJtsXSTaq/ZB0Eu3geBN3/zeh2os/uHs3oNZjGCKZoKz2Y1fJVzhHaz8WrN0VdGgiNRJt4qhvZq2A/wLejGM8ImlHtR+SbqJNHA8Ai4Ft7r46PP6wJX5hiaQP1X5IuolqjMPd/wj8scLyduB/xysokXTSumkWuyIkCdV+SKqKtnK8jZnNN7M9ZvZPM/uTmbWJd3Ai6UBzfki6ibar6g/AG0BrIBv4n/A6ETmOoV2zeWRYZ7KbZmFAdtMsHhnWWXdVScqKto6j0N1zj7cuWaiOQ0QkNvGo4/iXmY0CXg4vXwd8VpPgRCQ6qv2QZBVtV9VNhG7F/X/AbmA48ON4BSWS6VT7Icks2sTxIDDa3Vu4+3cJJZL74haVSIZT7Ycks2gTR467f1624O77gK7xCUlEVPshySzaxHFC+OGGAJhZc4KbdlYk7WneD0lm0SaOx4APzexBM3sA+BB4NH5hiWQ21X5IMou2cvwFM8sHLgcMGObuH8U1MpEMpnk/JJlF3d0UThRKFiIJMrRrthKFJKVou6pEREQADXCLpA0VDEqiKHGIpIGygsGy2o+ygkFAyUPqnLqqRNKACgYlkZQ4RNKACgYlkZQ4RNKACgYlkQJJHGY23cz+ZmbrwxNENa2i3QAz+9jMtprZpETHKZIqVDAoiRTUFcdSoJO75wCfAJMrNzCzesBvgYHAecB1ZnZeQqMUSRGaLEoSKZC7qtx9SYXFlYQe015Zd2BreH5zzGwucBUqQhSJSAWDkijJcDvuTcArEdZnA/+osFwEXJiQiEQyhGo/pCbiljjMbBlwWoS3prj76+E2U4BSYE6kTURYV+U8t2Y2BhgD0LZt25jjFck0qv2Qmopb4nD3PtW9b2ajgSFAb4888XkRcHqF5TZAcTX7mwnMhNCc4zEHLJJhqqv9UOKQ6gR1V9UA4B7gSnf/sopmq4FzzKydmX0HuBZ4I1ExiqQ71X5ITQV1V9VvgMbAUjMrNLMZAGbW2swWAbh7KTAOWAxsBl51900BxSuSdlT7ITUV1F1VZ1exvhgYVGF5EbAoUXGJZJKJ/TscM8YBqv2Q6CTDXVUiEgBNFiU1pcQhksFU+yE1ocQhIjFR7YcocYhI1FT7IaCn44pIDDTvh4ASh4jEQLUfAkocIhID1X4IKHGISAw074eABsdFJAaq/RBQ4hCRGKn2Q5Q4RCSuVPeRfpQ4RCRuVPeRnjQ4LiJxo7qP9KTEISJxo7qP9KTEISJxo7qP9KTEISJxo7qP9KTBcRGJG9V9pCclDhGJK9V9pB8lDhFJOqr9SG5KHCKSVFT7kfw0OC4iSUW1H8lPiUNEkopqP5JfIInDzKab2d/MbL2ZzTezplW022FmG8ys0MzyEx2niCSeaj+SX1BXHEuBTu6eA3wCTK6m7Q/dPdfd8xITmogESbUfyS+QxOHuS9y9NLy4EmgTRBwiknyGds3mkWGdyW6ahQHZTbN4ZFhnDYwnEXP3YAMw+x/gFXefHeG9T4HPAQeedveZ1WxnDDAGoG3btt127twZp4hFRNKPmRVE27MTt9txzWwZcFqEt6a4++vhNlOAUmBOFZu52N2Lzey7wFIz+5u7vx+pYTipzATIy8sLNhuKiKSxuCUOd+9T3ftmNhoYAvT2Ki573L04/O8eM5sPdAciJg4RyWwqGkycoO6qGgDcA1zp7l9W0aaRmTUuew30AzYmLkoRSRVlRYO7Sr7COVo0uGDtrqBDS0tB3VX1G6Axoe6nQjObAWBmrc1sUbhNS+ADM1sHrAIWuvtbwYQrIslMRYOJFcgjR9z97CrWFwODwq+3A10SGZeIpCYVDSaWKsdFJOWpaDCxlDhEJOWpaDCx9HRcEUl5mjAqsZQ4RCQtaMKoxFHiEJGMpLqPmlPiEJGMo8miakeD4yKScVT3UTtKHCKScVT3UTtKHCKScVT3UTtKHCKScVT3UTsaHBeRjKO6j9pR4hCRjKS6j5pT4hARiZJqP0KUOEREoqDaj6M0OC4iEgXVfhylxCEiEgXVfhylxCEiEgXVfhylxCEiEgXVfhylwXERkSio9uMoJQ4RkSip9iNEiUNEJI
7SsfYjsDEOM3vQzNabWaGZLTGz1lW0G21mW8JfoxMdp4hITZXVfuwq+QrnaO3HgrW7gg6tVoIcHJ/u7jnungu8CUyt3MDMmgM/By4EugM/N7NmiQ1TRKRm0rX2I7DE4e7/rrDYCPAIzfoDS919n7t/DiwFBiQiPhGR2krX2o9AxzjM7CHgBmA/8MMITbKBf1RYLgqvi7StMcAYgLZt29ZtoCIiNdC6aRa7IiSJVK/9iOsVh5ktM7ONEb6uAnD3Ke5+OjAHGBdpExHWRboywd1nunueu+e1aNGi7g5CRKSG0rX2I65XHO7eJ8qmLwELCY1nVFQEXFZhuQ3wbq0DExFJgHSt/Qisq8rMznH3LeHFK4G/RWi2GHi4woB4P2ByIuITEakL6Vj7EeQYxzQz6wB8A+wExgKYWR4w1t1vcfd9ZvYgsDr8mQfcfV8w4YqICIC5RxwySGl5eXmen58fdBgiIjELqmDQzArcPS+atqocFxFJEqkyWZSejisikiRSpWBQiUNEJEmkSsGgEoeISJJIlcmilDhERJJEqhQManBcRCRJpErBoBKHiEgSSYWCQSUOEZEUl+jaDyUOEZEUFkTthwbHRURSWBC1H0ocIiIpLIjaDyUOEZEUFkTthxKHiEgKC6L2Q4PjIiIpLIjaDyUOEZEUl+jaD3VViYhITJQ4REQkJkocIiISEyUOERGJiRKHiIjExNw96BjqnJntBXbW8OOnAv+qw3BSSSYfO2T28evYM1fZ8Z/h7i2i+UBaJo7aMLN8d88LOo4gZPKxQ2Yfv449M48danb86qoSEZGYKHGIiEhMlDi+bWbQAQQok48dMvv4deyZK+bj1xiHiIjERFccIiISEyUOERGJiRJHmJkNMLOPzWyrmU0KOp5EM7MdZrbBzArNLD/oeOLJzJ4zsz1mtrHCuuZmttTMtoT/bRZkjPFUxfHfZ2a7wue/0MwGBRljvJjZ6Wb2jpltNrNNZjY+vD7tz381xx7zudcYB2Bm9YBPgL5AEbAauM7dPwo0sAQysx1AnrunfSGUmf0AOAi84O6dwuseBfa5+7TwHw7N3P2eIOOMlyqO/z7goLv/MsjY4s3MWgGt3H2NmTUGCoChwI2k+fmv5tj/ixjPva44QroDW919u7t/DcwFrgo4JokTd38f2Fdp9VXArPDrWYR+oNJSFcefEdx9t7uvCb8+AGwGssmA81/NscdMiSMkG/hHheUiavgNTWEOLDGzAjMbE3QwAWjp7rsh9AMGfDfgeIIwzszWh7uy0q6rpjIzOxPoCvyVDDv/lY4dYjz3ShwhFmFdpvXhXezu5wMDgTvC3RmSOX4PnAXkAruBx4INJ77M7GTgT8Bd7v7voONJpAjHHvO5V+IIKQJOr7DcBigOKJZAuHtx+N89wHxC3XeZ5J/hPuCyvuA9AceTUO7+T3c/4u7fAP9NGp9/M2tA6BfnHHd/Lbw6I85/pGOvyblX4ghZDZxjZu3M7DvAtcAbAceUMGbWKDxYhpk1AvoBG6v/VNp5Axgdfj0aeD3AWBKu7Jdm2NWk6fk3MwOeBTa7++MV3kr781/Vsdfk3OuuqrDwLWhPAvWA59z9oYBDShgza0/oKgOgPvBSOh+/mb0MXEbocdL/BH4OLABeBdoCfwdGuHtaDiBXcfyXEeqqcGAHcFtZn386MbNewF+ADcA34dX/h1Bff1qf/2qO/TpiPPdKHCIiEhN1VYmISEyUOEREJCZKHCIiEhMlDhERiYkSh4iIxESJQySJmNllZvZm0HGIVEeJQ0REYqLEIVIDZjbKzFaF5y942szqmdnHw4ZxAAABkklEQVRBM3vMzNaY2XIzaxFum2tmK8MPkZtf9hA5MzvbzJaZ2brwZ84Kb/5kM5tnZn8zsznhil+RpKHEIRIjMzsXuIbQgyFzgSPA9UAjYE34YZHvEarIBngBuMfdcwhV7ZatnwP81t27ABcResAchJ5aehdwHtAeuDjuByUSg/pBByCSgnoD3YDV4YuBLEIPxfsGeCXcZjbwmpk1AZq6+3vh9bOAP4afDZbt7vMB3P0QQHh7q9y9KLxcCJwJfBD/wxKJjhKHSOwMmOXuk49ZafZ/K7Wr7nk+1XU//afC6yPo51SSjLqqRGK3HBhuZt+F8vmqzyD08zQ83GYk8IG77wc+N7NLwut/BLwXngehyMyGhrdxopmdlNCjEKkh/SUjEiN3/8jM7iU0Y+IJwGHgDuALoKOZFQD7CY2DQOgx3TPCiWE78OPw+h8BT5vZA+FtjEjgYYjUmJ6OK1JHzOygu58cdBwi8aauKhERiYmuOEREJCa64hARkZgocYiISEyUOEREJCZKHCIiEhMlDhERicn/B1Wje2YNV+NyAAAAAElFTkSuQmCC\n", 188 | "text/plain": [ 189 | "
" 190 | ] 191 | }, 192 | "metadata": { 193 | "needs_background": "light" 194 | }, 195 | "output_type": "display_data" 196 | } 197 | ], 198 | "source": [ 199 | "# plotting\n", 200 | "plt.plot(range(len(loss_history)), loss_history, 'o', label='Logistic Regression Training phase')\n", 201 | "plt.ylabel('cost')\n", 202 | "plt.xlabel('epoch')\n", 203 | "plt.legend()\n", 204 | "plt.show()" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Your Turn\n", 212 | "\n", 213 | "Please switch to the **Theano** backend and **restart** the notebook.\n", 214 | "\n", 215 | "You _should_ see no difference in the execution!\n", 216 | "\n", 217 | "**Reminder**: please keep in mind that you *can* execute shell commands from a notebook (pre-pending a `!` sign).\n", 218 | "Thus:\n", 219 | "\n", 220 | "```shell\n", 221 | " !cat ~/.keras/keras.json\n", 222 | "```\n", 223 | "should show you the content of your keras configuration file." 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "source": [ 232 | "### Moreover\n", 233 | "\n", 234 | "Try to play a bit with the **learning reate** parameter to see how the loss history floats... " 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "---" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## Exercise: Linear Regression\n", 249 | "To get familiar with automatic differentiation, we start by learning a simple linear regression model using Stochastic Gradient Descent (SGD).\n", 250 | "\n", 251 | "Recall that given a dataset $\\{(x_i, y_i)\\}_{i=0}^N$, with $x_i, y_i \\in \\mathbb{R}$, the objective of linear regression is to find two scalars $w$ and $b$ such that $y = w\\cdot x + b$ fits the dataset. In this tutorial we will learn $w$ and $b$ using SGD and a Mean Square Error (MSE) loss:\n", 252 | "\n", 253 | "$$\\mathcal{l} = \\frac{1}{N} \\sum_{i=0}^N (w\\cdot x_i + b - y_i)^2$$\n", 254 | "\n", 255 | "Starting from random values, parameters $w$ and $b$ will be updated at each iteration via the following rule:\n", 256 | "\n", 257 | "$$w_t = w_{t-1} - \\eta \\frac{\\partial \\mathcal{l}}{\\partial w}$$\n", 258 | "
\n", 259 | "$$b_t = b_{t-1} - \\eta \\frac{\\partial \\mathcal{l}}{\\partial b}$$\n", 260 | "\n", 261 | "where $\\eta$ is the learning rate.\n", 262 | "\n", 263 | "**NOTE:** Recall that **linear regression** is indeed a **simple neuron** with a linear activation function!!" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Definition: Placeholders and Variables" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "First of all, we define the necessary variables and placeholders for our computational graph. Variables maintain state across executions of the computational graph, while placeholders are ways to feed the graph with external data.\n", 278 | "\n", 279 | "For the linear regression example, we need three variables: `w`, `b`, and the learning rate for SGD, `lr`. \n", 280 | "\n", 281 | "Two placeholders `x` and `target` are created to store $x_i$ and $y_i$ values." 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 13, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# Placeholders and variables\n", 291 | "x = K.placeholder()\n", 292 | "target = K.placeholder()\n", 293 | "w = K.variable(np.random.rand())\n", 294 | "b = K.variable(np.random.rand())" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "#### Notes:\n", 302 | "\n", 303 | "In case you're wondering what's the difference between a **placeholder** and a **variable**, in short:\n", 304 | "\n", 305 | "* Use `K.variable()` for trainable variables such as weights (`W`) and biases (`b`) for your model.\n", 306 | "* Use `K.placeholder()` to feed actual data (e.g. training examples)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "source": [ 315 | "## Model definition\n", 316 | "Now we can define the $y = w\\cdot x + b$ relation as well as the MSE loss in the computational graph." 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "# Define model and loss" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "collapsed": true 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "# %load ../solutions/sol_2311.py" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "Then, given the gradient of MSE wrt to `w` and `b`, we can define how we update the parameters via SGD:" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# %load ../solutions/sol_2312.py" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "The whole model can be encapsulated in a `function`, which takes as input `x` and `target`, returns the current loss value and updates its parameter according to `updates`." 
362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "train = K.function(inputs=[x, target], outputs=[loss], updates=updates)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "## Training\n", 380 | "Training is now just a matter of calling the `function` we have just defined. Each time `train` is called, indeed, `w` and `b` will be updated using the SGD rule.\n", 381 | "\n", 382 | "Having generated some random training data, we will feed the `train` function for several epochs and observe the values of `w`, `b`, and loss." 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "collapsed": true 390 | }, 391 | "outputs": [], 392 | "source": [ 393 | "# Generate data\n", 394 | "np_x = np.random.rand(1000)\n", 395 | "np_target = 0.96*np_x + 0.24" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": true 403 | }, 404 | "outputs": [], 405 | "source": [ 406 | "# Training\n", 407 | "loss_history = []\n", 408 | "for epoch in range(200):\n", 409 | " current_loss = train([np_x, np_target])[0]\n", 410 | " loss_history.append(current_loss)\n", 411 | " if epoch % 20 == 0:\n", 412 | " print(\"Loss: %.03f, w, b: [%.02f, %.02f]\" % (current_loss, K.eval(w), K.eval(b)))" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "We can also plot the loss history:" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "collapsed": true 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "# Plot loss history" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "collapsed": true 438 | }, 439 | "outputs": [], 440 | "source": [ 441 | "# %load ../solutions/sol_2313.py" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "### Final Note:\n", 449 | "\n", 450 | "Please switch back your backend to `tensorflow` before moving on. It may be useful for next notebooks !-)" 451 | ] 452 | } 453 | ], 454 | "metadata": { 455 | "kernelspec": { 456 | "display_name": "Python 3.6 (DL Keras TF)", 457 | "language": "python", 458 | "name": "dl-keras-tf" 459 | }, 460 | "language_info": { 461 | "codemirror_mode": { 462 | "name": "ipython", 463 | "version": 3 464 | }, 465 | "file_extension": ".py", 466 | "mimetype": "text/x-python", 467 | "name": "python", 468 | "nbconvert_exporter": "python", 469 | "pygments_lexer": "ipython3", 470 | "version": "3.6.6" 471 | } 472 | }, 473 | "nbformat": 4, 474 | "nbformat_minor": 1 475 | } 476 | --------------------------------------------------------------------------------