├── .gitignore
├── Procfile
├── README.md
├── References.md
├── correlations
    └── correlation_plot.ipynb
├── decision_trees
    ├── decision_tree.JPG
    ├── decision_tree.pptx
    └── decision_trees.ipynb
├── dynamically_controlled_kernel_estimation
    ├── dcke
    │   ├── __init__.py
    │   ├── dcke.py
    │   └── test_dcke.py
    ├── dynamically_controlled_kernel_estimation.ipynb
    ├── locreg
    │   ├── __init__.py
    │   ├── local_regression.py
    │   └── test_local_regression.py
    ├── models
    │   ├── __init__.py
    │   ├── black_scholes.py
    │   └── test_black_scholes.py
    └── pics
    │   ├── american_option_pricing.png
    │   ├── conditional_expectation_orthogonal_projection.png
    │   ├── dcke_basket_heston.jpg
    │   ├── dcke_performance.jpg
    │   └── dcke_rbergomi.jpg
├── ensemble
    ├── adaboost_classifier.ipynb
    └── adaboost_regressor.ipynb
├── environment-explicit.txt
├── environment.yml
├── gaussian_process_regression
    └── gaussian_process_regression.ipynb
├── lda_qda
    └── linear_quadratic_discriminant_analysis.ipynb
├── local_regression
    ├── local_regression.ipynb
    └── locreg
    │   ├── __init__.py
    │   ├── local_regression.py
    │   └── test_local_regression.py
├── logistic_regression
    └── logistic_regression.ipynb
├── lstm_intro
    ├── lstm.pdf
    ├── lstm_cell.pdf
    ├── lstm_cell.png
    └── lstm_intro.ipynb
├── naive_bayes
    └── naive_bayes.ipynb
├── network_topology_selection
    ├── data_how_deep_financial_models.zip
    ├── data_surgery.zip
    ├── how_deep_are_financial_models.ipynb
    ├── keras_grid
    │   ├── __init__.py
    │   ├── model_grid.py
    │   └── test_model_grid.py
    ├── network_topology_selection.ipynb
    ├── networks_financial_models_brain_surgery.ipynb
    └── pricinglib
    │   ├── __init__.py
    │   ├── black_scholes.py
    │   └── heston.py
├── neural_network_intro
    └── neural_network_intro_model_setup.ipynb
├── newton_gradient_backprop
    ├── adjoint.ipynb
    ├── backpropagation.ipynb
    ├── gradient_descent.ipynb
    └── newton.ipynb
├── regression_revisited
    └── regression_revisited.ipynb
├── requirements.txt
├── runtime.txt
└── start.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | .idea/
107 | 


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: voila --port=$PORT --no-browser --enable_nbextensions=True --strip_sources=False


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Fundamentals
 2 | 
 3 | ## Probability
 4 | * [Correlation Matrices](https://github.com/niknow/machine-learning-examples/blob/master/correlations/correlation_plot.ipynb): A plotly vizualization of the space of 3x3 correlation matrices using a nice parametrization
 5 | 
 6 | 
 7 | ## Classification
 8 | * [Naive Bayes](https://github.com/niknow/machine-learning-examples/blob/master/naive_bayes/naive_bayes.ipynb): Introduction, derivation and reconciliation of Naive Bayes - a baseline model for classification.
 9 | 
10 | * [Linear / Quadratic Discriminant Analysis](https://github.com/niknow/machine-learning-examples/blob/master/lda_qda/linear_quadratic_discriminant_analysis.ipynb): Introduction, derivation, properties and examples of LDA/QDA classification
11 | 
12 | * [Logistic Regression](https://github.com/niknow/machine-learning-examples/blob/master/logistic_regression/logistic_regression.ipynb): Definitions, Binary and multi-class case, Sigmoid and Softmax functions, Cross-Entropy Loss, Regularization, Examples
13 | 
14 | * [Decision Tree Classifiers](https://github.com/niknow/machine-learning-examples/blob/master/decision_trees/decision_trees.ipynb): Graph theory, binary rooted trees, impurity functions, minimal cost-complexity pruning
15 |   
16 | 
17 | ## Advanced Regression Techniques
18 | 
19 | * [Linear Regression](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/regression_revisited/regression_revisited.ipynb): A recap of linear regression - a core fundamental of machine learning.
20 |    
21 | * [Local Regression](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/local_regression/local_regression.ipynb): Local regression is a refinement of linear regression that adapts the model at each point of the prediction.  
22 | 
23 | * [Gaussian Process Regression (GPR)](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/gaussian_process_regression/gaussian_process_regression.ipynb): An advanced regression technique that produces not only predictions, but also confidence bounds around them.
24 | 
25 | * [Dynamically Controlled Kernel Estimation (DCKE)](https://github.com/niknow/machine-learning-examples/blob/master/dynamically_controlled_kernel_estimation/dynamically_controlled_kernel_estimation.ipynb): A combination of local regression, control variates and Gaussian process regression to estimate conditional expectations. The method is model free, data-driven and particularly suited for financial applications.
26 | 
27 | * [Decision Tree Regressors](https://github.com/niknow/machine-learning-examples/blob/master/decision_trees/decision_trees.ipynb): Decision trees can be used for regression as well
28 | 
29 | 
30 | ## Neural Network Topologies
31 | 
32 | * [Multilayer Perceptrons (MLP)](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/neural_network_intro/neural_network_intro_model_setup.ipynb): Introduction to the most common form of artificial neural networks (ANN).
33 | 
34 | * [Long-Term-Short-Term-Memory networks (LSTM)](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/lstm_intro/lstm_intro.ipynb): Introduction to LSTMs, a popular form of recurrent neural networks (RNNs).
35 | 
36 | * [Network Topology Selection](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/network_topology_selection/network_topology_selection.ipynb): A methodology to choose a topology for a neural network, e.g. the number of hidden layers and units.
37 | 
38 | 
39 | ## Ensemble Learning
40 | 
41 | * [Boosting Classifications](https://github.com/niknow/machine-learning-examples/blob/master/ensemble/adaboost_classifier.ipynb): Boosting decision tree classifiers is a very common form of ensemble learning. We discuss the famous SAMME algorithm including the weak classifier training, motivate its the weighting and the exponential loss function. 
42 | 
43 | * [Boosting Regressions](https://github.com/niknow/machine-learning-examples/blob/master/ensemble/adaboost_regressor.ipynb): Boosting regressors is possible, but slightly different from classifier boosting. We discuss the popular R2 algorithm including the bootstrap sampling and its differences to the SAMME classifier boosting. 
44 | 
45 | 
46 | ## Training Networks & Optimization Techniques
47 | 
48 | * [Newton's Method](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/newton_gradient_backprop/newton.ipynb): A recap of the Newton's method.
49 | 
50 | * [Gradient Descent - Basics](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/newton_gradient_backprop/gradient_descent.ipynb): Mathematical foundations and basics of gradient descent.
51 | 
52 | * [Gradient Descent - Advanced](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/stochastic_gradient_descent.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): Some illustrations, background and examples of gradient descent.
53 | 
54 | * [Backpropagation](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/newton_gradient_backprop/backpropagation.ipynb): Derivation of the backpropagation algorithm.
55 | 
56 | * [Adjoint Method](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/newton_gradient_backprop/adjoint.ipynb): Relationship between backpropagation and the adjoint method.
57 | 
58 | ## Basic Examples
59 | 
60 | * [Learning the Sine](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/LearnSine_JK.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): Simple example of how to use keras and tensorflow to learn a curve.
61 | 
62 | * [Learning a 2D function](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/Learn2dFunction.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): A slightly more complex example of how to learn a surface.
63 | 
64 | 
65 | # Machine Learning & Quantitative Finance
66 | * [How deep are financial models?](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/network_topology_selection/how_deep_are_financial_models.ipynb): Learn the pricing function of Black-Scholes and Heston model. Application of [network topology selection](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/network_topology_selection/network_topology_selection.ipynb).
67 | 
68 | * [Neural Network Brain Surgery](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/network_topology_selection/networks_financial_models_brain_surgery.ipynb): Can the difference between the Black-Scholes and the Heston model be visualized as the brains of the networks that learn their pricing function?
69 | 
70 | * [Calibrating Heston](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/Calibration_Illustration.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): Learn the calibration function of a Heston model using a neural network.
71 | 
72 | * [Calibrating Hull-White](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/HW_1F_Pricing.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): Learn the calibration function of a Hull-White model using a neural network.
73 | 
74 | * [Autograd](https://nbviewer.jupyter.org/github/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--/blob/master/Autograd.ipynb) [[@lapislago]](https://github.com/Lapsilago/Machine-Learning---Option-Pricing-Calibration-Hedging--): A small example on how to automatically differentiate the Black-Scholes pricing formula.
75 | 
76 | 
77 | [References](https://github.com/niknow/machine-learning-examples/blob/master/References.md)
78 | 


--------------------------------------------------------------------------------
/References.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning & Quant Finance
 2 | 
 3 | ## Deep Pricing
 4 | * [[URL]](https://arxiv.org/abs/1901.08943) Liu, Oosterlee, Bohte: Pricing options and computing implied volatilities using neural networks.
 5 | * [[URL]](https://arxiv.org/abs/1809.02233) Ferguson, Green: Deeply Learning Derivatives.
 6 | * [[URL]](https://ssrn.com/abstract=3288882) McGhee: An Artificial Neural Network Representation of the SABR Stochastic Volatility Model.
 7 | * [[URL]](https://ssrn.com/abstract=236673) Hutchinson, Lo, Poggio: A Nonparametric Approach to Pricing and Hedging Derivative Securities Via Learning Networks.
 8 | * [[URL]](https://ssrn.com/abstract=3191050) Spiegeleer, Madan, Reyners, Schoutens: Machine Learning for Quantitative Finance.
 9 | * [[URL]](https://aaltodoc.aalto.fi/handle/123456789/30398) Stark: Machine Learning and Options Pricing.
10 | 
11 | ## Deep Calibration
12 | * [[URL]](https://www.researchgate.net/publication/220505020_Machine_Learning_Vasicek_Model_Calibration_with_Gaussian_Processes) Sousa, Esquivel, Gaspar: Machine learning Vasicek model calibration with Gaussian processes. 
13 | * [[URL]](https://arxiv.org/abs/1810.03399) Bayer, Stemper: Deep calibration of rough stochastic volatility models. 
14 | * [[URL]](https://ssrn.com/abstract=3252432) Dimitroff, Roeder, Fries: Volatility model calibration with convolutional neural networks.
15 | * [[URL]](http://ssrn.com/abstract=2812140) Hernandez: Model Calibration with Neural Networks.
16 | * [[URL]](https://arxiv.org/abs/1901.09647) Horvath, Muguruza, Tomas: Deep Learning Volatility.
17 | 
18 | ## Deep Hedging
19 | * [[URL]](https://arxiv.org/abs/1802.03042) Bühler, Gonon, Teichmann, Wood: Deep Hedging.
20 | 
21 | ## Curve Dynamics & Term Structures
22 | * [[URL]](https://ssrn.com/abstract=3041232) Kondratyev: Learning Curve Dynamics with Artificial Neural Networks.
23 | * [[URL]](https://arxiv.org/abs/1703.01536) Sambasivan, Das: A Statistical Machine Learning Approach to Yield Curve Forecasting.
24 | * [[URL]](https://arxiv.org/abs/1604.02237) Cousin, Maatouk, Rulliere: Kriging of financial term-structures.
25 | 
26 | ## CDS Spreads
27 | * [[URL]](https://arxiv.org/abs/1705.06899) Brummelhuis, Luo: CDS rate construction methods by machine learning techniques.
28 | 
29 | ## XVA
30 | * [[URL]](https://arxiv.org/abs/1901.11081) Crépey, Dixon: Gaussian Process Regression for Derivative Portfolio Modeling and Application to CVA Computations 
31 | * [[URL]](https://ssrn.com/abstract=3357626) Ma, Spinner, Venditti, Li, Tang: Initial Margin Simulation with Deep Learning
32 | 
33 | 
34 | # Quantitative Finance
35 | 
36 | ## Risk Factor Models
37 | * [[URL]](https://doi.org/10.1093/rfs/3.4.573) Hull, White: Pricing Interest-Rate-Derivative Securities
38 | * [[URL]](https://www.scribd.com/doc/198899911/Evaluating-and-Hedging-Exotic-Swap-Instruments-via-LGM) Hagan: Evaluating and Hedging Exotic Swap Instruments via LGM
39 | * [[URL]](https://doi.org/10.1093/rfs/6.2.327) Heston: A Closed-Form Solution for Options with Stochastic Volatility with Applications to Bond and Currency Options
40 | * [[URL]](https://ssrn.com/abstract=946405) Andersen: Efficient Simulation of the Heston Stochastic Volatility Model
41 | * [[URL]](https://www.researchgate.net/profile/Patrick_Hagan3/publication/300789919_Probability_Distribution_in_the_SABR_Model_of_Stochastic_Volatility/links/5c91734a299bf11169395d8f/Probability-Distribution-in-the-SABR-Model-of-Stochastic-Volatility.pdf) Hagan, Lesniewski, Woodward: Probability Distribution in the SABR Model of Stochastic Volatility
42 | * [[URL]](https://ssrn.com/abstract=966364) Trolle, Schwarz: A General Stochastic Volatility Model for the Pricing of Interest Rate Derivatives
43 | 
44 | ## American Monte Carlo
45 | * [[URL]](https://escholarship.org/uc/item/43n1k4jb) Longstaff, Schwarz: Valuing American Options by Simulation: A Simple Least-Squares Approach
46 | * [[URL]](https://www.mit.edu/~jnt/Papers/J086-01-bvr-options.pdf) Tsitsiklis, Van Roy: Regression Methods for Pricing Complex American-Style Options
47 | * [[URL]](https://www.mit.edu/~jnt/Papers/J074-99-bvr-stop.pdf) Tsitsiklis, Van Roy: Optimal Stopping of Markov Processes: Hilbert Space Theory, Approximation Algorithms, and an Application to Pricing Financial Derivatives
48 | 
49 | ## Counterparty Risk
50 | * [[URL]](https://ssrn.com/abstract=1032522) Pykhtin, Zhu: A Guide to Modeling Counterparty Credit Risk
51 | 
52 | ## CVA
53 | * [[URL]](https://ssrn.com/abstract=1782063) Pykhtin, Rosen: Pricing Counterparty Risk at the Trade Level and CVA Allocations
54 | 
55 | ## FVA
56 | * [[URL]](https://ssrn.com/abstract=2027195) Burghard, Kjar: Funding Costs, Funding Strategies
57 | * [[URL]](https://ssrn.com/abstract=2157634) Burgard, Kjaer: The FVA Debate: In Theory and Practice
58 | * [[URL]](https://ssrn.com/abstract=1785262) Burgard, Kjaer: In the Balance
59 | * [[URL]](https://www.risk.net/derivatives/1589992/funding-beyond-discounting-collateral-agreements-and-derivatives-pricing) Piterbarg: Funding Beyond Discounting
60 | * [[URL]](https://ssrn.com/abstract=2746010) Andersen, Duffie, Song: Funding Value Adjustments
61 | 
62 | 
63 | 
64 | ## DIM and MVA
65 | * [[URL]](https://ssrn.com/abstract=2716279) Anfuso, Aziz, Giltinan, Loukopoulos: A Sound Modelling and Backtesting Framework for Forecasting Initial Margin Requirements
66 | * [[URL]](https://ssrn.com/abstract=2911167) Caspers, Giltinan, Lichters, Nowaczyk: Forecasting Initial Margin Requirements - A Model Evaluation
67 | * [[URL]](https://arxiv.org/abs/1808.08221) Ruiz, Zeron: Dynamic Initial Margin via Chebyshev Spectral Decomposition
68 | * [[URL]](https://ssrn.com/abstract=3147811) McWalter, Kienitz, Nowaczyk, Rudd, Acar: Dynamic Initial Margin Estimation Based on Quantiles of Johnson Distributions
69 | * [[URL]](https://ssrn.com/abstract=2806156) Andersen, Pykhtin, Sokol: Credit Exposure in the Presence of Initial Margin
70 | * [[URL]](https://ssrn.com/abstract=2902737) Andersen, Pykhtin, Sokol: Rethinking the Margin Period of Risk
71 | * [[URL]](https://ssrn.com/abstract=3040061) Antonov, Issakov, McClelland: Efficient SIMM-MVA Calculations for Callable Exotics
72 | * [[URL]](https://ssrn.com/abstract=3018165) Fries: Fast Stochastic Forward Sensitivities in Monte-Carlo Simulations Using Stochastic Automatic Differentiation (with Applications to Initial Margin Valuation Adjustments (MVA))
73 | * [[URL]](https://arxiv.org/abs/1512.07337) Lou: MVA Transfer Pricing
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/decision_trees/decision_tree.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/decision_trees/decision_tree.JPG


--------------------------------------------------------------------------------
/decision_trees/decision_tree.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/decision_trees/decision_tree.pptx


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/dcke/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/dcke/__init__.py


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/dcke/dcke.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from inspect import isfunction
  3 | import numpy as np
  4 | from sklearn.base import RegressorMixin
  5 | from copy import deepcopy
  6 | from GPy.models import GPRegression
  7 | 
  8 | 
  9 | class DCKE(RegressorMixin):
 10 |     """ Dynamically Controlled Kernel Estimation
 11 |         Computes the conditional expectation $E[Y \mid X=x]$ from
 12 |         a training set $X_i$, $y_i$, $i=1, \ldots, N$ of joint
 13 |         realizations of $X$ and $Y$ for an arbitrary prediction
 14 |         set of $x$'s. The DCKE regressor first uses local regression
 15 |         on a mesh grid to solve the problem on the mesh grid and then
 16 |         uses GPR to evaluate in between the points on the mesh grid.
 17 |         Optionally, a control variate $Z$ can be supplied together
 18 |         with $\mu_Z = E[Z \mid X=x_k]$ for the points $x_k$ on the
 19 |         mesh grid. In that case, the expectation
 20 |         $E[Y +\beta (Z-\mu_Z) \mid X=x_k]$ is computed on the
 21 |         mesh grid with variance reduced by the correlation between
 22 |         $Y$ and $Z$.
 23 |     """
 24 | 
 25 |     def __init__(self, locreg, gpr_kernel):
 26 |         """
 27 |         Initializes the DCKE object.
 28 |         :param locreg: an instance of LocalRegression
 29 |         :param gpr_kernel: an instance of GPy.kern
 30 |         """
 31 |         self.locreg = locreg
 32 |         self.gpr_kernel = gpr_kernel
 33 |         self.gpr_ = None
 34 |         self.X_train_ = None
 35 |         self.y_train_ = None
 36 |         self.x_mesh_ = None
 37 |         self.y_mesh_ = None
 38 |         self.Z_ = None
 39 |         self.mz_ = None
 40 |         self.cov_ = None
 41 |         self.var_ = None
 42 |         self.beta_ = None
 43 | 
 44 |     def fit(self, X, y, x_mesh, Z=None, mz=None, bandwidth=None):
 45 |         """
 46 |         Fits the DCKE to training data.
 47 | 
 48 |         :param X: a numpy array of shape (num_samples, num_dimensions)
 49 |         :param y: a numpy array of shape (num_samples,)
 50 |         :param Z: a numpy array of shape (num_samples,)
 51 |         :param x_mesh: a numpy array of shape (num_meshes, num_dimensions)
 52 |         :param mz: a numpy array of shape (num_meshes,) any any mz[k]
 53 |                    satisties mz[k] = E[Z \mid X=x_k]$ where x_k are the
 54 |                    points in x_mesh
 55 |         :param bandwidth: bandwidth parameter for the local regression
 56 |         :return:
 57 |         """
 58 |         self.X_train_ = X
 59 |         self.y_train_ = y
 60 |         self.x_mesh_ = x_mesh
 61 |         if Z is None and mz is None:
 62 |             self.Z_ = np.zeros_like(self.y_train_)
 63 |             self.mz_ = np.zeros(self.x_mesh_.shape[0])
 64 |         elif (Z is None and mz is not None) or (Z is not None and mz is None):
 65 |             raise ValueError('Parameter Z and mz have to be either both None or both not None.')
 66 |         else:
 67 |             self.Z_ = Z
 68 |             self.mz_ = mz
 69 |         self.locreg.warm_start = True
 70 |         self.locreg.fit(X, y, bandwidth)
 71 | 
 72 |     def _calculate_locregs(self):
 73 |         """
 74 |         Uses the approximate conditional expectation operator
 75 |         $\tilde E[_ \mid X=x]$ defined by the local regression in self.locreg
 76 |         to compute the approximate optimal beta for the control variate $Z$
 77 |         defined by $\beta_x = - \tfrac{\Cov[Y, Z \mid X=x]}{\Var[Z \mid X=x]}$
 78 |         for all $x$ in self.x_mesh.
 79 | 
 80 |         :return: beta, a numpy array of shape (num_mesh_points, )
 81 |         """
 82 |         h = self.locreg.bandwidth
 83 |         n = self.x_mesh_.shape[0]
 84 |         self.cov_ = np.zeros(n)
 85 |         self.var_ = np.zeros(n)
 86 |         self.y_mesh_ = np.zeros(n)
 87 |         self.beta_ = np.zeros(n)
 88 |         m_y = np.zeros(n)
 89 |         m_z = np.zeros(n)
 90 |         for i in range(n):
 91 |             m_y[i] = self.locreg.predict(np.atleast_2d(self.x_mesh_[i]).T).squeeze()
 92 |             self.locreg.fit_partial(np.atleast_2d(self.Z_).T, h)
 93 |             m_z[i] = self.locreg.predict_partial().squeeze()
 94 |             self.locreg.fit_partial((self.y_train_ - m_y[i]) * (self.Z_ - m_z[i]), h)
 95 |             self.cov_[i] = self.locreg.predict_partial().squeeze()
 96 |             self.locreg.fit_partial((self.Z_ - m_z[i]) ** 2, h)
 97 |             self.var_[i] = self.locreg.predict_partial().squeeze()
 98 |             self.beta_[i] = - self.cov_[i] / self.var_[i]
 99 |             self.locreg.fit_partial(self.y_train_ + self.beta_[i] * (self.Z_ - self.mz_[i]), h)
100 |             self.y_mesh_[i] = self.locreg.predict_partial()
101 | 
102 |     def predict(self, X):
103 |         """
104 |         Predicts the conditional expectation $E[Y \mid X=x]$ for all x in $X$.
105 | 
106 |         :param X: a numpy array of shape (num_predictions, num_dimensions)
107 |         :return: a numpy array of shape (num_predictions,)
108 |         """
109 | 
110 |         self._calculate_locregs()
111 |         self.gpr_ = GPRegression(self.x_mesh_,
112 |                                  np.atleast_2d(self.y_mesh_).T,
113 |                                  self.gpr_kernel)
114 |         self.gpr_.optimize(messages=False)
115 |         #self.gpr_.optimize_restarts(num_restarts = 10)
116 |         y_pred, self.gp_var_ = self.gpr_.predict(X)
117 |         self.gp_var_ = self.gp_var_.squeeze()
118 |         return y_pred.squeeze()
119 | 
120 | 
121 | class DCKEGrid(ABC):
122 | 
123 |     def __init__(self, locreg, gpr):
124 |         self.locreg = locreg
125 |         self.gpr = gpr
126 |         self.dckes = []
127 | 
128 |     @abstractmethod
129 |     def fit(self, X, y, x_mesh, Z=None, mz=None, bandwidth=None):
130 |         pass
131 | 
132 |     @abstractmethod
133 |     def predict(self, X):
134 |         pass
135 | 
136 |     def __getitem__(self, key):
137 |         return self.dckes[key]
138 | 
139 |     @property
140 |     def cov_(self):
141 |         return np.array([dcke.cov_ for dcke in self.dckes])
142 | 
143 |     @property
144 |     def var_(self):
145 |         return np.array([dcke.var_ for dcke in self.dckes])
146 | 
147 |     @property
148 |     def beta_(self):
149 |         return np.array([dcke.beta_ for dcke in self.dckes])
150 | 
151 | 
152 | class DCKEGridIndependent(DCKEGrid):
153 |     """
154 |     Provides a wrapper for consistently estimating conditional expectations
155 |     via DCKE on a grid of random variables, e.g. from a stochastic process.
156 |     """
157 | 
158 |     def _get_bandwidths(self, bandwidth, m):
159 |         if bandwidth is None:
160 |             return [None for _ in range(m)]
161 |         elif isinstance(bandwidth, (list, tuple, np.ndarray)):
162 |             return bandwidth
163 |         else:
164 |             return np.array([bandwidth for _ in range(m)])
165 | 
166 |     def fit(self, X, y, x_mesh, Z=None, mz=None, bandwidth=None):
167 |         """
168 |         Fits the DCKE to training data.
169 | 
170 |         :param X: a numpy array of shape (num_grid_points, num_samples, num_dimensions)
171 |         :param y: a numpy array of shape (num_grid_points, num_samples,)
172 |         :param Z: a numpy array of shape (num_grid_points, num_samples,)
173 |         :param x_mesh: a numpy array of shape (num_grid_points, num_meshes, num_dimensions)
174 |         :param mz: a numpy array of shape (num_meshes,) any any mz[k]
175 |                    satisfies mz[k] = E[Z \mid X=x_k]$ where x_k are the
176 |                    points in x_mesh
177 |         :param bandwidth: bandwidth parameters for the local regression
178 |                           if None, then bandwidth will be selected automatically
179 |                           if scalar, then bandwith will be the same for all
180 |                           if array, then each DCKE uses its own bandwidth
181 |         :return:
182 |         """
183 |         m = X.shape[0]
184 |         self.dckes = [DCKE(deepcopy(self.locreg), deepcopy(self.gpr)) for _ in range(m)]
185 |         bandwidths = self._get_bandwidths(bandwidth, m)
186 |         for i in range(m):
187 |             self.dckes[i].fit(
188 |                 np.atleast_2d(X[i]),
189 |                 y[i],
190 |                 x_mesh[i],
191 |                 Z[i],
192 |                 mz[i],
193 |                 bandwidths[i])
194 | 
195 |     def predict(self, X):
196 |         """
197 |         Predicts the conditional expectation $E[Y \mid X=x]$ for all x in $X$.
198 | 
199 |         :param X: a numpy array of shape (num_grid_points, num_predictions, num_dimensions)
200 |         :return: a numpy array of shape (num_grid_points, num_predictions,)
201 |         """
202 |         m = X.shape[0]
203 |         return np.array([self.dckes[i].predict(np.atleast_2d(X[i])) for i in range(m)])
204 | 
205 |     @property
206 |     def X_(self):
207 |         return np.array([dcke.X_train_ for dcke in self.dckes])
208 | 
209 |     @property
210 |     def y_(self):
211 |         return np.array([dcke.y_train_ for dcke in self.dckes])
212 | 
213 |     @property
214 |     def x_mesh_(self):
215 |         return np.array([dcke.x_mesh_ for dcke in self.dckes])
216 | 
217 |     @property
218 |     def Z_(self):
219 |         return np.array([dcke.Z_ for dcke in self.dckes])
220 | 
221 |     @property
222 |     def mz_(self):
223 |         return np.array([dcke.mz_ for dcke in self.dckes])
224 | 
225 | 
226 | class DCKEGridRecursive(DCKEGrid):
227 | 
228 |     def __init__(self, locreg, gpr):
229 |         super().__init__(locreg, gpr)
230 |         self.X_train_ = None
231 |         self.y_train_ = None
232 |         self.x_mesh_ = None
233 |         self.Z_ = None
234 |         self.mz_ = None
235 |         self.bandwidths_ = None
236 |         self.recursion_functions_ = None
237 |         self.y_rec_ = None
238 | 
239 |     def fit(self, X, y, x_mesh, Z=None, mz=None, bandwidth=None, recursion_functions=None):
240 |         """
241 |         Fits the DCKE to training data.
242 | 
243 |         :param X: a numpy array of shape (num_grid_points, num_samples, num_dimensions)
244 |         :param y: a numpy array of shape (num_samples,)
245 |                              or of shape (num_samples,)
246 |         :param Z: a numpy array of shape (num_grid_points, num_samples,)
247 |         :param x_mesh: a numpy array of shape (num_grid_points, num_meshes, num_dimensions)
248 |         :param mz: a numpy array of shape (num_meshes,) any any mz[k]
249 |                    satisfies mz[k] = E[Z \mid X=x_k]$ where x_k are the
250 |                    points in x_mesh
251 |         :param bandwidth: bandwidth parameters for the local regression
252 |                           if None, then bandwidth will be selected automatically
253 |                           if scalar, then bandwith will be the same for all
254 |                           if array of scalars, then each DCKE uses its own bandwidth
255 |                           if array of functions, then each DCKE computes its own bandwidth
256 |                           by evaluating the function on y_train_
257 |         :return:
258 |         """
259 |         self.X_train_ = X
260 |         self.y_train_ = y
261 |         self.x_mesh_ = x_mesh
262 |         self.Z_ = Z
263 |         self.mz_ = mz
264 |         self.bandwidths_ = self._get_bandwidths(bandwidth)
265 |         m = X.shape[0]
266 |         self.dckes = [DCKE(deepcopy(self.locreg), deepcopy(self.gpr)) for _ in range(m)]
267 |         self.bandwidths_ = self._get_bandwidths(bandwidth)
268 |         self.recursion_functions_ = self._get_recursion_functions(recursion_functions)
269 | 
270 |     def _get_bandwidths(self, bandwidth):
271 |         m = self.X_train_.shape[0]
272 |         if bandwidth is None:
273 |             bw = [lambda x: None for _ in range(m)]
274 |         elif np.isscalar(bandwidth):
275 |             bw = [lambda x: bandwidth for _ in range(m)]
276 |         elif isinstance(bandwidth, (list, tuple, np.ndarray)):
277 |             if np.isscalar(bandwidth[0]):
278 |                 bw = [lambda x, b=b: b for b in bandwidth]
279 |             elif isfunction(bandwidth[0]):
280 |                 bw = bandwidth
281 |             else:
282 |                 raise ValueError("Bandwidths not recognized.")
283 |         else:
284 |             raise ValueError("Bandwidths not recognized..")
285 |         return bw
286 | 
287 |     def _get_recursion_functions(self, recursion_functions):
288 |         m = self.X_train_.shape[0]
289 |         if recursion_functions is None:
290 |             rf = [lambda x: x for _ in range(m)]
291 |         elif isinstance(recursion_functions, (list, tuple, np.ndarray)):
292 |             if isfunction(recursion_functions[0]):
293 |                 rf = recursion_functions
294 |             else:
295 |                 raise ValueError("Recursion functions not recognized.")
296 |         else:
297 |             raise ValueError("Recursion functions not recognized..")
298 |         return rf
299 | 
300 |     def predict(self, X=None):
301 |         """
302 |         Predicts the conditional expectation $E[Y \mid X=x]$ for all x in $X$.
303 | 
304 |         :param X: a numpy array of shape (num_grid_points, num_predictions, num_dimensions)
305 |         :param recursion_functions: If not None, then only self[-1] uses y_train_ for 
306 |                                     the prediction. Traversing the list of DCKEs backwards,
307 |                                     in step i, self[i] uses f(self[i+1].predict(X[i+1])) 
308 |                                     instead of self[i].y_train_, where 
309 |                                     f = recursion_functions[i].
310 |         :return: a numpy array of shape (num_grid_points, num_predictions,)
311 |         """
312 | 
313 |         num_grid_points = self.X_train_.shape[0]
314 |         num_samples = self.X_train_.shape[1]
315 |         self.y_rec_ = np.zeros((num_grid_points, num_samples))
316 |         if X is not None:
317 |             num_predictions = X.shape[1]
318 |             y_pred = np.zeros((num_grid_points, num_predictions))
319 |         self[-1].fit(
320 |             self.X_train_[-1],
321 |             self.y_train_,
322 |             self.x_mesh_[-1],
323 |             self.Z_[-1],
324 |             self.mz_[-1],
325 |             self.bandwidths_[-1](self.y_train_))
326 |         self.y_rec_[-1, :] = self[-1].predict(self.X_train_[-1])
327 |         if X is not None:
328 |             y_pred[-1, :] = self[-1].predict(X[-1])
329 |         for i in range(num_grid_points-2, -1, -1):
330 |             y = self.recursion_functions_[i](self.y_rec_[i+1])
331 |             self[i].fit(
332 |                 self.X_train_[i],
333 |                 y,
334 |                 self.x_mesh_[i],
335 |                 self.Z_[i],
336 |                 self.mz_[i],
337 |                 self.bandwidths_[i](y))
338 |             self.y_rec_[i, :] = self[i].predict(self.X_train_[i])
339 |             if X is not None:
340 |                 y_pred[i, :] = self[i].predict(X[i])
341 |         if X is not None:
342 |             return y_pred
343 |         else:
344 |             return self.y_rec_
345 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/dcke/test_dcke.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | from unittest import TestCase
  3 | import numpy as np
  4 | import GPy
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | from dcke import DCKE, DCKEGridIndependent, DCKEGridRecursive
  8 | from locreg import LocalRegression
  9 | from models.black_scholes import BlackScholes
 10 | 
 11 | 
 12 | class TestDCKE(TestCase):
 13 | 
 14 |     def setUp(self):
 15 |         self.locreg = LocalRegression(degree=0)
 16 |         self.gpr_kernel = GPy.kern.RBF(input_dim=1)
 17 | 
 18 |     def test_black_scholes(self):
 19 |         self.r = 0.01
 20 |         self.sigma = 0.3
 21 |         self.bs = BlackScholes(r=self.r, sigma=self.sigma)
 22 |         self.time_grid = np.array([0, 0.5, 1.])
 23 |         self.num_sims = 1000
 24 |         np.random.seed(1)
 25 |         self.X = self.bs.paths(s0=100, time_grid=self.time_grid, num_sims=self.num_sims)
 26 |         self.T = self.time_grid[-1]
 27 |         self.t = self.time_grid[-2]
 28 |         self.K = 95
 29 |         self.df = np.exp(-(self.T-self.t) * self.r)
 30 |         self.y = self.df * np.maximum(self.X[-1] - self.K, 0)
 31 |         self.h = (4 / (3 * self.num_sims)) ** (1 / 5) * np.std(self.y)
 32 |         self.eps = 1 / (2 * self.h **2)
 33 |         self.num_quantiles = 100
 34 |         self.quantile_grid = np.linspace(0.1, 99.0, num=self.num_quantiles)
 35 |         self.x_mesh = np.percentile(self.X[1], self.quantile_grid)
 36 |         self.beta = np.zeros(self.num_quantiles)
 37 |         self.mz = np.zeros(self.num_quantiles)
 38 |         self.my = np.zeros(self.num_quantiles)
 39 |         self.var = np.zeros(self.num_quantiles)
 40 |         self.cov = np.zeros(self.num_quantiles)
 41 |         for i in range(self.x_mesh.shape[0]):
 42 |             x = self.x_mesh[i]
 43 |             k = np.exp(-self.eps * (self.X[1] - x)**2)
 44 |             self.mz[i] = np.sum(self.df * self.X[2] * k) / np.sum(k)
 45 |             self.my[i] = np.sum(self.y * k) / np.sum(k)
 46 |             cov = (self.y - self.my[i]) * (self.df * self.X[2] - self.mz[i])
 47 |             self.cov[i] = np.sum(cov * k) / np.sum(k)
 48 |             var = (self.df * self.X[2] - self.mz[i])**2
 49 |             self.var[i] = np.sum(var * k) / np.sum(k)
 50 |             self.beta[i] = - self.cov[i] / self.var[i]
 51 |         self.y_mesh = self.my + self.beta * (self.mz - self.x_mesh)
 52 |         self.gpr = GPy.models.GPRegression(np.atleast_2d(self.x_mesh).T,
 53 |                                            np.atleast_2d(self.y_mesh).T,
 54 |                                            deepcopy(self.gpr_kernel))
 55 |         self.gpr.optimize(messages=False)
 56 |         y_pred = self.gpr.predict(np.atleast_2d(self.x_mesh).T)[0].squeeze()
 57 |         self.dcke = DCKE(locreg=deepcopy(self.locreg), gpr_kernel=deepcopy(self.gpr_kernel))
 58 |         self.dcke.fit(X=np.atleast_2d(self.X[1]).T,
 59 |                       y=self.y,
 60 |                       Z=self.df * self.X[2],
 61 |                       x_mesh=np.atleast_2d(self.x_mesh).T,
 62 |                       mz=self.x_mesh)
 63 |         y_pred_dcke = self.dcke.predict(np.atleast_2d(self.x_mesh).T)
 64 |         y_true = np.array([self.bs.option_price(s, self.T - self.t, self.K) for s in self.x_mesh])
 65 |         # plt.plot(self.x_mesh, y_true, label="truth")
 66 |         # plt.plot(self.x_mesh, y_pred, label="pred")
 67 |         # plt.plot(self.x_mesh, y_pred_dcke, label="pred dcke")
 68 |         # plt.legend()
 69 |         # plt.show()
 70 |         np.testing.assert_array_almost_equal(self.y_mesh.squeeze(), self.dcke.y_mesh_.squeeze())
 71 |         np.testing.assert_array_almost_equal(self.x_mesh.squeeze(), self.dcke.x_mesh_.squeeze())
 72 |         np.testing.assert_array_almost_equal(y_pred, y_pred_dcke)
 73 |         self.assertTrue(np.all(np.abs(y_pred -y_true)<1))
 74 | 
 75 | 
 76 | class TestDCKEGridIndependent(TestCase):
 77 | 
 78 |     def setUp(self):
 79 |         self.mu = np.array([1, 2, 0])
 80 |         self.Sigma = np.array([[3, 0, 0],
 81 |                                [0, 4, 0],
 82 |                                [0, 0, 5]])
 83 |         quantile_levels = np.linspace(0.1, 99, 10)
 84 |         N = 100
 85 |         np.random.seed(1)
 86 |         W = np.random.multivariate_normal(self.mu, self.Sigma, N)
 87 |         self.X = W[:, 0]
 88 |         self.Y = W[:, 1]
 89 |         self.Z = W[:, 2]
 90 |         self.x_mesh = np.percentile(self.X, quantile_levels)
 91 |         self.mz = np.zeros_like(self.x_mesh)
 92 |         self.locreg = LocalRegression(degree=0)
 93 |         self.gpr_kernel = GPy.kern.RBF(input_dim=1)
 94 | 
 95 |     def test_singleton(self):
 96 |         self.dcke = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
 97 |         self.dcke.fit(np.atleast_2d(self.X).T, self.Y, np.atleast_2d(self.x_mesh).T, self.Z, self.mz)
 98 |         y_pred = self.dcke.predict(np.atleast_2d(self.x_mesh).T)
 99 |         self.dcke_grid = DCKEGridIndependent(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
100 |         self.dcke_grid.fit(self.X[np.newaxis, : , np.newaxis],
101 |                            self.Y[np.newaxis, :],
102 |                            self.x_mesh[np.newaxis, :, np.newaxis],
103 |                            self.Z[np.newaxis, :],
104 |                            self.mz[np.newaxis, :])
105 |         y_pred_grid = self.dcke_grid.predict(self.x_mesh[np.newaxis, :, np.newaxis])
106 |         np.testing.assert_array_almost_equal(y_pred, y_pred_grid.squeeze())
107 | 
108 |     def test_grid_components(self):
109 |         self.dcke1 = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
110 |         self.dcke2 = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
111 |         self.dcke1.fit(np.atleast_2d(self.X).T,
112 |                        self.Y,
113 |                        np.atleast_2d(self.x_mesh).T,
114 |                        self.Z,
115 |                        self.mz)
116 |         bandwidth = self.dcke1.locreg.bandwidth
117 |         self.dcke2.fit(2 * np.atleast_2d(self.X).T,
118 |                        2 * self.Y,
119 |                        2 * np.atleast_2d(self.x_mesh).T,
120 |                        2 * self.Z,
121 |                        2 * self.mz,
122 |                        bandwidth)
123 |         y_pred1 = self.dcke1.predict(np.atleast_2d(self.x_mesh).T)
124 |         y_pred2 = self.dcke2.predict(2 * np.atleast_2d(self.x_mesh).T)
125 |         self.dcke_grid = DCKEGridIndependent(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
126 |         X = np.concatenate((np.atleast_2d(self.X).T, 2 * np.atleast_2d(self.X).T), axis=1).T[:, :, np.newaxis]
127 |         y = np.concatenate((np.atleast_2d(self.Y).T, 2 * np.atleast_2d(self.Y).T), axis=1).T
128 |         Z = np.concatenate((np.atleast_2d(self.Z).T, 2 * np.atleast_2d(self.Z).T), axis=1).T
129 |         mz = np.concatenate((np.atleast_2d(self.mz).T, 2 * np.atleast_2d(self.mz).T), axis=1).T
130 |         x_mesh = np.concatenate((np.atleast_2d(self.x_mesh).T, 2 * np.atleast_2d(self.x_mesh).T), axis=1).T[:, :, np.newaxis]
131 |         self.dcke_grid.fit(X, y, x_mesh, Z, mz, bandwidth)
132 |         y_pred_grid = self.dcke_grid.predict(x_mesh)
133 |         y_pred = {0: y_pred1, 1: y_pred2}
134 |         dcke = {0: self.dcke1, 1: self.dcke2}
135 |         for i in range(2):
136 |             np.testing.assert_array_almost_equal(y_pred[i], y_pred_grid[i].squeeze())
137 |             np.testing.assert_array_almost_equal(self.dcke_grid.X_[i], dcke[i].X_train_)
138 |             np.testing.assert_array_almost_equal(self.dcke_grid.y_[i], dcke[i].y_train_)
139 |             np.testing.assert_array_almost_equal(self.dcke_grid.x_mesh_[i], dcke[i].x_mesh_)
140 |             np.testing.assert_array_almost_equal(self.dcke_grid.Z_[i], dcke[i].Z_)
141 |             np.testing.assert_array_almost_equal(self.dcke_grid.mz_[i], dcke[i].mz_)
142 |             np.testing.assert_array_almost_equal(self.dcke_grid.cov_[i], dcke[i].cov_)
143 |             np.testing.assert_array_almost_equal(self.dcke_grid.var_[i], dcke[i].var_)
144 |             np.testing.assert_array_almost_equal(self.dcke_grid.beta_[i], dcke[i].beta_)
145 | 
146 |     def test_bandwidths(self):
147 |         self.dcke = DCKEGridIndependent(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
148 |         self.assertListEqual(self.dcke._get_bandwidths(None, 3), [None, None, None])
149 |         np.testing.assert_array_almost_equal(self.dcke._get_bandwidths(2.7, 3),
150 |                                              np.array([2.7, 2.7, 2.7]))
151 |         np.testing.assert_array_almost_equal(self.dcke._get_bandwidths(np.array([1., 2., 3.]), 3),
152 |                                              np.array([1., 2., 3.]))
153 | 
154 | 
155 | class TestDCKEGridRecursive(TestCase):
156 | 
157 |     def setUp(self):
158 |         self.mu = np.array([1, 2, 0])
159 |         self.Sigma = np.array([[3, 0, 0],
160 |                                [0, 4, 0],
161 |                                [0, 0, 5]])
162 |         quantile_levels = np.linspace(0.1, 99, 10)
163 |         N = 100
164 |         np.random.seed(1)
165 |         W = np.random.multivariate_normal(self.mu, self.Sigma, N)
166 |         self.X = W[:, 0]
167 |         self.Y = W[:, 1]
168 |         self.Z = W[:, 2]
169 |         self.x_mesh = np.percentile(self.X, quantile_levels)
170 |         self.mz = np.zeros_like(self.x_mesh)
171 |         self.locreg = LocalRegression(degree=0)
172 |         self.gpr_kernel = GPy.kern.RBF(input_dim=1)
173 | 
174 |     def test_singleton(self):
175 |         self.dcke = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
176 |         self.dcke.fit(np.atleast_2d(self.X).T, self.Y, np.atleast_2d(self.x_mesh).T, self.Z, self.mz)
177 |         y_pred = self.dcke.predict(np.atleast_2d(self.x_mesh).T)
178 |         y_pred = self.dcke.predict(np.atleast_2d(self.x_mesh).T)
179 |         h = self.dcke.locreg.bandwidth
180 |         self.dcke_grid = DCKEGridRecursive(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
181 |         self.dcke_grid.fit(self.X[np.newaxis, : , np.newaxis],
182 |                            self.Y[np.newaxis, :],
183 |                            self.x_mesh[np.newaxis, :, np.newaxis],
184 |                            self.Z[np.newaxis, :],
185 |                            self.mz[np.newaxis, :],
186 |                            h)
187 |         y_pred_grid = self.dcke_grid.predict(self.x_mesh[np.newaxis, :, np.newaxis])
188 |         np.testing.assert_array_almost_equal(y_pred, y_pred_grid.squeeze())
189 | 
190 |     def test_grid_components(self):
191 |         self.dcke2 = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
192 |         self.dcke1 = DCKE(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
193 |         self.dcke2.fit(2 * np.atleast_2d(self.X).T,
194 |                        2 * self.Y,
195 |                        2 * np.atleast_2d(self.x_mesh).T,
196 |                        2 * self.Z,
197 |                        2 * self.mz)
198 |         y_pred2 = self.dcke2.predict(2 * np.atleast_2d(self.X).T)
199 |         df = np.exp(-0.5)
200 |         self.dcke1.fit(np.atleast_2d(self.X).T,
201 |                        df * y_pred2,
202 |                        np.atleast_2d(self.x_mesh).T,
203 |                        self.Z,
204 |                        self.mz)
205 |         y_pred1 = self.dcke1.predict(np.atleast_2d(self.X).T)
206 |         self.dcke_grid = DCKEGridRecursive(deepcopy(self.locreg), deepcopy(self.gpr_kernel))
207 |         X = np.concatenate((np.atleast_2d(self.X).T, 2 * np.atleast_2d(self.X).T), axis=1).T[:, :, np.newaxis]
208 |         Z = np.concatenate((np.atleast_2d(self.Z).T, 2 * np.atleast_2d(self.Z).T), axis=1).T
209 |         mz = np.concatenate((np.atleast_2d(self.mz).T, 2 * np.atleast_2d(self.mz).T), axis=1).T
210 |         x_mesh = np.concatenate((np.atleast_2d(self.x_mesh).T, 2 * np.atleast_2d(self.x_mesh).T), axis=1).T[:, :, np.newaxis]
211 |         bandwidths = np.array([self.dcke1.locreg.bandwidth, self.dcke2.locreg.bandwidth])
212 |         self.dcke_grid.fit(X, 2 * self.Y, x_mesh, Z, mz, bandwidths, recursion_functions=[lambda x: x * df])
213 |         y_pred_grid = self.dcke_grid.predict()
214 |         y_pred = {0: y_pred1, 1: y_pred2}
215 |         dcke = {0: self.dcke1, 1: self.dcke2}
216 |         for i in range(2):
217 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].X_train_, dcke[i].X_train_)
218 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].y_train_, dcke[i].y_train_)
219 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].x_mesh_, dcke[i].x_mesh_)
220 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].Z_, dcke[i].Z_)
221 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].mz_, dcke[i].mz_)
222 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].var_, dcke[i].var_)
223 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].cov_, dcke[i].cov_)
224 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].beta_, dcke[i].beta_)
225 |             np.testing.assert_array_almost_equal(self.dcke_grid[i].y_mesh_, dcke[i].y_mesh_)
226 |             np.testing.assert_array_almost_equal(y_pred[i], y_pred_grid[i].squeeze())
227 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/locreg/__init__.py:
--------------------------------------------------------------------------------
1 | from locreg.local_regression import LocalRegression
2 | __all__ = ['LocalRegression',
3 |            ]
4 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/locreg/local_regression.py:
--------------------------------------------------------------------------------
  1 | from sklearn.base import RegressorMixin
  2 | from sklearn.preprocessing import PolynomialFeatures
  3 | from scipy.optimize import leastsq
  4 | import numpy as np
  5 | from scipy.linalg import solve_triangular
  6 | 
  7 | 
  8 | def exp_kernel(z):
  9 |     """
 10 |     Implements the exponential kernel $$ e^{-|z|^2/2}$$
 11 |     as a vectorized function.
 12 |     :param z: a numpy array of any dimension
 13 |     :return: the exponential kernel evaluated on the array z assuming 
 14 |              the last axis is the dimension of the elements of z
 15 |     """
 16 |     return np.exp(-np.linalg.norm(np.atleast_1d(z), axis=-1) ** 2 / 2)
 17 | 
 18 | 
 19 | class LocalRegression(RegressorMixin):
 20 |     """
 21 |     This class performs local polynomial regression of dimension $d$
 22 |     and degree $p$, i.e. given a training set $(X,y)$ of $N$ samples
 23 |     $x_i \in \R^d$ and $y_i \in \R$, it makes a new prediction at
 24 |     $x \in \R^d$ as $x=\beta^0$, where $\beta$ is the value minimizing
 25 |     the cost functional
 26 |     \begin{align*}
 27 |         J(\beta) & := \sum_{i=1}^{N}{(y_i - \beta ^0 + j_1(x_i))^2 w_i},
 28 |         j_1(x_i) & := \sum_{1 \leq |\alpha| \leq p}{(x-x_i)^\alpha}, \\
 29 |         w_i & := K_h(x-x_i),
 30 |     \end{align*}
 31 |     where $K_h$ is a kernel function scaled with bandwith $h$. The minimum
 32 |     of the cost function is computed via QR decomposition (or analytically).
 33 |     """
 34 | 
 35 |     def __init__(self, degree, kernel=exp_kernel, warm_start=True):
 36 |         """
 37 |         :param degree: an int specifying the degree of the polynomial
 38 |         :param kernel: a kernel function for the weight calculation
 39 |         :param warm_start: if True the fitted kernel is preserved enabling
 40 |                            partial_fit() for new y
 41 |         """
 42 |         self.degree = degree
 43 |         self.kernel = kernel
 44 |         self.warm_start = warm_start
 45 |         self.bandwidth = None
 46 |         self.fitted_kernel = None
 47 |         self.method = None
 48 |         self.X_train_ = None
 49 |         self.y_train = None
 50 | 
 51 |     def fit(self, X, y, bandwidth=None):
 52 |         """
 53 |         Fits the regressor to the data. As the concept of local regression is
 54 |         to fit the data to each prediction, this function only stores the data
 55 |         and either sets a fixed bandwidth or estimates an optimal one.
 56 |         :param X: a numpy array of shape (N, d)
 57 |         :param y: a numpy array of shape (N,)
 58 |         :param bandwidth: a scalar or None
 59 |         :return: self
 60 |         """
 61 |         self.X_train_ = X
 62 |         self.y_train = y
 63 |         self._set_bandwidth(bandwidth)
 64 |         return self
 65 | 
 66 |     def predict(self, X, method=None):
 67 |         """
 68 |         Performs the prediction for each value x in the prediction set X.
 69 |         If $d=1$ and $p=0,1$ the cost functional can be minimized analytically.
 70 |         :param X: a numpy array of dimension (M, d)
 71 |         :param method: can be 'analytic' or 'qr' or 'leastsq' or None. Method is
 72 |                        set automatically if None.
 73 |         :return: a numpy array y of dimension (M,) with the predictions
 74 |         """
 75 |         if method is None:
 76 |             method = self._determine_method()
 77 |         if self.fitted_kernel is not None:
 78 |             self.fitted_kernel = None
 79 |             self.method = None
 80 |         return self._predict_with_method(X, method)
 81 | 
 82 |     def fit_partial(self, y, bandwidth=None):
 83 |         """ Re-fits only the y values of the regression. Only works if
 84 |             warm_start==True and a previous (full) fit and predict
 85 |             has already been performed.
 86 |         """
 87 |         if self.warm_start and self.fitted_kernel is not None and self.method in ['analytic', 'qr']:
 88 |             self.y_train = y
 89 |             self._set_bandwidth(bandwidth)
 90 |         else:
 91 |             raise ValueError("The fit_partial method can only be invoked\
 92 |                 if fit and predict have been invoked previously with method \
 93 |                     `analytic´ or `qr` and warm_start is set to True")
 94 | 
 95 |     def predict_partial(self):
 96 |         """ Predicts on the last value of y_train set by fit() or fit_partial()
 97 |             and the last X that has been used for prediction. """
 98 |         if self.fitted_kernel is not None and self.method is not None:
 99 |             return self._predict_with_method(X=None, method=self.method)
100 |         else:
101 |             raise ValueError("The method predict_partial requires a full\
102 |                               prior run of predict with method `analytic´ \
103 |                               or `qr´.")
104 | 
105 |     def _set_bandwidth(self, bandwidth):
106 |         """ Sets the bandwidth in the fitting."""
107 |         if bandwidth is None:
108 |             self.bandwidth = self._silverman()
109 |         else:
110 |             self.bandwidth = bandwidth
111 | 
112 |     def _silverman(self):
113 |         """
114 |         This function implements Silverman's Rule of Thumb
115 |         \begin{align*}
116 |             h = \Big( \frac{4}{3n} \Big)^{\frac{1}{5}} \hat \sigma_Y
117 |         \end{align*}
118 |         to estimate the optimal bandwidth of the training data y.
119 |         :return: bandwidth h
120 |         """
121 |         sigma_y = np.std(self.y_train)
122 |         n = self.y_train.shape[0]
123 |         return (4 / (3 * n)) ** (1 / 5) * sigma_y
124 | 
125 |     def _scaled_kernel(self):
126 |         """
127 |         Scales the kernel function self.kernel by the
128 |         bandwidth self.bandwidth.
129 |         :return: scaled kernel function
130 |         """
131 |         def kh(z):
132 |             d = self.X_train_.shape[1]
133 |             return self.kernel(z / self.bandwidth) / self.bandwidth ** d
134 |         return kh
135 | 
136 |     def _predict_with_method(self, X, method):
137 |         """ Performs the prediction based on the `method´ flag.
138 |         :param method: a string chosen from 'analytic', 'leastsq', 'qr'
139 |         """
140 |         self.method = method
141 |         num_dims = self.X_train_.shape[1]
142 |         if method == 'analytic':
143 |             if self.degree == 0:
144 |                 return self._predict_nadaraya_watson(X)
145 |             elif num_dims == 1 and self.degree == 1:
146 |                 return self._predict_locally_linear(X)
147 |             else:
148 |                 raise ValueError('Method `analytic´ is only available if \
149 |                                   self.degree=0 or self.degree=1 and X.shape[1] == 1.')
150 |         elif method == 'qr':
151 |             return self._predict_qr(X)
152 |         elif method == 'leastsq':
153 |             return self._predict_leastsq(X)
154 |         else:
155 |             raise ValueError('Parameter `method´ has to be `analytic´ or\
156 |                 `qr´ or `leastsq´ or None, but is currently set to: %s' % method)
157 | 
158 |     def _determine_method(self):
159 |         """ Automatically selects the method for prediction based on the dimension
160 |             of the training data self_X_train_.
161 |         """
162 |         num_dims = self.X_train_.shape[1]
163 |         if self.degree == 0 or (self.degree == 1 and num_dims == 1):
164 |             return 'analytic'
165 |         else:
166 |             return 'qr'
167 | 
168 |     def _predict_nadaraya_watson(self, X):
169 |         """
170 |         Performs local regression of degree $p=0$ in dimension $d=1$.
171 |         In this case, the cost functional can be minimized analytically
172 |         and for any $x \in \R$, the estimate $y$ is given by
173 |         \begin{align*}
174 |             y &= \sum_{i=1}^{N}{W^i_h(x) y_i}, \\
175 |             W^0_i(x) &= \frac{K_h(x - x_i)}{\sum_{j=1}^N{K_h(x - x_j)}}
176 |         \end{align*}
177 |         :param X: a numpy array of dimension (M, d) at which to predict
178 |         :return: a numpy array of dimension (M,) with the M predicted y's
179 |         """
180 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
181 |             k_h = self._scaled_kernel()
182 |             nwk = k_h(np.array([x - self.X_train_ for x in X]))
183 |             self.fitted_kernel = nwk / np.sum(nwk, axis=1)[:, np.newaxis]
184 |         return np.sum(self.fitted_kernel * self.y_train.squeeze(), axis=1)
185 | 
186 |     def _predict_locally_linear(self, X):
187 |         """
188 |         Performs local regression of degree $p=1$ in dimension $d=1$.
189 |         In this case, the cost functional can be minimized analytically
190 |         and for any $x \in \R$, the estimate $y$ is given by
191 |         \begin{align*}
192 |             y &= \sum_{i=1}^{N}{W^i_h(x) y_i}, \\
193 |             W^1_i(x) &:= \frac{K_h(x-x_i)}{N}\frac{s_2(x)-s_1(x)(x-x_i)}{x_2(x)s_0(x)} \\
194 |             s_r(x) &:= \frac{1}{N} \sum_{i=1}^N{(x-x_i)^rK_h(x-x_i)}.
195 |         \end{align*}
196 |         :param X: a numpy array of dimension (M, d) at which to predict
197 |         :return: a numpy array of dimension (M,) with the M predicted y's
198 |         """
199 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
200 |             n = self.X_train_.shape[0]
201 |             m = X.shape[0]
202 |             self.fitted_kernel = np.zeros((m, n))
203 |             k_h = self._scaled_kernel()
204 |             X_ = self.X_train_.squeeze()
205 |             for i in range(m):
206 |                 x = X[i]
207 |                 llk = k_h(x - self.X_train_).squeeze()
208 |                 s0 = np.mean(llk)
209 |                 s1 = np.mean((x - X_) * llk)
210 |                 s2 = np.mean((x - X_) ** 2 * llk)
211 |                 s = (s2 - s1 * (x - X_)) / (s2 * s0 - s1 ** 2) / n
212 |                 self.fitted_kernel[i, :] = s * llk
213 |         return np.sum(self.fitted_kernel * self.y_train.squeeze(), axis=1)
214 | 
215 |     def _predict_qr(self, X):
216 |         """
217 |         Performs a prediction for each x in X by solving the associated
218 |         normal equations via QR decomposition.
219 | 
220 |         :param X: a numpy array of dimension (M, d) at which to predict
221 |         :return: a numpy array of dimension (M,) with the M predicted y's
222 |         """
223 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
224 |             self.fitted_kernel = []
225 |             poly = PolynomialFeatures(degree=self.degree)
226 |             for i in range(X.shape[0]):
227 |                 x = X[i, :]
228 |                 phi = poly.fit_transform(self.X_train_ - x)
229 |                 m = phi.shape[1]
230 |                 kh = self._scaled_kernel()
231 |                 w = kh(x - self.X_train_)
232 |                 w_mat = np.diag(np.sqrt(w))
233 |                 a = w_mat @ phi
234 |                 q, r = np.linalg.qr(a, mode='complete')
235 |                 r = r[:m, :m]
236 |                 self.fitted_kernel.append((q.transpose() @ w_mat, r))
237 |         n = len(self.fitted_kernel)
238 |         y_pred = np.zeros(n)
239 |         for i in range(n):
240 |             qw, r = self.fitted_kernel[i]
241 |             m = r.shape[1]
242 |             c = (qw @ self.y_train)[:m]
243 |             beta = solve_triangular(r, c)
244 |             y_pred[i] = beta[0]
245 |         return y_pred
246 | 
247 |     def _predict_leastsq(self, X):
248 |         """
249 |         Performs a prediction for each x in X by minimizing the cost
250 |         functional $J$.
251 | 
252 |         :param X: a numpy array of dimension (M, d) at which to predict
253 |         :return: a numpy array of dimension (M,) with the M predicted y's
254 |         """
255 |         poly = PolynomialFeatures(degree=self.degree)
256 |         x0 = poly.fit_transform(np.zeros((1, X[0].shape[0])))[0]
257 |         return np.array([leastsq(func=self._cost_functional(poly, x),
258 |                                     x0=x0)[0][0] for x in X])
259 |     
260 |     def _cost_functional(self, poly, x):
261 |         """
262 |         Creates the cost functional $J$ for optimization using the parameters.
263 |         :param poly: an instance of PolynomialFeatures
264 |         :param x: a numpy array of shape (d,)
265 |         :return: cost funtional $J$
266 |         """
267 |         def cost(beta):
268 |             res = np.sum(poly.fit_transform(self.X_train_ - x) * beta, axis=1) - self.y_train
269 |             kh = self._scaled_kernel()
270 |             w = kh(x - self.X_train_)
271 |             res *= np.sqrt(np.abs(w))
272 |             return res
273 |         return cost
274 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/locreg/test_local_regression.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | from local_regression import LocalRegression
  3 | import numpy as np
  4 | 
  5 | 
  6 | class TestLocalRegression(TestCase):
  7 | 
  8 |     def setUp(self):
  9 |         self.n = 10
 10 |         self.x = np.linspace(-10, 10, self.n)
 11 |         self.X = self.x[:, np.newaxis]
 12 |         np.random.seed(1)
 13 |         self.e = np.random.normal(0, 0.01, self.n)
 14 | 
 15 |     def test_nw_analytic_vs_qr_vs_leastsq_1d(self):
 16 |         self.y = self.x**2 + self.e
 17 |         self.nw = LocalRegression(degree=0, warm_start=False)
 18 |         self.nw.fit(self.X, self.y)
 19 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 20 |         y_pred_analytic = self.nw.predict(X_eval, method='analytic')
 21 |         y_pred_qr = self.nw.predict(X_eval, method='qr')
 22 |         y_pred_leastsq = self.nw.predict(X_eval, method='leastsq')
 23 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr, decimal=6)
 24 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq, decimal=6)
 25 | 
 26 |     def test_nw_analytic_vs_qr_vs_leastsq_2d(self):
 27 |         grid = np.linspace(-10, 10, self.n)
 28 |         x1, x2 = np.meshgrid(grid, grid)
 29 |         self.X = np.vstack((x1.flatten(), x2.flatten())).T
 30 |         self.y = self.X[:, 0] * self.X[:, 1] + np.random.normal(0, 0.01, self.X.shape[0])
 31 |         self.nw = LocalRegression(degree=0, warm_start=False)
 32 |         self.nw.fit(self.X, self.y)
 33 |         grid_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 34 |         x1e, x2e = np.meshgrid(grid_eval, grid_eval)
 35 |         X_eval = np.vstack((x1e.flatten(), x2e.flatten())).T
 36 |         y_pred_analytic = self.nw.predict(X_eval, method='analytic')
 37 |         y_pred_qr = self.nw.predict(X_eval, method='qr')
 38 |         y_pred_leastsq = self.nw.predict(X_eval, method='leastsq')
 39 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq, decimal=5)
 40 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr, decimal=5)
 41 | 
 42 |     def test_ll_analytic_vs_cost(self):
 43 |         self.y = self.x + self.e
 44 |         self.ll = LocalRegression(degree=1, warm_start=False)
 45 |         self.ll.fit(self.X, self.y)
 46 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 47 |         y_pred_analytic = self.ll.predict(X_eval, method='analytic')
 48 |         y_pred_qr = self.ll.predict(X_eval, method='qr')
 49 |         y_pred_leastsq = self.ll.predict(X_eval, method='leastsq')
 50 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr)
 51 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq)
 52 | 
 53 |     def test_nw_analytic_loc_const_const_1d(self):
 54 |         self.y = np.ones_like(self.x) + self.e
 55 |         self.nw = LocalRegression(degree=0)
 56 |         self.nw.fit(self.X, self.y)
 57 |         np.testing.assert_almost_equal(self.nw.predict(self.X), self.y)
 58 | 
 59 |     def test_ll_analytic_loc_lin_lin_1d(self):
 60 |         self.y = self.x + self.e
 61 |         self.ll = LocalRegression(degree=1)
 62 |         self.ll.fit(self.X, self.y)
 63 |         np.testing.assert_almost_equal(self.ll.predict(self.X), self.y, decimal=1)
 64 | 
 65 |     def test_2d_loc_const_const(self):
 66 |         n = 5
 67 |         c = 7.
 68 |         x = np.linspace(-5, 5, n)
 69 |         y = np.linspace(-5, 5, n)
 70 |         z = np.array([[c for xx in x] for yy in y])
 71 |         x, y = np.meshgrid(x, y)
 72 |         X = np.array(list(zip(x.flatten(), y.flatten())))
 73 |         np.random.seed(1)
 74 |         e = np.random.normal(0, 0.01, (n, n))
 75 |         z = z + e
 76 |         locreg3d = LocalRegression(degree=0).fit(X, z.flatten())
 77 |         z_pred = locreg3d.predict(X)
 78 |         self.assertTrue(np.all(np.abs(z_pred / c - 1) <= 0.01))
 79 | 
 80 |     def test_2d_loc_lin_lin(self):
 81 |         n = 5
 82 |         x = np.linspace(1, 10, n)
 83 |         y = np.linspace(1, 10, n)
 84 |         z = np.array([[xx + yy for xx in x] for yy in y])
 85 |         res = z.flatten()
 86 |         x, y = np.meshgrid(x, y)
 87 |         X = np.array(list(zip(x.flatten(), y.flatten())))
 88 |         np.random.seed(1)
 89 |         e = np.random.normal(0, 0.01, (n, n))
 90 |         z = z + e
 91 |         locreg3d = LocalRegression(degree=1).fit(X, z.flatten())
 92 |         z_pred = locreg3d.predict(X)
 93 |         self.assertTrue(np.all(np.abs(z_pred / res - 1) <= 0.01))
 94 | 
 95 |     def test_2d_least_sq_vs_qr(self):
 96 |         n = 5
 97 |         x1 = np.linspace(1, 10, n)
 98 |         x2 = np.linspace(1, 10, n)
 99 |         y = np.array([[xx1 ** 2 + xx2 ** 2 for xx1 in x1] for xx2 in x2])
100 |         x1, x2 = np.meshgrid(x1, x2)
101 |         X = np.array(list(zip(x1.flatten(), x2.flatten())))
102 |         np.random.seed(1)
103 |         e = np.random.normal(0, 0.01, (n, n))
104 |         y = y + e
105 |         locreg3d = LocalRegression(degree=2).fit(X, y.flatten())
106 |         y_pred_leastsq = locreg3d.predict(X, method='leastsq')
107 |         y_pred_qr = locreg3d.predict(X, method='qr')
108 |         np.testing.assert_array_almost_equal(y_pred_leastsq, y_pred_qr)
109 | 
110 |     def test_fit_partial(self):
111 |         self.y = self.x + self.e
112 |         self.ll = LocalRegression(degree=1, warm_start=True)
113 |         self.ll.fit(self.X, self.y)
114 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
115 |         _ = self.ll.predict(X_eval, method='analytic')
116 |         self.y = self.x ** 2 + self.e
117 |         h = self.ll.bandwidth
118 |         self.ll.fit_partial(self.y, h)
119 |         y_pred_partial = self.ll.predict(X_eval, method='analytic')
120 |         self.ll2 = LocalRegression(degree=1, warm_start=False)
121 |         self.ll2.fit(self.X, self.y, h)
122 |         y_pred = self.ll2.predict(X_eval, method='analytic')
123 |         np.testing.assert_almost_equal(y_pred, y_pred_partial)
124 |         # test against new instance to validate correct state
125 |         self.ll3 = LocalRegression(degree=1, warm_start=False)
126 |         self.ll3.fit(self.X, self.y, h)
127 |         y_pred_new = self.ll3.predict(X_eval, method='analytic')
128 |         np.testing.assert_array_almost_equal(y_pred_new, y_pred)
129 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/models/__init__.py


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/models/black_scholes.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.stats import norm
  3 | from scipy.optimize import minimize_scalar, newton
  4 | 
  5 | 
  6 | class BlackScholes:
  7 |     """
  8 |     Implements the Black-Scholes model $dS_t = r S_t dt + \sigma S_t dW_t$.
  9 |     """
 10 | 
 11 |     def __init__(self, sigma, r):
 12 |         self.sigma = sigma
 13 |         self.r = r
 14 | 
 15 |     @staticmethod
 16 |     def _d1(sigma, r, s0, tm, sk):
 17 |         """
 18 |         Implements the d1 in the Black-Scholes option price formula.
 19 |         """
 20 |         d1 = np.log(s0 / sk) + (r + sigma ** 2 / 2) * tm
 21 |         return d1 / (sigma * np.sqrt(tm))
 22 | 
 23 |     @staticmethod
 24 |     def _option_price(sigma, r, s0, tm, sk, call):
 25 |         """
 26 |         Implements Black-Scholes option price formula.
 27 |         :param sigma: instantaneous volatility
 28 |         :param r: risk-free rate
 29 |         :param s0: value of underlying stock price at t=0
 30 |         :param tm: time to maturity of the option
 31 |         :param sk: strike of the option
 32 |         :param call: True if call option, False if put
 33 |         :return: option price
 34 |         """
 35 |         d1 = BlackScholes._d1(sigma, r, s0, tm, sk)
 36 |         d2 = d1 - sigma * np.sqrt(tm)
 37 |         pvk = sk * np.exp(-r * tm)
 38 |         phi = norm.cdf
 39 |         if call:
 40 |             return phi(d1) * s0 - phi(d2) * pvk
 41 |         else:
 42 |             return phi(-d2) * pvk - phi(-d1) * s0
 43 | 
 44 |     @staticmethod
 45 |     def _paths(sigma, r, s0, time_grid, num_sims, seed=1):
 46 |         """
 47 |         Create random paths of the underlying.
 48 | 
 49 |         :param sigma: instantaneous volatility
 50 |         :param r: risk-free rate
 51 |         :param time_grid: time grid of shape (num_time_steps) on which to simulate
 52 |         :param s0: initial value of stock at time_grid[0]
 53 |         :param num_sims: number of paths to generate
 54 |         :param seed: seed value of random number generator
 55 | 
 56 |         returns: a tensor `paths´ of shape (num_time_steps, num_sims) where S[i,j] is the j-th
 57 |                  realization of the underlying at time_grid[i]
 58 |         """
 59 |         delta = time_grid[1:] - time_grid[:-1]
 60 |         num_steps = delta.shape[0]
 61 |         np.random.seed(seed)
 62 |         dw = np.random.randn(num_sims, num_steps)
 63 |         paths = s0 * np.cumprod(np.exp((r - sigma ** 2 / 2) * delta + sigma * np.sqrt(delta) * dw), axis=1)
 64 |         return np.transpose(np.c_[np.ones(num_sims) * s0, paths])
 65 | 
 66 |     @staticmethod
 67 |     def _delta(sigma, r, s0, tm, sk, call=True):
 68 |         """
 69 |         Computes the Delta of a European call/put option.
 70 | 
 71 |         :param sigma: instantaneous volatility
 72 |         :param r: risk-free rate
 73 |         :param s0: value of underlying stock price at t=0
 74 |         :param tm: time to maturity of the option
 75 |         :param sk: strike of the option
 76 |         :param call: True if call option, False if put
 77 |         """
 78 |         phi = norm.cdf
 79 |         delta = phi(BlackScholes._d1(sigma, r, s0, tm, sk))
 80 |         if call:
 81 |             return delta
 82 |         else:
 83 |             return delta - 1
 84 | 
 85 |     @staticmethod
 86 |     def _vega(sigma, r, s0, tm, sk):
 87 |         """
 88 |         Computes the Vega of a European call/put option.
 89 | 
 90 |         :param sigma: instantaneous volatility
 91 |         :param r: risk-free rate
 92 |         :param s0: value of underlying stock price at t=0
 93 |         :param tm: time to maturity of the option
 94 |         :param sk: strike of the option
 95 |         """
 96 |         d1 = BlackScholes._d1(sigma, r, s0, tm, sk)
 97 |         return s0 * norm.pdf(d1) * np.sqrt(tm)
 98 | 
 99 |     @staticmethod
100 |     def calibrate(vol_quotes):
101 | 
102 |         def cost(sigma):
103 |             num_quotes = vol_quotes.shape[0]
104 |             c = np.zeros(num_quotes)
105 |             for i in range(num_quotes):
106 |                 tm, sk, iv = vol_quotes[i]
107 |                 c[i] = (iv - sigma) ** 2
108 |             return np.sum(c) / 2
109 | 
110 |         return minimize_scalar(cost, bounds=(0, 1),  method='bounded', options={'xatol': 1e-8}).x
111 | 
112 |     def option_price(self, s0, tm, sk, call=True):
113 |         return BlackScholes._option_price(self.sigma, self.r, s0, tm, sk, call)
114 | 
115 |     def paths(self, s0, time_grid, num_sims, seed=1):
116 |         return BlackScholes._paths(self.sigma, self.r, s0, time_grid, num_sims, seed)
117 | 
118 |     def delta(self, s0, tm, sk, call=True):
119 |         return BlackScholes._delta(self.sigma, self.r, s0, tm, sk, call)
120 | 
121 |     def vega(self, s0, tm, sk):
122 |         return BlackScholes._vega(self.sigma, self.r, s0, tm, sk)
123 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/models/test_black_scholes.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | from unittest import TestCase
 3 | import numpy as np
 4 | from py_vollib.black_scholes import black_scholes
 5 | from py_vollib.black_scholes.implied_volatility import implied_volatility
 6 | 
 7 | from models.black_scholes import BlackScholes
 8 | 
 9 | 
10 | class TestBlackScholes(TestCase):
11 | 
12 |     def setUp(self):
13 |         self.sigma = 0.2
14 |         self.r = 0.03
15 |         self.bs = BlackScholes(self.sigma, self.r)
16 |         self.maturities = np.array([3/12, 9/12, 1., 5., 10.])
17 |         self.strikes = np.array([80., 95., 105., 120.3])
18 |         self.rates = np.array([-0.03, -0.01, 0, 0.01, 0.03])
19 |         self.sigmas = np.array([0.01, 0.05, 0.2, 0.5, 1.5])
20 |         self.spots = np.array([80., 90., 100., 110., 120])
21 |         self.call = [True, False]
22 |         self.s0 = 100.
23 |         self.sk = 103.
24 |         self.tm = 2.4
25 | 
26 |     def test_vs_py_vollib(self):
27 |         for sigma, r, s0, tm, sk, call in itertools.product(self.sigmas, self.rates, self.spots, self.maturities, self.strikes, self.call):
28 |             with self.subTest():
29 |                 np.testing.assert_almost_equal(BlackScholes._option_price(sigma, r, s0, tm, sk, call),
30 |                                                black_scholes(flag='c' if call else 'p', S=s0, K=sk, t=tm, r=r, sigma=sigma))
31 | 
32 |     def test_put_call_parity(self):
33 |         call = self.bs.option_price(self.s0, self.tm, self.sk, call=True)
34 |         put = self.bs.option_price(self.s0, self.tm, self.sk, call=False)
35 |         df = np.exp(-self.r * self.tm)
36 |         np.testing.assert_almost_equal(call - put, self.s0 - self.sk * df, decimal=6)
37 | 
38 |     def test_path_distribution(self):
39 |         self.time_grid = np.array([0., 1., 5.])
40 |         self.num_sims = 10000
41 |         self.seed = 1
42 |         paths = self.bs.paths(self.s0, self.time_grid, self.num_sims, self.seed)
43 |         #np.testing.assert_array_almost_equal(paths.mean(axis=1), np.exp(self.time_grid * self.r) * self.s0)
44 |         #print(paths.mean(axis=1), np.exp(self.time_grid * self.r) * self.s0)
45 |         #print(paths.std(axis=1)**2, self.s0**2 * np.exp(2 * self.time_grid * self.r) * (np.exp(self.sigma**2 * self.time_grid) - 1))
46 | 
47 |     def test_vega(self):
48 |         bump = 1. / 10000
49 |         for tm, sk in itertools.product(self.maturities, self.strikes):
50 |             with self.subTest():
51 |                 price = BlackScholes._option_price(self.sigma, self.r, self.s0, tm, sk, True)
52 |                 price_bumped = BlackScholes._option_price(self.sigma + bump, self.r, self.s0, tm, sk, True)
53 |                 vega_df = (price_bumped - price) / bump
54 |                 vega = BlackScholes._vega(self.sigma, self.r, self.s0, tm, sk)
55 |                 np.testing.assert_almost_equal(vega, vega_df, decimal=2)
56 | 
57 |     def test_delta(self):
58 |         bump = 1. / 10000
59 |         for tm, sk, call in itertools.product(self.maturities, self.strikes, self.call):
60 |             with self.subTest():
61 |                 price = BlackScholes._option_price(self.sigma, self.r, self.s0, tm, sk, True)
62 |                 price_bumped = BlackScholes._option_price(self.sigma, self.r, self.s0 + bump, tm, sk, True)
63 |                 delta_fd = (price_bumped - price) / bump
64 |                 delta = BlackScholes._delta(self.sigma, self.r, self.s0, tm, sk)
65 |                 np.testing.assert_almost_equal(delta, delta_fd, decimal=5)
66 | 
67 |     def test_implied_volatility(self):
68 |         for sigma, r, s0, tm, sk, call in itertools.product(self.sigmas, self.rates, self.spots, self.maturities, self.strikes, self.call):
69 |             with self.subTest():
70 |                 price = BlackScholes._option_price(sigma, r, s0, tm, sk, call)
71 |                 iv = BlackScholes._implied_volatility(r, s0, tm, sk, call, price)
72 |                 np.testing.assert_almost_equal(sigma, iv)
73 | 
74 |     def test_implied_volatility_vs_py_vollib(self):
75 |         for sigma, r, s0, tm, sk, call in itertools.product(self.sigmas, self.rates, self.spots, self.maturities, self.strikes, self.call):
76 |             with self.subTest():
77 |                 price = BlackScholes._option_price(sigma, r, s0, tm, sk, True)
78 |                 np.testing.assert_almost_equal(BlackScholes._implied_volatility(r, s0, tm, sk, call, price),
79 |                                                implied_volatility(price=price, S=s0, K=sk, t=tm, r=r, flag='c' if call else 'p'))
80 | 
81 |     def test_calibrate(self):
82 |         self.bs = BlackScholes(self.r, self.sigma)
83 |         self.vol_quotes = np.array([[1., 100., 0.23]])
84 |         sigma = self.bs.calibrate(self.vol_quotes)
85 |         np.testing.assert_almost_equal(sigma, 0.23)
86 | 
87 |     def test_calibrate_multiple(self):
88 |         self.vol_quotes = np.array([[1., 100., 0.23], [2., 100., 0.27]])
89 |         sigma = self.bs.calibrate(self.vol_quotes)
90 |         self.assertTrue(sigma <= 0.27)
91 |         self.assertTrue(sigma >= 0.23)
92 | 


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/pics/american_option_pricing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/pics/american_option_pricing.png


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/pics/conditional_expectation_orthogonal_projection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/pics/conditional_expectation_orthogonal_projection.png


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/pics/dcke_basket_heston.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/pics/dcke_basket_heston.jpg


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/pics/dcke_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/pics/dcke_performance.jpg


--------------------------------------------------------------------------------
/dynamically_controlled_kernel_estimation/pics/dcke_rbergomi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/dynamically_controlled_kernel_estimation/pics/dcke_rbergomi.jpg


--------------------------------------------------------------------------------
/environment-explicit.txt:
--------------------------------------------------------------------------------
  1 | # This file may be used to create an environment using:
  2 | # $ conda create --name <env> --file <this file>
  3 | # platform: win-64
  4 | @EXPLICIT
  5 | https://repo.anaconda.com/pkgs/main/win-64/_tflow_select-2.3.0-mkl.tar.bz2
  6 | https://repo.anaconda.com/pkgs/main/win-64/blas-1.0-mkl.tar.bz2
  7 | https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2020.12.5-h5b45459_0.tar.bz2
  8 | https://conda.anaconda.org/anaconda/win-64/graphviz-2.38.0-4.tar.bz2
  9 | https://repo.anaconda.com/pkgs/main/win-64/icc_rt-2019.0.0-h0cc432a_1.tar.bz2
 10 | https://repo.anaconda.com/pkgs/main/win-64/intel-openmp-2019.4-245.tar.bz2
 11 | https://conda.anaconda.org/anaconda/win-64/libsodium-1.0.3-0.tar.bz2
 12 | https://repo.anaconda.com/pkgs/msys2/win-64/msys2-conda-epoch-20160418-1.tar.bz2
 13 | https://repo.anaconda.com/pkgs/main/win-64/pandoc-2.2.3.2-0.tar.bz2
 14 | https://repo.anaconda.com/pkgs/main/win-64/vs2015_runtime-14.16.27012-hf0eaf9b_3.tar.bz2
 15 | https://repo.anaconda.com/pkgs/main/win-64/winpty-0.4.3-4.tar.bz2
 16 | https://repo.anaconda.com/pkgs/main/win-64/libmklml-2019.0.5-0.tar.bz2
 17 | https://repo.anaconda.com/pkgs/msys2/win-64/m2w64-gmp-6.1.0-2.tar.bz2
 18 | https://repo.anaconda.com/pkgs/msys2/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2
 19 | https://repo.anaconda.com/pkgs/main/win-64/mkl-2019.4-245.tar.bz2
 20 | https://repo.anaconda.com/pkgs/main/win-64/vc-14.1-h0510ff6_4.tar.bz2
 21 | https://conda.anaconda.org/anaconda/win-64/cudatoolkit-10.2.89-h74a9793_1.tar.bz2
 22 | https://conda.anaconda.org/anaconda/win-64/icu-58.2-vc14hc45fdbb_0.tar.bz2
 23 | https://conda.anaconda.org/anaconda/win-64/jpeg-9b-vc14h4d7706e_1.tar.bz2
 24 | https://conda.anaconda.org/anaconda/win-64/libiconv-1.15-vc14h29686d3_5.tar.bz2
 25 | https://repo.anaconda.com/pkgs/main/win-64/lz4-c-1.9.2-h62dcd97_1.tar.bz2
 26 | https://repo.anaconda.com/pkgs/msys2/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2
 27 | https://conda.anaconda.org/conda-forge/win-64/openssl-1.1.1h-he774522_0.tar.bz2
 28 | https://conda.anaconda.org/anaconda/win-64/sqlite-3.29.0-he774522_0.tar.bz2
 29 | https://conda.anaconda.org/anaconda/win-64/tk-8.6.7-vc14hb68737d_1.tar.bz2
 30 | https://repo.anaconda.com/pkgs/main/win-64/xz-5.2.5-h62dcd97_0.tar.bz2
 31 | https://conda.anaconda.org/anaconda/win-64/yaml-0.1.7-vc14h4cb57cf_1.tar.bz2
 32 | https://conda.anaconda.org/anaconda/win-64/zeromq-4.2.3-hd6b2f15_3.tar.bz2
 33 | https://conda.anaconda.org/anaconda/win-64/zlib-1.2.11-vc14h1cdd9ab_1.tar.bz2
 34 | https://conda.anaconda.org/anaconda/win-64/hdf5-1.10.1-vc14hb361328_0.tar.bz2
 35 | https://repo.anaconda.com/pkgs/main/win-64/libpng-1.6.37-h2a8f88b_0.tar.bz2
 36 | https://conda.anaconda.org/anaconda/win-64/libprotobuf-3.8.0-h7bd577a_0.tar.bz2
 37 | https://conda.anaconda.org/anaconda/win-64/libtiff-4.0.8-vc14h04e2a1e_10.tar.bz2
 38 | https://conda.anaconda.org/anaconda/win-64/libxml2-2.9.4-vc14h8fd0f11_5.tar.bz2
 39 | https://repo.anaconda.com/pkgs/msys2/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2
 40 | https://repo.anaconda.com/pkgs/main/win-64/python-3.6.8-h9f7ef89_7.tar.bz2
 41 | https://repo.anaconda.com/pkgs/main/win-64/zstd-1.4.5-h04227a9_0.tar.bz2
 42 | https://repo.anaconda.com/pkgs/main/win-64/astor-0.8.0-py36_0.tar.bz2
 43 | https://repo.anaconda.com/pkgs/main/noarch/attrs-19.2.0-py_0.tar.bz2
 44 | https://repo.anaconda.com/pkgs/main/win-64/backcall-0.1.0-py36_0.tar.bz2
 45 | https://repo.anaconda.com/pkgs/main/win-64/colorama-0.4.1-py36_0.tar.bz2
 46 | https://conda.anaconda.org/conda-forge/noarch/colorlover-0.3.0-py_0.tar.bz2
 47 | https://repo.anaconda.com/pkgs/main/win-64/decorator-4.4.0-py36_1.tar.bz2
 48 | https://repo.anaconda.com/pkgs/main/noarch/defusedxml-0.6.0-py_0.tar.bz2
 49 | https://repo.anaconda.com/pkgs/main/win-64/entrypoints-0.3-py36_0.tar.bz2
 50 | https://conda.anaconda.org/anaconda/win-64/freetype-2.8-vc14h17c9bdf_0.tar.bz2
 51 | https://repo.anaconda.com/pkgs/main/noarch/gast-0.3.2-py_0.tar.bz2
 52 | https://conda.anaconda.org/conda-forge/noarch/idna-2.10-pyh9f0ad1d_0.tar.bz2
 53 | https://repo.continuum.io/pkgs/main/win-64/ipython_genutils-0.2.0-py36h3c5d0ee_0.tar.bz2
 54 | https://conda.anaconda.org/anaconda/win-64/joblib-0.13.2-py36_0.tar.bz2
 55 | https://repo.anaconda.com/pkgs/main/win-64/kiwisolver-1.1.0-py36ha925a31_0.tar.bz2
 56 | https://conda.anaconda.org/anaconda/win-64/lazy-object-proxy-1.4.2-py36he774522_0.tar.bz2
 57 | https://conda.anaconda.org/anaconda/win-64/libxslt-1.1.29-vc14hf85b8d4_5.tar.bz2
 58 | https://repo.anaconda.com/pkgs/msys2/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2
 59 | https://repo.anaconda.com/pkgs/main/win-64/markupsafe-1.1.1-py36he774522_0.tar.bz2
 60 | https://repo.anaconda.com/pkgs/main/win-64/mccabe-0.6.1-py36_1.tar.bz2
 61 | https://repo.anaconda.com/pkgs/main/win-64/mistune-0.8.4-py36he774522_0.tar.bz2
 62 | https://repo.anaconda.com/pkgs/main/win-64/ninja-1.10.1-py36h7ef1ec2_0.tar.bz2
 63 | https://conda.anaconda.org/anaconda/win-64/numpy-base-1.16.4-py36hc3f5095_0.tar.bz2
 64 | https://repo.anaconda.com/pkgs/main/noarch/olefile-0.46-py_0.tar.bz2
 65 | https://repo.anaconda.com/pkgs/main/win-64/pandocfilters-1.4.2-py36_1.tar.bz2
 66 | https://repo.anaconda.com/pkgs/main/noarch/parso-0.5.1-py_0.tar.bz2
 67 | https://repo.anaconda.com/pkgs/main/win-64/pickleshare-0.7.5-py36_0.tar.bz2
 68 | https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2
 69 | https://repo.anaconda.com/pkgs/main/noarch/prometheus_client-0.7.1-py_0.tar.bz2
 70 | https://conda.anaconda.org/conda-forge/noarch/pycodestyle-2.5.0-py_0.tar.bz2
 71 | https://conda.anaconda.org/conda-forge/noarch/pycparser-2.20-pyh9f0ad1d_2.tar.bz2
 72 | https://repo.anaconda.com/pkgs/main/noarch/pyparsing-2.4.2-py_0.tar.bz2
 73 | https://repo.anaconda.com/pkgs/main/win-64/pyreadline-2.1-py36_1.tar.bz2
 74 | https://conda.anaconda.org/conda-forge/win-64/python_abi-3.6-1_cp36m.tar.bz2
 75 | https://repo.anaconda.com/pkgs/main/noarch/pytz-2019.3-py_0.tar.bz2
 76 | https://repo.anaconda.com/pkgs/main/win-64/pywin32-223-py36hfa6e2cd_1.tar.bz2
 77 | https://repo.anaconda.com/pkgs/main/win-64/pyyaml-5.1.2-py36he774522_0.tar.bz2
 78 | https://conda.anaconda.org/anaconda/win-64/pyzmq-17.0.0-py36hfa6e2cd_0.tar.bz2
 79 | https://conda.anaconda.org/anaconda/win-64/qt-5.9.7-vc14h73c81de_0.tar.bz2
 80 | https://repo.anaconda.com/pkgs/main/noarch/rope-0.14.0-py_0.tar.bz2
 81 | https://repo.anaconda.com/pkgs/main/win-64/send2trash-1.5.0-py36_0.tar.bz2
 82 | https://repo.anaconda.com/pkgs/main/win-64/sip-4.19.8-py36h6538335_0.tar.bz2
 83 | https://repo.anaconda.com/pkgs/main/win-64/six-1.12.0-py36_0.tar.bz2
 84 | https://repo.anaconda.com/pkgs/main/win-64/termcolor-1.1.0-py36_1.tar.bz2
 85 | https://repo.anaconda.com/pkgs/main/win-64/testpath-0.4.2-py36_0.tar.bz2
 86 | https://conda.anaconda.org/anaconda/noarch/toml-0.10.1-py_0.tar.bz2
 87 | https://repo.anaconda.com/pkgs/main/win-64/tornado-6.0.3-py36he774522_0.tar.bz2
 88 | https://conda.anaconda.org/anaconda/win-64/typed-ast-1.4.1-py36he774522_0.tar.bz2
 89 | https://repo.continuum.io/pkgs/main/win-64/wcwidth-0.1.7-py36h3d5aa90_0.tar.bz2
 90 | https://repo.anaconda.com/pkgs/main/win-64/webencodings-0.5.1-py36_1.tar.bz2
 91 | https://repo.anaconda.com/pkgs/main/noarch/werkzeug-0.16.0-py_0.tar.bz2
 92 | https://repo.continuum.io/pkgs/main/win-64/wincertstore-0.2-py36h7fe50ca_0.tar.bz2
 93 | https://repo.anaconda.com/pkgs/main/win-64/wrapt-1.11.2-py36he774522_0.tar.bz2
 94 | https://repo.anaconda.com/pkgs/main/win-64/absl-py-0.8.0-py36_0.tar.bz2
 95 | https://conda.anaconda.org/conda-forge/noarch/autopep8-1.4.4-py_0.tar.bz2
 96 | https://conda.anaconda.org/conda-forge/win-64/certifi-2020.12.5-py36ha15d459_0.tar.bz2
 97 | https://conda.anaconda.org/conda-forge/win-64/cffi-1.14.3-py36hef61171_0.tar.bz2
 98 | https://conda.anaconda.org/conda-forge/win-64/chardet-3.0.4-py36h9f0ad1d_1007.tar.bz2
 99 | https://repo.continuum.io/pkgs/main/win-64/cycler-0.10.0-py36h009560c_0.tar.bz2
100 | https://repo.anaconda.com/pkgs/main/win-64/jedi-0.15.1-py36_0.tar.bz2
101 | https://conda.anaconda.org/conda-forge/noarch/lesscpy-0.13.0-py_1.tar.bz2
102 | https://conda.anaconda.org/anaconda/win-64/lxml-4.1.1-py36he0adb16_0.tar.bz2
103 | https://conda.anaconda.org/anaconda/win-64/mkl_random-1.0.2-py36h343c172_0.tar.bz2
104 | https://conda.anaconda.org/anaconda/win-64/pillow-4.2.1-py36hdb25ab2_0.tar.bz2
105 | https://conda.anaconda.org/anaconda/win-64/pydot-1.4.1-py36_0.tar.bz2
106 | https://repo.anaconda.com/pkgs/main/win-64/pyqt-5.9.2-py36h6538335_2.tar.bz2
107 | https://conda.anaconda.org/anaconda/win-64/pyrsistent-0.14.11-py36he774522_0.tar.bz2
108 | https://repo.anaconda.com/pkgs/main/win-64/python-dateutil-2.8.0-py36_0.tar.bz2
109 | https://repo.anaconda.com/pkgs/main/win-64/pywinpty-0.5.5-py36_1000.tar.bz2
110 | https://repo.anaconda.com/pkgs/main/noarch/retrying-1.3.3-py_2.tar.bz2
111 | https://repo.anaconda.com/pkgs/main/win-64/traitlets-4.3.3-py36_0.tar.bz2
112 | https://conda.anaconda.org/conda-forge/win-64/win_inet_pton-1.1.0-py36_0.tar.bz2
113 | https://conda.anaconda.org/conda-forge/win-64/brotlipy-0.7.0-py36h779f372_1000.tar.bz2
114 | https://conda.anaconda.org/conda-forge/win-64/cryptography-3.1.1-py36hef61171_0.tar.bz2
115 | https://repo.anaconda.com/pkgs/main/noarch/jupyter_core-4.5.0-py_0.tar.bz2
116 | https://conda.anaconda.org/plotly/noarch/plotly-4.10.0-py_0.tar.bz2
117 | https://conda.anaconda.org/conda-forge/win-64/pysocks-1.7.1-py36h9f0ad1d_1.tar.bz2
118 | https://repo.anaconda.com/pkgs/main/win-64/setuptools-41.4.0-py36_0.tar.bz2
119 | https://repo.anaconda.com/pkgs/main/win-64/terminado-0.8.2-py36_0.tar.bz2
120 | https://conda.anaconda.org/anaconda/win-64/astroid-2.4.2-py36_0.tar.bz2
121 | https://repo.anaconda.com/pkgs/main/win-64/bleach-3.1.0-py36_0.tar.bz2
122 | https://repo.anaconda.com/pkgs/main/win-64/grpcio-1.16.1-py36h351948d_1.tar.bz2
123 | https://repo.anaconda.com/pkgs/main/win-64/isort-4.3.21-py36_0.tar.bz2
124 | https://repo.anaconda.com/pkgs/main/noarch/jinja2-2.10.3-py_0.tar.bz2
125 | https://repo.anaconda.com/pkgs/main/win-64/jsonschema-3.0.2-py36_0.tar.bz2
126 | https://repo.anaconda.com/pkgs/main/win-64/jupyter_client-5.3.3-py36_1.tar.bz2
127 | https://repo.anaconda.com/pkgs/main/win-64/markdown-3.1.1-py36_0.tar.bz2
128 | https://conda.anaconda.org/anaconda/win-64/nose-1.3.7-py36_2.tar.bz2
129 | https://conda.anaconda.org/anaconda/win-64/protobuf-3.8.0-py36h33f27b4_0.tar.bz2
130 | https://repo.anaconda.com/pkgs/main/noarch/pygments-2.4.2-py_0.tar.bz2
131 | https://conda.anaconda.org/conda-forge/noarch/pyopenssl-19.1.0-py_1.tar.bz2
132 | https://repo.anaconda.com/pkgs/main/win-64/wheel-0.33.6-py36_0.tar.bz2
133 | https://repo.continuum.io/pkgs/main/win-64/nbformat-4.4.0-py36h3a5bc1b_0.tar.bz2
134 | https://repo.anaconda.com/pkgs/main/win-64/pip-19.2.3-py36_0.tar.bz2
135 | https://repo.anaconda.com/pkgs/main/noarch/prompt_toolkit-2.0.10-py_0.tar.bz2
136 | https://conda.anaconda.org/anaconda/win-64/pylint-2.6.0-py36_0.tar.bz2
137 | https://conda.anaconda.org/conda-forge/noarch/urllib3-1.25.10-py_0.tar.bz2
138 | https://repo.anaconda.com/pkgs/main/win-64/ipython-7.8.0-py36h39e3cac_0.tar.bz2
139 | https://repo.anaconda.com/pkgs/main/win-64/nbconvert-5.6.0-py36_1.tar.bz2
140 | https://conda.anaconda.org/conda-forge/noarch/requests-2.24.0-pyh9f0ad1d_0.tar.bz2
141 | https://conda.anaconda.org/conda-forge/noarch/chart-studio-1.1.0-pyh9f0ad1d_0.tar.bz2
142 | https://repo.anaconda.com/pkgs/main/win-64/ipykernel-5.1.2-py36h39e3cac_0.tar.bz2
143 | https://conda.anaconda.org/anaconda/win-64/line_profiler-2.1.2-py36hfa6e2cd_0.tar.bz2
144 | https://repo.anaconda.com/pkgs/main/win-64/jupyter_console-6.0.0-py36_0.tar.bz2
145 | https://repo.anaconda.com/pkgs/main/win-64/notebook-6.0.1-py36_0.tar.bz2
146 | https://repo.anaconda.com/pkgs/main/noarch/qtconsole-4.5.5-py_0.tar.bz2
147 | https://repo.anaconda.com/pkgs/main/win-64/widgetsnbextension-3.5.1-py36_0.tar.bz2
148 | https://repo.anaconda.com/pkgs/main/noarch/ipywidgets-7.5.1-py_0.tar.bz2
149 | https://repo.anaconda.com/pkgs/main/win-64/jupyter-1.0.0-py36_7.tar.bz2
150 | https://conda.anaconda.org/conda-forge/noarch/cufflinks-py-0.17.3-py_0.tar.bz2
151 | https://conda.anaconda.org/anaconda/win-64/h5py-2.7.1-py36he54a1c3_0.tar.bz2
152 | https://conda.anaconda.org/conda-forge/noarch/jupyterthemes-0.20.0-py_0.tar.bz2
153 | https://conda.anaconda.org/anaconda/win-64/matplotlib-2.2.2-py36h153e9ff_1.tar.bz2
154 | https://conda.anaconda.org/anaconda/win-64/mpld3-0.3-py36_0.tar.bz2
155 | https://conda.anaconda.org/anaconda/win-64/mkl-service-2.0.2-py36he774522_0.tar.bz2
156 | https://repo.anaconda.com/pkgs/main/win-64/mkl_fft-1.0.14-py36h14836fe_0.tar.bz2
157 | https://conda.anaconda.org/anaconda/win-64/numpy-1.16.4-py36h19fb1c0_0.tar.bz2
158 | https://repo.anaconda.com/pkgs/main/win-64/pandas-1.1.3-py36ha925a31_0.tar.bz2
159 | https://conda.anaconda.org/pytorch/win-64/pytorch-1.6.0-py3.6_cuda102_cudnn7_0.tar.bz2
160 | https://repo.anaconda.com/pkgs/main/win-64/scipy-1.3.1-py36h29ff71c_0.tar.bz2
161 | https://repo.anaconda.com/pkgs/main/win-64/tensorboard-1.10.0-py36he025d50_0.tar.bz2
162 | https://repo.anaconda.com/pkgs/main/win-64/tensorflow-base-1.10.0-mkl_py36h81393da_0.tar.bz2
163 | https://conda.anaconda.org/conda-forge/noarch/paramz-0.9.5-py_0.tar.bz2
164 | https://repo.anaconda.com/pkgs/main/win-64/patsy-0.5.1-py36_0.tar.bz2
165 | https://conda.anaconda.org/conda-forge/noarch/python-cufflinks-0.17.3-py_0.tar.bz2
166 | https://conda.anaconda.org/anaconda/win-64/scikit-learn-0.21.2-py36h6288b17_0.tar.bz2
167 | https://repo.anaconda.com/pkgs/main/noarch/seaborn-0.11.0-py_0.tar.bz2
168 | https://repo.anaconda.com/pkgs/main/win-64/tensorflow-1.10.0-mkl_py36hb361250_0.tar.bz2
169 | https://conda.anaconda.org/pytorch/win-64/torchvision-0.7.0-py36_cu102.tar.bz2
170 | https://conda.anaconda.org/conda-forge/win-64/gpy-1.9.9-py36hc8d92b1_1.tar.bz2
171 | https://repo.anaconda.com/pkgs/main/win-64/statsmodels-0.10.1-py36h8c2d366_0.tar.bz2
172 | https://repo.anaconda.com/pkgs/main/win-64/keras-2.2.2-0.tar.bz2
173 | https://repo.anaconda.com/pkgs/main/win-64/keras-applications-1.0.4-py36_1.tar.bz2
174 | https://repo.anaconda.com/pkgs/main/win-64/keras-preprocessing-1.0.2-py36_1.tar.bz2
175 | https://repo.anaconda.com/pkgs/main/win-64/keras-base-2.2.2-py36_0.tar.bz2
176 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: ml
  2 | channels:
  3 |   - pytorch
  4 |   - plotly
  5 |   - anaconda
  6 |   - conda-forge
  7 |   - defaults
  8 | dependencies:
  9 |   - _tflow_1100_select=0.0.3=mkl
 10 |   - _tflow_select=2.3.0=mkl
 11 |   - absl-py=0.8.0=py36_0
 12 |   - astor=0.8.0=py36_0
 13 |   - astroid=2.4.2=py36_0
 14 |   - attrs=19.2.0=py_0
 15 |   - autopep8=1.4.4=py_0
 16 |   - backcall=0.1.0=py36_0
 17 |   - blas=1.0=mkl
 18 |   - bleach=3.1.0=py36_0
 19 |   - brotlipy=0.7.0=py36h779f372_1000
 20 |   - ca-certificates=2020.12.5=h5b45459_0
 21 |   - certifi=2020.12.5=py36ha15d459_1
 22 |   - cffi=1.14.3=py36hef61171_0
 23 |   - chardet=3.0.4=py36h9f0ad1d_1007
 24 |   - chart-studio=1.1.0=pyh9f0ad1d_0
 25 |   - colorama=0.4.1=py36_0
 26 |   - colorlover=0.3.0=py_0
 27 |   - cryptography=3.1.1=py36hef61171_0
 28 |   - cudatoolkit=10.2.89=h74a9793_1
 29 |   - cufflinks-py=0.17.3=py_0
 30 |   - cycler=0.10.0=py36h009560c_0
 31 |   - decorator=4.4.0=py36_1
 32 |   - defusedxml=0.6.0=py_0
 33 |   - entrypoints=0.3=py36_0
 34 |   - freetype=2.8=vc14h17c9bdf_0
 35 |   - gast=0.3.2=py_0
 36 |   - gpy=1.9.9=py36hc8d92b1_1
 37 |   - graphviz=2.38.0=4
 38 |   - grpcio=1.16.1=py36h351948d_1
 39 |   - h5py=2.7.1=py36he54a1c3_0
 40 |   - hdf5=1.10.1=vc14hb361328_0
 41 |   - icc_rt=2019.0.0=h0cc432a_1
 42 |   - icu=58.2=vc14hc45fdbb_0
 43 |   - idna=2.10=pyh9f0ad1d_0
 44 |   - intel-openmp=2019.4=245
 45 |   - ipykernel=5.1.2=py36h39e3cac_0
 46 |   - ipympl=0.2.1=py36_1001
 47 |   - ipython=7.8.0=py36h39e3cac_0
 48 |   - ipython_genutils=0.2.0=py36h3c5d0ee_0
 49 |   - ipywidgets=7.5.1=py_0
 50 |   - isort=4.3.21=py36_0
 51 |   - jedi=0.15.1=py36_0
 52 |   - jinja2=2.10.3=py_0
 53 |   - joblib=0.13.2=py36_0
 54 |   - jpeg=9b=vc14h4d7706e_1
 55 |   - json5=0.9.5=pyh9f0ad1d_0
 56 |   - jsonschema=3.0.2=py36_0
 57 |   - jupyter=1.0.0=py36_7
 58 |   - jupyter_client=5.3.3=py36_1
 59 |   - jupyter_console=6.0.0=py36_0
 60 |   - jupyter_core=4.5.0=py_0
 61 |   - jupyterlab=2.2.9=pyhd8ed1ab_0
 62 |   - jupyterlab_server=1.2.0=py_0
 63 |   - jupyterthemes=0.20.0=py_0
 64 |   - keras=2.2.2=0
 65 |   - keras-applications=1.0.4=py36_1
 66 |   - keras-base=2.2.2=py36_0
 67 |   - keras-preprocessing=1.0.2=py36_1
 68 |   - kiwisolver=1.1.0=py36ha925a31_0
 69 |   - lazy-object-proxy=1.4.2=py36he774522_0
 70 |   - lesscpy=0.13.0=py_1
 71 |   - libiconv=1.15=vc14h29686d3_5
 72 |   - libmklml=2019.0.5=0
 73 |   - libpng=1.6.37=h2a8f88b_0
 74 |   - libprotobuf=3.8.0=h7bd577a_0
 75 |   - libsodium=1.0.18=h62dcd97_0
 76 |   - libtiff=4.0.8=vc14h04e2a1e_10
 77 |   - libxml2=2.9.4=vc14h8fd0f11_5
 78 |   - libxslt=1.1.29=vc14hf85b8d4_5
 79 |   - line_profiler=2.1.2=py36hfa6e2cd_0
 80 |   - lxml=4.1.1=py36he0adb16_0
 81 |   - lz4-c=1.9.2=h62dcd97_1
 82 |   - m2w64-gcc-libgfortran=5.3.0=6
 83 |   - m2w64-gcc-libs=5.3.0=7
 84 |   - m2w64-gcc-libs-core=5.3.0=7
 85 |   - m2w64-gmp=6.1.0=2
 86 |   - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
 87 |   - markdown=3.1.1=py36_0
 88 |   - markupsafe=1.1.1=py36he774522_0
 89 |   - matplotlib=2.2.2=py36h153e9ff_1
 90 |   - mccabe=0.6.1=py36_1
 91 |   - mistune=0.8.4=py36he774522_0
 92 |   - mkl=2019.4=245
 93 |   - mkl-service=2.0.2=py36he774522_0
 94 |   - mkl_fft=1.0.14=py36h14836fe_0
 95 |   - mkl_random=1.0.2=py36h343c172_0
 96 |   - mpld3=0.3=py36_0
 97 |   - msys2-conda-epoch=20160418=1
 98 |   - nbconvert=5.6.0=py36_1
 99 |   - nbformat=4.4.0=py36h3a5bc1b_0
100 |   - ninja=1.10.1=py36h7ef1ec2_0
101 |   - nodejs=15.3.0=h57928b3_0
102 |   - nose=1.3.7=py36_2
103 |   - notebook=6.0.1=py36_0
104 |   - numpy=1.16.4=py36h19fb1c0_0
105 |   - numpy-base=1.16.4=py36hc3f5095_0
106 |   - olefile=0.46=py_0
107 |   - openssl=1.1.1i=h8ffe710_0
108 |   - pandas=1.1.3=py36ha925a31_0
109 |   - pandoc=2.2.3.2=0
110 |   - pandocfilters=1.4.2=py36_1
111 |   - paramz=0.9.5=py_0
112 |   - parso=0.5.1=py_0
113 |   - patsy=0.5.1=py36_0
114 |   - pickleshare=0.7.5=py36_0
115 |   - pillow=4.2.1=py36hdb25ab2_0
116 |   - pip=19.2.3=py36_0
117 |   - plotly=4.10.0=py_0
118 |   - ply=3.11=py_1
119 |   - prometheus_client=0.7.1=py_0
120 |   - prompt_toolkit=2.0.10=py_0
121 |   - protobuf=3.8.0=py36h33f27b4_0
122 |   - pycodestyle=2.5.0=py_0
123 |   - pycparser=2.20=pyh9f0ad1d_2
124 |   - pydot=1.4.1=py36_0
125 |   - pygments=2.4.2=py_0
126 |   - pylint=2.6.0=py36_0
127 |   - pyopenssl=19.1.0=py_1
128 |   - pyparsing=2.4.2=py_0
129 |   - pyqt=5.9.2=py36h6538335_2
130 |   - pyreadline=2.1=py36_1
131 |   - pyrsistent=0.14.11=py36he774522_0
132 |   - pysocks=1.7.1=py36h9f0ad1d_1
133 |   - python=3.6.8=h9f7ef89_7
134 |   - python-cufflinks=0.17.3=py_0
135 |   - python-dateutil=2.8.0=py36_0
136 |   - python_abi=3.6=1_cp36m
137 |   - pytorch=1.6.0=py3.6_cuda102_cudnn7_0
138 |   - pytz=2019.3=py_0
139 |   - pywin32=223=py36hfa6e2cd_1
140 |   - pywinpty=0.5.5=py36_1000
141 |   - pyyaml=5.1.2=py36he774522_0
142 |   - pyzmq=17.0.0=py36hfa6e2cd_0
143 |   - qt=5.9.7=vc14h73c81de_0
144 |   - qtconsole=4.5.5=py_0
145 |   - requests=2.24.0=pyh9f0ad1d_0
146 |   - retrying=1.3.3=py_2
147 |   - rope=0.14.0=py_0
148 |   - scikit-learn=0.21.2=py36h6288b17_0
149 |   - scipy=1.3.1=py36h29ff71c_0
150 |   - seaborn=0.11.0=py_0
151 |   - send2trash=1.5.0=py36_0
152 |   - setuptools=41.4.0=py36_0
153 |   - sip=4.19.8=py36h6538335_0
154 |   - six=1.12.0=py36_0
155 |   - sqlite=3.29.0=he774522_0
156 |   - statsmodels=0.10.1=py36h8c2d366_0
157 |   - tensorboard=1.10.0=py36he025d50_0
158 |   - tensorflow=1.10.0=mkl_py36hb361250_0
159 |   - tensorflow-base=1.10.0=mkl_py36h81393da_0
160 |   - termcolor=1.1.0=py36_1
161 |   - terminado=0.8.2=py36_0
162 |   - testpath=0.4.2=py36_0
163 |   - tk=8.6.7=vc14hb68737d_1
164 |   - toml=0.10.1=py_0
165 |   - torchvision=0.7.0=py36_cu102
166 |   - tornado=6.0.3=py36he774522_0
167 |   - traitlets=4.3.3=py36_0
168 |   - typed-ast=1.4.1=py36he774522_0
169 |   - urllib3=1.25.10=py_0
170 |   - vc=14.1=h0510ff6_4
171 |   - vs2015_runtime=14.16.27012=hf0eaf9b_3
172 |   - wcwidth=0.1.7=py36h3d5aa90_0
173 |   - webencodings=0.5.1=py36_1
174 |   - werkzeug=0.16.0=py_0
175 |   - wheel=0.33.6=py36_0
176 |   - widgetsnbextension=3.5.1=py36_0
177 |   - win_inet_pton=1.1.0=py36_0
178 |   - wincertstore=0.2=py36h7fe50ca_0
179 |   - winpty=0.4.3=4
180 |   - wrapt=1.11.2=py36he774522_0
181 |   - xz=5.2.5=h62dcd97_0
182 |   - yaml=0.1.7=vc14h4cb57cf_1
183 |   - zeromq=4.2.3=hd6b2f15_3
184 |   - zlib=1.2.11=vc14h1cdd9ab_1
185 |   - zstd=1.4.5=h04227a9_0
186 |   - pip:
187 |     - future==0.18.2
188 |     - jupyter-contrib-core==0.3.3
189 |     - jupyter-contrib-nbextensions==0.5.1
190 |     - jupyter-highlight-selected-word==0.2.0
191 |     - jupyter-latex-envs==1.4.6
192 |     - jupyter-nbextensions-configurator==0.4.1
193 |     - py-lets-be-rational==1.0.1
194 |     - py-vollib==1.0.1
195 |     - simplejson==3.17.0
196 | prefix: D:\Apps\Anaconda3\envs\ml
197 | 


--------------------------------------------------------------------------------
/local_regression/locreg/__init__.py:
--------------------------------------------------------------------------------
1 | from locreg.local_regression import LocalRegression
2 | __all__ = ['LocalRegression',
3 |            ]
4 | 


--------------------------------------------------------------------------------
/local_regression/locreg/local_regression.py:
--------------------------------------------------------------------------------
  1 | from sklearn.base import RegressorMixin
  2 | from sklearn.preprocessing import PolynomialFeatures
  3 | from scipy.optimize import leastsq
  4 | import numpy as np
  5 | from scipy.linalg import solve_triangular
  6 | 
  7 | 
  8 | def exp_kernel(z):
  9 |     """
 10 |     Implements the exponential kernel $$ e^{-|z|^2/2}$$
 11 |     as a vectorized function.
 12 |     :param z: a numpy array of any dimension
 13 |     :return: the exponential kernel evaluated on the array z assuming 
 14 |              the last axis is the dimension of the elements of z
 15 |     """
 16 |     return np.exp(-np.linalg.norm(np.atleast_1d(z), axis=-1) ** 2 / 2)
 17 | 
 18 | 
 19 | class LocalRegression(RegressorMixin):
 20 |     """
 21 |     This class performs local polynomial regression of dimension $d$
 22 |     and degree $p$, i.e. given a training set $(X,y)$ of $N$ samples
 23 |     $x_i \in \R^d$ and $y_i \in \R$, it makes a new prediction at
 24 |     $x \in \R^d$ as $x=\beta^0$, where $\beta$ is the value minimizing
 25 |     the cost functional
 26 |     \begin{align*}
 27 |         J(\beta) & := \sum_{i=1}^{N}{(y_i - \beta ^0 + j_1(x_i))^2 w_i},
 28 |         j_1(x_i) & := \sum_{1 \leq |\alpha| \leq p}{(x-x_i)^\alpha}, \\
 29 |         w_i & := K_h(x-x_i),
 30 |     \end{align*}
 31 |     where $K_h$ is a kernel function scaled with bandwith $h$. The minimum
 32 |     of the cost function is computed via QR decomposition (or analytically).
 33 |     """
 34 | 
 35 |     def __init__(self, degree, kernel=exp_kernel, warm_start=True):
 36 |         """
 37 |         :param degree: an int specifying the degree of the polynomial
 38 |         :param kernel: a kernel function for the weight calculation
 39 |         :param warm_start: if True the fitted kernel is preserved enabling
 40 |                            partial_fit() for new y
 41 |         """
 42 |         self.degree = degree
 43 |         self.kernel = kernel
 44 |         self.warm_start = warm_start
 45 |         self.bandwidth = None
 46 |         self.fitted_kernel = None
 47 |         self.method = None
 48 |         self.X_train_ = None
 49 |         self.y_train = None
 50 | 
 51 |     def fit(self, X, y, bandwidth=None):
 52 |         """
 53 |         Fits the regressor to the data. As the concept of local regression is
 54 |         to fit the data to each prediction, this function only stores the data
 55 |         and either sets a fixed bandwidth or estimates an optimal one.
 56 |         :param X: a numpy array of shape (N, d)
 57 |         :param y: a numpy array of shape (N,)
 58 |         :param bandwidth: a scalar or None
 59 |         :return: self
 60 |         """
 61 |         self.X_train_ = X
 62 |         self.y_train = y
 63 |         self._set_bandwidth(bandwidth)
 64 |         return self
 65 | 
 66 |     def predict(self, X, method=None):
 67 |         """
 68 |         Performs the prediction for each value x in the prediction set X.
 69 |         If $d=1$ and $p=0,1$ the cost functional can be minimized analytically.
 70 |         :param X: a numpy array of dimension (M, d)
 71 |         :param method: can be 'analytic' or 'qr' or 'leastsq' or None. Method is
 72 |                        set automatically if None.
 73 |         :return: a numpy array y of dimension (M,) with the predictions
 74 |         """
 75 |         if method is None:
 76 |             method = self._determine_method()
 77 |         if self.fitted_kernel is not None:
 78 |             self.fitted_kernel = None
 79 |             self.method = None
 80 |         return self._predict_with_method(X, method)
 81 | 
 82 |     def fit_partial(self, y, bandwidth=None):
 83 |         """ Re-fits only the y values of the regression. Only works if
 84 |             warm_start==True and a previous (full) fit and predict
 85 |             has already been performed.
 86 |         """
 87 |         if self.warm_start and self.fitted_kernel is not None and self.method in ['analytic', 'qr']:
 88 |             self.y_train = y
 89 |             self._set_bandwidth(bandwidth)
 90 |         else:
 91 |             raise ValueError("The fit_partial method can only be invoked\
 92 |                 if fit and predict have been invoked previously with method \
 93 |                     `analytic´ or `qr` and warm_start is set to True")
 94 | 
 95 |     def predict_partial(self):
 96 |         """ Predicts on the last value of y_train set by fit() or fit_partial()
 97 |             and the last X that has been used for prediction. """
 98 |         if self.fitted_kernel is not None and self.method is not None:
 99 |             return self._predict_with_method(X=None, method=self.method)
100 |         else:
101 |             raise ValueError("The method predict_partial requires a full\
102 |                               prior run of predict with method `analytic´ \
103 |                               or `qr´.")
104 | 
105 |     def _set_bandwidth(self, bandwidth):
106 |         """ Sets the bandwidth in the fitting."""
107 |         if bandwidth is None:
108 |             self.bandwidth = self._silverman()
109 |         else:
110 |             self.bandwidth = bandwidth
111 | 
112 |     def _silverman(self):
113 |         """
114 |         This function implements Silverman's Rule of Thumb
115 |         \begin{align*}
116 |             h = \Big( \frac{4}{3n} \Big)^{\frac{1}{5}} \hat \sigma_Y
117 |         \end{align*}
118 |         to estimate the optimal bandwidth of the training data y.
119 |         :return: bandwidth h
120 |         """
121 |         sigma_y = np.std(self.y_train)
122 |         n = self.y_train.shape[0]
123 |         return (4 / (3 * n)) ** (1 / 5) * sigma_y
124 | 
125 |     def _scaled_kernel(self):
126 |         """
127 |         Scales the kernel function self.kernel by the
128 |         bandwidth self.bandwidth.
129 |         :return: scaled kernel function
130 |         """
131 |         def kh(z):
132 |             d = self.X_train_.shape[1]
133 |             return self.kernel(z / self.bandwidth) / self.bandwidth ** d
134 |         return kh
135 | 
136 |     def _predict_with_method(self, X, method):
137 |         """ Performs the prediction based on the `method´ flag.
138 |         :param method: a string chosen from 'analytic', 'leastsq', 'qr'
139 |         """
140 |         self.method = method
141 |         num_dims = self.X_train_.shape[1]
142 |         if method == 'analytic':
143 |             if self.degree == 0:
144 |                 return self._predict_nadaraya_watson(X)
145 |             elif num_dims == 1 and self.degree == 1:
146 |                 return self._predict_locally_linear(X)
147 |             else:
148 |                 raise ValueError('Method `analytic´ is only available if \
149 |                                   self.degree=0 or self.degree=1 and X.shape[1] == 1.')
150 |         elif method == 'qr':
151 |             return self._predict_qr(X)
152 |         elif method == 'leastsq':
153 |             return self._predict_leastsq(X)
154 |         else:
155 |             raise ValueError('Parameter `method´ has to be `analytic´ or\
156 |                 `qr´ or `leastsq´ or None, but is currently set to: %s' % method)
157 | 
158 |     def _determine_method(self):
159 |         """ Automatically selects the method for prediction based on the dimension
160 |             of the training data self_X_train_.
161 |         """
162 |         num_dims = self.X_train_.shape[1]
163 |         if self.degree == 0 or (self.degree == 1 and num_dims == 1):
164 |             return 'analytic'
165 |         else:
166 |             return 'qr'
167 | 
168 |     def _predict_nadaraya_watson(self, X):
169 |         """
170 |         Performs local regression of degree $p=0$ in dimension $d=1$.
171 |         In this case, the cost functional can be minimized analytically
172 |         and for any $x \in \R$, the estimate $y$ is given by
173 |         \begin{align*}
174 |             y &= \sum_{i=1}^{N}{W^i_h(x) y_i}, \\
175 |             W^0_i(x) &= \frac{K_h(x - x_i)}{\sum_{j=1}^N{K_h(x - x_j)}}
176 |         \end{align*}
177 |         :param X: a numpy array of dimension (M, d) at which to predict
178 |         :return: a numpy array of dimension (M,) with the M predicted y's
179 |         """
180 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
181 |             k_h = self._scaled_kernel()
182 |             nwk = k_h(np.array([x - self.X_train_ for x in X]))
183 |             self.fitted_kernel = nwk / np.sum(nwk, axis=1)[:, np.newaxis]
184 |         return np.sum(self.fitted_kernel * self.y_train.squeeze(), axis=1)
185 | 
186 |     def _predict_locally_linear(self, X):
187 |         """
188 |         Performs local regression of degree $p=1$ in dimension $d=1$.
189 |         In this case, the cost functional can be minimized analytically
190 |         and for any $x \in \R$, the estimate $y$ is given by
191 |         \begin{align*}
192 |             y &= \sum_{i=1}^{N}{W^i_h(x) y_i}, \\
193 |             W^1_i(x) &:= \frac{K_h(x-x_i)}{N}\frac{s_2(x)-s_1(x)(x-x_i)}{x_2(x)s_0(x)} \\
194 |             s_r(x) &:= \frac{1}{N} \sum_{i=1}^N{(x-x_i)^rK_h(x-x_i)}.
195 |         \end{align*}
196 |         :param X: a numpy array of dimension (M, d) at which to predict
197 |         :return: a numpy array of dimension (M,) with the M predicted y's
198 |         """
199 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
200 |             n = self.X_train_.shape[0]
201 |             m = X.shape[0]
202 |             self.fitted_kernel = np.zeros((m, n))
203 |             k_h = self._scaled_kernel()
204 |             X_ = self.X_train_.squeeze()
205 |             for i in range(m):
206 |                 x = X[i]
207 |                 llk = k_h(x - self.X_train_).squeeze()
208 |                 s0 = np.mean(llk)
209 |                 s1 = np.mean((x - X_) * llk)
210 |                 s2 = np.mean((x - X_) ** 2 * llk)
211 |                 s = (s2 - s1 * (x - X_)) / (s2 * s0 - s1 ** 2) / n
212 |                 self.fitted_kernel[i, :] = s * llk
213 |         return np.sum(self.fitted_kernel * self.y_train.squeeze(), axis=1)
214 | 
215 |     def _predict_qr(self, X):
216 |         """
217 |         Performs a prediction for each x in X by solving the associated
218 |         normal equations via QR decomposition.
219 | 
220 |         :param X: a numpy array of dimension (M, d) at which to predict
221 |         :return: a numpy array of dimension (M,) with the M predicted y's
222 |         """
223 |         if (self.warm_start and self.fitted_kernel is None) or (not self.warm_start):
224 |             self.fitted_kernel = []
225 |             poly = PolynomialFeatures(degree=self.degree)
226 |             for i in range(X.shape[0]):
227 |                 x = X[i, :]
228 |                 phi = poly.fit_transform(self.X_train_ - x)
229 |                 m = phi.shape[1]
230 |                 kh = self._scaled_kernel()
231 |                 w = kh(x - self.X_train_)
232 |                 w_mat = np.diag(np.sqrt(w))
233 |                 a = w_mat @ phi
234 |                 q, r = np.linalg.qr(a, mode='complete')
235 |                 r = r[:m, :m]
236 |                 self.fitted_kernel.append((q.transpose() @ w_mat, r))
237 |         n = len(self.fitted_kernel)
238 |         y_pred = np.zeros(n)
239 |         for i in range(n):
240 |             qw, r = self.fitted_kernel[i]
241 |             m = r.shape[1]
242 |             c = (qw @ self.y_train)[:m]
243 |             beta = solve_triangular(r, c)
244 |             y_pred[i] = beta[0]
245 |         return y_pred
246 | 
247 |     def _predict_leastsq(self, X):
248 |         """
249 |         Performs a prediction for each x in X by minimizing the cost
250 |         functional $J$.
251 | 
252 |         :param X: a numpy array of dimension (M, d) at which to predict
253 |         :return: a numpy array of dimension (M,) with the M predicted y's
254 |         """
255 |         poly = PolynomialFeatures(degree=self.degree)
256 |         x0 = poly.fit_transform(np.zeros((1, X[0].shape[0])))[0]
257 |         return np.array([leastsq(func=self._cost_functional(poly, x),
258 |                                     x0=x0)[0][0] for x in X])
259 |     
260 |     def _cost_functional(self, poly, x):
261 |         """
262 |         Creates the cost functional $J$ for optimization using the parameters.
263 |         :param poly: an instance of PolynomialFeatures
264 |         :param x: a numpy array of shape (d,)
265 |         :return: cost funtional $J$
266 |         """
267 |         def cost(beta):
268 |             res = np.sum(poly.fit_transform(self.X_train_ - x) * beta, axis=1) - self.y_train
269 |             kh = self._scaled_kernel()
270 |             w = kh(x - self.X_train_)
271 |             res *= np.sqrt(np.abs(w))
272 |             return res
273 |         return cost
274 | 


--------------------------------------------------------------------------------
/local_regression/locreg/test_local_regression.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | from local_regression import LocalRegression
  3 | import numpy as np
  4 | 
  5 | 
  6 | class TestLocalRegression(TestCase):
  7 | 
  8 |     def setUp(self):
  9 |         self.n = 10
 10 |         self.x = np.linspace(-10, 10, self.n)
 11 |         self.X = self.x[:, np.newaxis]
 12 |         np.random.seed(1)
 13 |         self.e = np.random.normal(0, 0.01, self.n)
 14 | 
 15 |     def test_nw_analytic_vs_qr_vs_leastsq_1d(self):
 16 |         self.y = self.x**2 + self.e
 17 |         self.nw = LocalRegression(degree=0, warm_start=False)
 18 |         self.nw.fit(self.X, self.y)
 19 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 20 |         y_pred_analytic = self.nw.predict(X_eval, method='analytic')
 21 |         y_pred_qr = self.nw.predict(X_eval, method='qr')
 22 |         y_pred_leastsq = self.nw.predict(X_eval, method='leastsq')
 23 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr, decimal=6)
 24 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq, decimal=6)
 25 | 
 26 |     def test_nw_analytic_vs_qr_vs_leastsq_2d(self):
 27 |         grid = np.linspace(-10, 10, self.n)
 28 |         x1, x2 = np.meshgrid(grid, grid)
 29 |         self.X = np.vstack((x1.flatten(), x2.flatten())).T
 30 |         self.y = self.X[:, 0] * self.X[:, 1] + np.random.normal(0, 0.01, self.X.shape[0])
 31 |         self.nw = LocalRegression(degree=0, warm_start=False)
 32 |         self.nw.fit(self.X, self.y)
 33 |         grid_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 34 |         x1e, x2e = np.meshgrid(grid_eval, grid_eval)
 35 |         X_eval = np.vstack((x1e.flatten(), x2e.flatten())).T
 36 |         y_pred_analytic = self.nw.predict(X_eval, method='analytic')
 37 |         y_pred_qr = self.nw.predict(X_eval, method='qr')
 38 |         y_pred_leastsq = self.nw.predict(X_eval, method='leastsq')
 39 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq, decimal=5)
 40 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr, decimal=5)
 41 | 
 42 |     def test_ll_analytic_vs_cost(self):
 43 |         self.y = self.x + self.e
 44 |         self.ll = LocalRegression(degree=1, warm_start=False)
 45 |         self.ll.fit(self.X, self.y)
 46 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
 47 |         y_pred_analytic = self.ll.predict(X_eval, method='analytic')
 48 |         y_pred_qr = self.ll.predict(X_eval, method='qr')
 49 |         y_pred_leastsq = self.ll.predict(X_eval, method='leastsq')
 50 |         np.testing.assert_almost_equal(y_pred_analytic, y_pred_qr)
 51 |         np.testing.assert_almost_equal(y_pred_qr, y_pred_leastsq)
 52 | 
 53 |     def test_nw_analytic_loc_const_const_1d(self):
 54 |         self.y = np.ones_like(self.x) + self.e
 55 |         self.nw = LocalRegression(degree=0)
 56 |         self.nw.fit(self.X, self.y)
 57 |         np.testing.assert_almost_equal(self.nw.predict(self.X), self.y)
 58 | 
 59 |     def test_ll_analytic_loc_lin_lin_1d(self):
 60 |         self.y = self.x + self.e
 61 |         self.ll = LocalRegression(degree=1)
 62 |         self.ll.fit(self.X, self.y)
 63 |         np.testing.assert_almost_equal(self.ll.predict(self.X), self.y, decimal=1)
 64 | 
 65 |     def test_2d_loc_const_const(self):
 66 |         n = 5
 67 |         c = 7.
 68 |         x = np.linspace(-5, 5, n)
 69 |         y = np.linspace(-5, 5, n)
 70 |         z = np.array([[c for xx in x] for yy in y])
 71 |         x, y = np.meshgrid(x, y)
 72 |         X = np.array(list(zip(x.flatten(), y.flatten())))
 73 |         np.random.seed(1)
 74 |         e = np.random.normal(0, 0.01, (n, n))
 75 |         z = z + e
 76 |         locreg3d = LocalRegression(degree=0).fit(X, z.flatten())
 77 |         z_pred = locreg3d.predict(X)
 78 |         self.assertTrue(np.all(np.abs(z_pred / c - 1) <= 0.01))
 79 | 
 80 |     def test_2d_loc_lin_lin(self):
 81 |         n = 5
 82 |         x = np.linspace(1, 10, n)
 83 |         y = np.linspace(1, 10, n)
 84 |         z = np.array([[xx + yy for xx in x] for yy in y])
 85 |         res = z.flatten()
 86 |         x, y = np.meshgrid(x, y)
 87 |         X = np.array(list(zip(x.flatten(), y.flatten())))
 88 |         np.random.seed(1)
 89 |         e = np.random.normal(0, 0.01, (n, n))
 90 |         z = z + e
 91 |         locreg3d = LocalRegression(degree=1).fit(X, z.flatten())
 92 |         z_pred = locreg3d.predict(X)
 93 |         self.assertTrue(np.all(np.abs(z_pred / res - 1) <= 0.01))
 94 | 
 95 |     def test_2d_least_sq_vs_qr(self):
 96 |         n = 5
 97 |         x1 = np.linspace(1, 10, n)
 98 |         x2 = np.linspace(1, 10, n)
 99 |         y = np.array([[xx1 ** 2 + xx2 ** 2 for xx1 in x1] for xx2 in x2])
100 |         x1, x2 = np.meshgrid(x1, x2)
101 |         X = np.array(list(zip(x1.flatten(), x2.flatten())))
102 |         np.random.seed(1)
103 |         e = np.random.normal(0, 0.01, (n, n))
104 |         y = y + e
105 |         locreg3d = LocalRegression(degree=2).fit(X, y.flatten())
106 |         y_pred_leastsq = locreg3d.predict(X, method='leastsq')
107 |         y_pred_qr = locreg3d.predict(X, method='qr')
108 |         np.testing.assert_array_almost_equal(y_pred_leastsq, y_pred_qr)
109 | 
110 |     def test_fit_partial(self):
111 |         self.y = self.x + self.e
112 |         self.ll = LocalRegression(degree=1, warm_start=True)
113 |         self.ll.fit(self.X, self.y)
114 |         X_eval = np.linspace(-10, 10, 2 * self.n)[:, np.newaxis]
115 |         _ = self.ll.predict(X_eval, method='analytic')
116 |         self.y = self.x ** 2 + self.e
117 |         h = self.ll.bandwidth
118 |         self.ll.fit_partial(self.y, h)
119 |         y_pred_partial = self.ll.predict(X_eval, method='analytic')
120 |         self.ll2 = LocalRegression(degree=1, warm_start=False)
121 |         self.ll2.fit(self.X, self.y, h)
122 |         y_pred = self.ll2.predict(X_eval, method='analytic')
123 |         np.testing.assert_almost_equal(y_pred, y_pred_partial)
124 |         # test against new instance to validate correct state
125 |         self.ll3 = LocalRegression(degree=1, warm_start=False)
126 |         self.ll3.fit(self.X, self.y, h)
127 |         y_pred_new = self.ll3.predict(X_eval, method='analytic')
128 |         np.testing.assert_array_almost_equal(y_pred_new, y_pred)
129 | 


--------------------------------------------------------------------------------
/lstm_intro/lstm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/lstm_intro/lstm.pdf


--------------------------------------------------------------------------------
/lstm_intro/lstm_cell.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/lstm_intro/lstm_cell.pdf


--------------------------------------------------------------------------------
/lstm_intro/lstm_cell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/lstm_intro/lstm_cell.png


--------------------------------------------------------------------------------
/network_topology_selection/data_how_deep_financial_models.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/network_topology_selection/data_how_deep_financial_models.zip


--------------------------------------------------------------------------------
/network_topology_selection/data_surgery.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niknow/machine-learning-examples/b147133c7b0115b591739fa4a600462cbaee6225/network_topology_selection/data_surgery.zip


--------------------------------------------------------------------------------
/network_topology_selection/keras_grid/__init__.py:
--------------------------------------------------------------------------------
1 | from keras_grid.model_grid import MLPGrid, LSTMGrid
2 | 
3 | __all__ = ['MLPGrid',
4 |            'LSTMGrid'
5 |            ]
6 | 


--------------------------------------------------------------------------------
/network_topology_selection/keras_grid/model_grid.py:
--------------------------------------------------------------------------------
  1 | from abc import abstractmethod, ABC
  2 | import numpy as np
  3 | from keras.models import Sequential, load_model
  4 | from keras.layers import Dense, LSTM
  5 | import os
  6 | import json
  7 | import pickle
  8 | 
  9 | class ModelGrid(ABC):
 10 |     """
 11 |     This class wraps a 2D grid of keras models.
 12 |     """
 13 | 
 14 |     _json_suffix = '_grid.json'
 15 |     _history_suffix = '_grid_history.pickle'
 16 | 
 17 |     def __init__(self, num_inputs, num_outputs, range_units, range_layers, constant_num_weights=False):
 18 |         self.num_inputs = num_inputs
 19 |         self.num_outputs = num_outputs
 20 |         self.range_units = range_units
 21 |         self.range_layers = range_layers
 22 |         self.constant_num_weights = constant_num_weights
 23 |         self.models = {}
 24 |         self.history = {}
 25 |         self.unit_grid = None
 26 | 
 27 |     def __eq__(self, other): 
 28 |         if not isinstance(other, type(self)):
 29 |             return NotImplemented
 30 |         return self.num_inputs == other.num_inputs and \
 31 |                self.num_outputs == other.num_outputs and \
 32 |                np.all(self.range_units == other.range_units) and \
 33 |                np.all(self.range_layers == other.range_layers) and \
 34 |                self.constant_num_weights == other.constant_num_weights \
 35 |                and np.all(self.unit_grid.flatten() == other.unit_grid.flatten()) if self.unit_grid is not None else True
 36 | 
 37 |     @classmethod
 38 |     def from_disk(cls, path):
 39 |         """
 40 |         Reconstructs an instance of this class from a folder of files written with its `save´-method.
 41 |         """
 42 |         with open(os.path.join(path, cls._name + ModelGrid._json_suffix)) as fp:
 43 |             mgrid = cls.from_json(json.load(fp))
 44 |         for unit_idx in range(len(mgrid.range_units)):
 45 |             mgrid[unit_idx] = {}
 46 |             for layer_idx in range(len(mgrid.range_layers)):
 47 |                 mgrid[unit_idx][layer_idx] = load_model(
 48 |                     os.path.join(path, cls._name + '_units%i_layers%i.h5' % (unit_idx, layer_idx)))
 49 |         with open(os.path.join(path, cls._name + ModelGrid._history_suffix), 'rb') as fp:
 50 |             mgrid.history = pickle.load(fp)
 51 |         return mgrid
 52 | 
 53 |     def __getitem__(self, key):
 54 |         """
 55 |         :param key: A tuple (unit_idx, layer_idx)
 56 |         :return: Model with self.unit_grid[unit_idx][layer_idx] number of units and self.range_layers[layer_idx]
 57 |                 number of layers.
 58 |         """
 59 |         return self.models[key]
 60 | 
 61 |     def __setitem__(self, key, item):
 62 |         self.models[key] = item
 63 | 
 64 |     def _initialize_unit_grid(self):
 65 |         self.unit_grid = np.repeat(self.range_units[:, np.newaxis], len(self.range_layers), axis=1)
 66 |         if self.constant_num_weights:
 67 |             for layer_idx in range(1, len(self.range_layers)):
 68 |                 for unit_idx in range(len(self.range_units)):
 69 |                     self.unit_grid[unit_idx, layer_idx] = self.get_num_equiv_units(self.range_units[unit_idx],
 70 |                                                                                    self.range_layers[layer_idx])
 71 | 
 72 |     def _initialize_model_grid(self):
 73 |         for unit_idx in range(len(self.range_units)):
 74 |             self.models[unit_idx] = {}
 75 |             for layer_idx in range(len(self.range_layers)):
 76 |                 self.models[unit_idx][layer_idx] = self._create_model(unit_idx, layer_idx)
 77 | 
 78 |     def initialize(self):
 79 |         self._initialize_unit_grid()
 80 |         self._initialize_model_grid()
 81 | 
 82 |     def compile(self, **kwargs):
 83 |         for units_idx in range(len(self.range_units)):
 84 |             for layer_idx in range(len(self.range_layers)):
 85 |                 self.models[units_idx][layer_idx].compile(**kwargs)
 86 | 
 87 |     def fit(self, **kwargs):
 88 |         for unit_idx in range(len(self.range_units)):
 89 |             self.history[unit_idx] = {}
 90 |             for layer_idx in range(len(self.range_layers)):
 91 |                 print("-"*30)
 92 |                 print("Fitting with units_index=%i and layers_index=%i" % (unit_idx, layer_idx))
 93 |                 print("-"*30)
 94 |                 self.history[unit_idx][layer_idx] = self.models[unit_idx][layer_idx].fit(**kwargs).history
 95 |                 print("DONE")
 96 |                 print("")
 97 |                 print("")
 98 |         return self.history
 99 | 
100 |     def save(self, path):
101 |         try:
102 |             os.stat(path)
103 |         except:
104 |             os.mkdir(path)
105 |         for unit_idx in range(len(self.range_units)):
106 |             for layer_idx in range(len(self.range_layers)):
107 |                 self.models[unit_idx][layer_idx].save(
108 |                     os.path.join(path, type(self)._name + '_units%i_layers%i.h5' % (unit_idx, layer_idx)))
109 |         with open(os.path.join(path, type(self)._name + ModelGrid._history_suffix), 'wb') as fp:
110 |             pickle.dump(self.history, fp)
111 |         with open(os.path.join(path, type(self)._name + ModelGrid._json_suffix), 'w') as fp:
112 |             json.dump(self.to_json(), fp)
113 | 
114 | 
115 |     def get_num_equiv_units(self, num_units, num_layers):
116 |         if self.constant_num_weights:
117 |             if num_layers == self.range_layers[0]:
118 |                 n_u = num_units
119 |             else:
120 |                 n_w = type(self).num_weights(self.num_inputs, self.num_outputs, self.range_layers[0], num_units)
121 |                 n_u = type(self).num_units(self.num_inputs, self.num_outputs, num_layers, n_w)
122 |         else:
123 |             n_u = num_units
124 |         return n_u
125 | 
126 |     @abstractmethod
127 |     def _create_model(self, unit_idx, layer_idx):
128 |         pass
129 | 
130 |     def to_json(self):
131 |         return {'num_inputs': self.num_inputs,
132 |                 'num_outputs': self.num_outputs,
133 |                 'range_units': self.range_units.tolist(),
134 |                 'range_layers': self.range_layers.tolist(),
135 |                 'constant_num_weights': self.constant_num_weights,
136 |                 'unit_grid': self.unit_grid.tolist()}
137 |     
138 |     @classmethod
139 |     @abstractmethod
140 |     def from_json(self, jsondict):
141 |         pass
142 |     
143 |     @staticmethod
144 |     def jsondict_to_initdict(jsondict):
145 |         return {'num_inputs': jsondict['num_inputs'],
146 |                 'num_outputs': jsondict['num_outputs'],
147 |                 'range_units': np.array(jsondict['range_units']),
148 |                 'range_layers': np.array(jsondict['range_layers']),
149 |                 'constant_num_weights': jsondict['constant_num_weights']}
150 | 
151 |     @classmethod
152 |     @abstractmethod
153 |     def num_weights(cls, n_i, n_o, n_L, n_u):
154 |         """
155 |         Computes the total number of parameters in the model assuming all layers have the same
156 |         number of units (except the output layer).
157 | 
158 |         param n_i: number of inputs
159 |         param n_o: number of outputs
160 |         param n_L: number of layers
161 |         param n_u: number of units per layer
162 | 
163 |         returns: total number of trainable weights
164 | 
165 |         """
166 |         pass
167 | 
168 |     @classmethod
169 |     @abstractmethod
170 |     def num_units(cls, n_i, n_o, n_L, n_w):
171 |         """
172 |         Computes the total number ofunits per layer to achieve a given number of parameters assuming all layers
173 |         have the same number of units (except the output layer).
174 | 
175 |         param n_i: number of inputs
176 |         param n_o: number of outputs
177 |         param n_L: number of weights
178 |         param n_w: number of weights
179 | 
180 |         returns: number of units per layer needed
181 | 
182 |         """
183 |         pass
184 | 
185 | 
186 | class MLPGrid(ModelGrid):
187 |     """
188 |     This class wraps a 2D grid of keras sequential models with dense layers.
189 |     """
190 |     _name = 'mlp'
191 | 
192 |     @classmethod
193 |     def num_weights(cls, n_i, n_o, n_L, n_u):
194 |         if n_L == 2:
195 |             return n_u * (n_i + n_o + 1) + n_o
196 |         else:
197 |             return (n_L - 2) * n_u ** 2 + (n_i + n_o + n_L - 1) * n_u + n_o
198 | 
199 |     @classmethod
200 |     def num_units(cls, n_i, n_o, n_L, n_w):
201 |         if n_L == 2:
202 |             return int((n_w - n_o) / (n_i + n_o + 1))
203 |         else:
204 |             p = n_i + n_o + n_L - 1
205 |             p /= n_L - 2
206 |             q = n_o - n_w
207 |             q /= n_L - 2
208 |             return int(-p / 2 + np.sqrt(p ** 2 / 4 - q))
209 |     
210 |     @classmethod
211 |     def from_json(cls, jsondict):
212 |         initdict = ModelGrid.jsondict_to_initdict(jsondict)
213 |         mlpg = MLPGrid(**initdict)
214 |         mlpg.unit_grid=np.array(jsondict['unit_grid'])
215 |         return mlpg
216 | 
217 |     def _create_model(self, unit_idx, layer_idx):
218 |         num_layers = self.range_layers[layer_idx]
219 |         n_u = self.unit_grid[unit_idx][layer_idx]
220 |         model = Sequential()
221 |         model.add(Dense(units=n_u, input_shape=(self.num_inputs,), activation='sigmoid'))
222 |         for layer in range(1, num_layers - 1):
223 |             model.add(Dense(units=n_u, activation='sigmoid'))
224 |         model.add(Dense(units=self.num_outputs, activation='linear'))
225 |         return model
226 | 
227 | 
228 | class LSTMGrid(ModelGrid):
229 |     """
230 |     This class wraps a 2D grid of keras LSTM models.
231 |     """
232 |     _name = 'lstm'
233 | 
234 |     def __init__(self, num_inputs, num_outputs, range_units, range_layers, num_time_steps, constant_num_weights=False):
235 |         super(LSTMGrid, self).__init__(num_inputs, num_outputs, range_units, range_layers, constant_num_weights)
236 |         self.num_time_steps = num_time_steps
237 | 
238 |     @classmethod
239 |     def num_weights(cls, n_i, n_o, n_L, n_u):
240 |         return 4 * (2 * n_L - 3) * n_u**2  + (4 * n_i + n_o + 4 * n_L - 4 ) * n_u + n_o
241 | 
242 |     @classmethod
243 |     def num_units(cls, n_i, n_o, n_L, n_w):
244 |         a = 4 * (2 * n_L - 3)
245 |         p = (4 * n_i + n_o + 4 * n_L - 4 )
246 |         q = n_o - n_w
247 |         p /= a
248 |         q /= a
249 |         return int(-p / 2 + np.sqrt(p ** 2 / 4 - q))
250 | 
251 |     def _create_model(self, unit_idx, layer_idx):
252 |         num_units = self.range_units[unit_idx]
253 |         num_layers = self.range_layers[layer_idx]
254 |         n_u = self.get_num_equiv_units(num_units, num_layers)
255 |         model = Sequential()
256 |         model.add(LSTM(input_shape=(self.num_time_steps, self.num_inputs),
257 |                        units=n_u,
258 |                        activation='tanh',
259 |                        recurrent_activation='sigmoid',
260 |                        use_bias=True,
261 |                        return_sequences=True))
262 |         for layer in range(1, num_layers - 1):
263 |             model.add(LSTM(units=n_u,
264 |                            activation='tanh',
265 |                            recurrent_activation='sigmoid',
266 |                            use_bias=True,
267 |                            return_sequences=True))
268 |         model.add(Dense(self.num_outputs))
269 |         return model
270 | 
271 |     def to_json(self):
272 |         d = super().to_json()
273 |         d['num_time_steps'] = self.num_time_steps
274 |         return d
275 | 
276 |     @classmethod
277 |     def from_json(cls, jsondict):
278 |         initdict = ModelGrid.jsondict_to_initdict(jsondict)
279 |         lstm = LSTMGrid(**initdict, num_time_steps=jsondict['num_time_steps'])
280 |         lstm.unit_grid=np.array(jsondict['unit_grid'])
281 |         return lstm
282 | 


--------------------------------------------------------------------------------
/network_topology_selection/keras_grid/test_model_grid.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | import numpy as np
  3 | import tempfile
  4 | 
  5 | from keras_grid import MLPGrid, LSTMGrid
  6 | 
  7 | import os
  8 | import json
  9 | 
 10 | 
 11 | class TestModelGrid(TestCase):
 12 |     """
 13 |     Tests abstract ModelGrid class via MLPGrid instantiation.
 14 |     """
 15 | 
 16 |     def setUp(self):
 17 |         self.mlpg = MLPGrid(num_inputs=3,
 18 |                             num_outputs=2,
 19 |                             range_units=np.array([8, 26, 32]),
 20 |                             range_layers=np.array([2, 3]))
 21 |         self.mlpgc = MLPGrid(num_inputs=3,
 22 |                              num_outputs=2,
 23 |                              range_units=np.array([8, 26, 32]),
 24 |                              range_layers=np.array([2, 3]),
 25 |                              constant_num_weights=True)
 26 |         self.mlpg_trained = MLPGrid(num_inputs=3,
 27 |                             num_outputs=2,
 28 |                             range_units=np.array([8, 26, 32]),
 29 |                             range_layers=np.array([2, 3]))
 30 |         self.mlpg_trained.initialize()
 31 |         self.mlpg_trained.compile(loss='mean_squared_error',
 32 |                           metrics=['mean_squared_error', 'mean_absolute_error'],
 33 |                           optimizer='Adam')
 34 |         np.random.seed(1)
 35 |         self.mlpg_trained.fit(
 36 |             x=np.random.normal(0, 1, (10, 3)),
 37 |             y=np.random.normal(0, 1, (10, 2)),
 38 |             validation_split=0.2,
 39 |             epochs=1,
 40 |         )
 41 | 
 42 | 
 43 | 
 44 |     def test_can_instantiate(self):
 45 |         self.assertTrue(isinstance(self.mlpg, MLPGrid))
 46 |         self.assertEqual(self.mlpg.num_inputs, 3)
 47 |         self.assertEqual(self.mlpg.num_outputs, 2)
 48 |         self.assertEqual(self.mlpg.constant_num_weights, False)
 49 |         self.assertEqual(self.mlpgc.constant_num_weights, True)
 50 |         np.testing.assert_array_equal(self.mlpg.range_units, np.array([8, 26, 32]))
 51 |         np.testing.assert_array_equal(self.mlpg.range_layers, np.array([2, 3]))
 52 | 
 53 |     def test__initialize_unit_grid(self):
 54 |         self.mlpg._initialize_unit_grid()
 55 |         for i in range(len(self.mlpg.range_layers)):
 56 |             np.testing.assert_array_equal(self.mlpg.unit_grid[:, i], self.mlpg.range_units)
 57 |         self.mlpgc._initialize_unit_grid()
 58 |         for i in range(len(self.mlpgc.range_units)):
 59 |             for j in range(len(self.mlpgc.range_layers)):
 60 |                 self.assertEqual(self.mlpgc.unit_grid[i][j],
 61 |                                  self.mlpgc.get_num_equiv_units(self.mlpgc.range_units[i],
 62 |                                                                 self.mlpgc.range_layers[j]))
 63 | 
 64 |     def test_initialize_model_grid(self):
 65 |         self.mlpg._initialize_unit_grid()
 66 |         self.mlpg._initialize_model_grid()
 67 |         for i in range(len(self.mlpg.range_units)):
 68 |             for j in range(len(self.mlpg.range_layers)):
 69 |                 self.assertTrue(self.mlpg[i][j] is not None)
 70 | 
 71 |     def test_compile(self):
 72 |         self.mlpg.initialize()
 73 |         self.mlpg.compile(loss='mean_squared_error',
 74 |                           metrics=['mean_squared_error', 'mean_absolute_error'],
 75 |                           optimizer='Adam')
 76 | 
 77 |     def test_fit(self):
 78 |         for unit_idx in range(len(self.mlpg_trained.range_units)):
 79 |             for layer_idx in range(len(self.mlpg_trained.range_layers)):
 80 |                 self.assertTrue(layer_idx in self.mlpg_trained.history[unit_idx])
 81 | 
 82 |     def test_json(self):
 83 |         self.mlpg.initialize()
 84 |         jd = json.dumps(self.mlpg.to_json())
 85 |         jl = json.loads(jd)
 86 |         mlpg2 = MLPGrid.from_json(jl)
 87 |         self.assertEqual(self.mlpg, mlpg2)
 88 |         jl2 = mlpg2.to_json()
 89 |         self.assertEqual(jl, jl2)
 90 | 
 91 |     def test_save(self):
 92 |         with tempfile.TemporaryDirectory() as dirpath:
 93 |             self.mlpg_trained.save(dirpath)
 94 |             expected_files = sorted(['mlp_grid.json',
 95 |                 'mlp_grid_history.pickle',
 96 |                 'mlp_units0_layers0.h5',
 97 |                 'mlp_units0_layers1.h5',
 98 |                 'mlp_units1_layers0.h5',
 99 |                 'mlp_units1_layers1.h5',
100 |                 'mlp_units2_layers0.h5',
101 |                 'mlp_units2_layers1.h5'])
102 |             self.assertEqual(sorted(list(os.walk(dirpath))[0][2]), expected_files)
103 |             mlpg2 = MLPGrid.from_disk(dirpath)
104 |             self.assertEqual(self.mlpg_trained, mlpg2)
105 |             for unit_idx in range(len(self.mlpg_trained.range_units)):
106 |                 for layer_idx in range(len(self.mlpg_trained.range_layers)):
107 |                     self.assertEqual(self.mlpg_trained.history[unit_idx][layer_idx], mlpg2.history[unit_idx][layer_idx])
108 |                     for l1, l2 in zip(self.mlpg_trained[unit_idx][layer_idx].layers,  mlpg2[unit_idx][layer_idx].layers):
109 |                         self.assertEqual(l1.get_config(), l2.get_config())
110 | 
111 | 
112 | class TestMLPGrid(TestCase):
113 | 
114 |     def setUp(self):
115 |         self.mlpg = MLPGrid(num_inputs=3,
116 |                             num_outputs=2,
117 |                             range_units=np.array([8, 26, 32]),
118 |                             range_layers=np.array([2, 3, 4]))
119 | 
120 |     def test_num_weights(self):
121 |         self.mlpg.initialize()
122 |         n_i = self.mlpg.num_inputs
123 |         n_o = self.mlpg.num_outputs
124 |         n_w = np.array([[MLPGrid.num_weights(n_i, n_o, n_L, n_u) for n_L in self.mlpg.range_layers] for n_u in self.mlpg.range_units])
125 |         n_params = np.array([[self.mlpg[unit_idx][layer_idx].count_params() 
126 |                             for layer_idx in range(len(self.mlpg.range_layers))]
127 |                             for unit_idx in range(len(self.mlpg.range_units))])
128 |         np.testing.assert_array_equal(n_w.flatten(), n_params.flatten())
129 |     
130 |     def test_num_units(self):
131 |         n_i = self.mlpg.num_inputs
132 |         n_o = self.mlpg.num_outputs
133 |         n_w = np.array([[MLPGrid.num_weights(n_i, n_o, n_L, n_u) for n_L in self.mlpg.range_layers] for n_u in self.mlpg.range_units])
134 |         n_u = np.array([[MLPGrid.num_units(n_i, n_o, self.mlpg.range_layers[layer_idx], n_w[unit_idx][layer_idx]) for layer_idx in range(len(self.mlpg.range_layers))] for unit_idx in range(len(self.mlpg.range_units))])
135 |         np.testing.assert_array_equal(n_u.flatten(), np.repeat(self.mlpg.range_units[:, np.newaxis], 3))
136 | 
137 | 
138 | class TestLSTMGrid(TestCase):
139 | 
140 |     def setUp(self):
141 |         self.lstm = LSTMGrid(num_inputs=3,
142 |                              num_outputs=2,
143 |                              num_time_steps=10,
144 |                              range_units=np.array([8, 26, 32]),
145 |                              range_layers=np.array([2, 3, 4]))
146 |     def test_json(self):
147 |         self.lstm.initialize()
148 |         jd = json.dumps(self.lstm.to_json())
149 |         jl = json.loads(jd)
150 |         lstm2 = LSTMGrid.from_json(jl)
151 |         self.assertEqual(self.lstm, lstm2)
152 |         jl2 = lstm2.to_json()
153 |         self.assertEqual(jl, jl2)
154 | 
155 |     def test_num_weights(self):
156 |         self.lstm.initialize()
157 |         n_i = self.lstm.num_inputs
158 |         n_o = self.lstm.num_outputs
159 |         n_w = np.array([[LSTMGrid.num_weights(n_i, n_o, n_L, n_u) 
160 |                        for n_L in self.lstm.range_layers]
161 |                        for n_u in self.lstm.range_units])
162 |         n_params = np.array([[self.lstm[unit_idx][layer_idx].count_params() 
163 |                             for layer_idx in range(len(self.lstm.range_layers))]
164 |                             for unit_idx in range(len(self.lstm.range_units))])
165 |         np.testing.assert_array_equal(n_w.flatten(), n_params.flatten())
166 |     
167 |     def test_num_units(self):
168 |         n_i = self.lstm.num_inputs
169 |         n_o = self.lstm.num_outputs
170 |         n_w = np.array([[LSTMGrid.num_weights(n_i, n_o, n_L, n_u) 
171 |                         for n_L in self.lstm.range_layers]
172 |                         for n_u in self.lstm.range_units])
173 |         n_u = np.array([[LSTMGrid.num_units(n_i, n_o, self.lstm.range_layers[layer_idx], n_w[unit_idx][layer_idx])
174 |                         for layer_idx in range(len(self.lstm.range_layers))]
175 |                         for unit_idx in range(len(self.lstm.range_units))])
176 |         self.assertTrue(np.all(np.abs(n_u.flatten() - np.repeat(self.lstm.range_units[:, np.newaxis], 3))<=1))
177 |         
178 | 


--------------------------------------------------------------------------------
/network_topology_selection/pricinglib/__init__.py:
--------------------------------------------------------------------------------
1 | from pricinglib.black_scholes import BlackScholesCallPrice
2 | from pricinglib.heston import HestonCallPrice
3 | 
4 | 
5 | __all__ = ['BlackScholesCallPrice',
6 |            'HestonCallPrice']
7 | 


--------------------------------------------------------------------------------
/network_topology_selection/pricinglib/black_scholes.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.stats as stats
 3 | 
 4 | 
 5 | def BS_d1(S, dt, r, sigma, K):
 6 |     """
 7 |     Computes the auxilliary quantity d1 in the Black/Scholes forumla
 8 |     
 9 |     param S:  the current spot price of the stock
10 |     param dt: the remaining time to maturity of the option
11 |     param r:  the assumed risk-free rate
12 |     param sigma: the volatility of the stock
13 |     param K: the strike of the option
14 |     
15 |     returns: d1 as per Black/scholes formula (scalar)
16 |     """
17 |     return (np.log(S/K) + (r+sigma**2/2)*dt) / (sigma*np.sqrt(dt))
18 | 
19 | 
20 | def BlackScholesCallPrice(S, r, sigma, T, K, t=0):
21 |     """
22 |     Computes the price of a call option in the Black/Scholes model.
23 |     
24 |     param S: the current spot price of the stock
25 |     param r: the assumed risk-free rate
26 |     param r: the assumed volatility of the stock
27 |     param T: the maturity of the option    
28 |     param K: the strike of the option
29 |     param t: the current time
30 |     
31 |     returns: price of call option maturing at T as of t (scalar)
32 |     """
33 |     
34 |     dt = T-t
35 |     Phi = stats.norm(loc=0, scale=1).cdf
36 |     d1 = BS_d1(S, dt, r, sigma, K)
37 |     d2 = d1 - sigma * np.sqrt(dt)
38 |     return S * Phi(d1) - K * np.exp(-r * dt) * Phi(d2)
39 | 


--------------------------------------------------------------------------------
/network_topology_selection/pricinglib/heston.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy import  pi, exp, real, log
 3 | from scipy.integrate import quad,quadrature, trapz
 4 | from scipy.optimize import least_squares,fminbound
 5 | from scipy.stats import norm
 6 | 
 7 | 
 8 | #Heston Characteristic Function
 9 | def heston_char_fkt(S,T,r,q,u,v0,vLong,kappa,sigma,rho):
10 |         gamma = kappa - 1j*rho*sigma*u
11 |         d = np.sqrt( gamma**2 + (sigma**2)*u*(u+1j) )
12 |         g = (gamma - d)/(gamma + d)
13 |         C = (kappa*vLong)/(sigma**2)*((gamma-d)*T-2*np.log((1 - g*exp(-d*T))/( 1 - g ) ))
14 |         D = (gamma - d)/(sigma**2)*((1 - np.exp(-d*T))/
15 |           (1 - g*np.exp(-d*T)))
16 |         F = S*exp((r-q)*T)
17 |         return exp(1j*u*np.log(F) + C + D*v0)
18 | 
19 | #Heston Fundamental Transform
20 | def heston_trafo(S,T,r,q,u,v0,vLong,kappa,sigma,rho):
21 |         F = S*np.exp((r-q)*T)
22 |         return np.exp(-1j*u*log(F))* heston_char_fkt(S,T,r,q,u,v0,vLong,kappa,sigma,rho)
23 |     
24 | #Heston Integrand(self,k,K,T)
25 | def hestonintegrand(S,T,K,r,q,k,v0,vLong,kappa,sigma,rho):
26 |         F = S*np.exp((r-q)*T)
27 |         x = np.log(F/K)
28 |         return real(np.exp(1j*k*x)/(k**2 + 1.0/4.0) * heston_trafo(S,T,r,q,k - 0.5*1j,v0,vLong,kappa,sigma,rho))
29 | 
30 | def heston_f1(u,logeps,v0,T):
31 |         return abs(-0.5* v0* T * u**2 - np.log(u) - logeps)
32 |         
33 | def heston_f2(u,logeps,v0,vLong,kappa,sigma,rho,T):
34 |         Cinf = (v0+kappa*vLong*T)/sigma*np.sqrt(1-rho**2)
35 |         return abs(-Cinf*u - np.log(u) - logeps) 
36 |     
37 | def HestonCallPrice(S,T,K,r,q,v0,vLong,kappa,sigma,rho):
38 |     """
39 |     Computes the price of a call option in the Black/Scholes model.
40 |     
41 |     param S: the current spot price of the stock
42 |     param r: the assumed risk-free rate
43 |     param q: the assumed dividend yield
44 |     param v0: spot variance
45 |     param vLong: long term variance
46 |     param kappa: mean reversion of variance
47 |     param sigma: volatility of variance
48 |     param rho: correlation of the driving BMs
49 |     param T: the maturity of the option    
50 |     param K: the strike of the option
51 |     param t: the current time
52 |     
53 |     returns: price of call option maturing at T as of t (scalar)
54 |     """
55 | 
56 |     val = 0
57 |     a = (v0 * T)**0.5
58 |     d1 = (log(S /K) + ((r-q) + v0 / 2) * T) / a
59 |     d2 = d1 - a
60 |     BSCall = S * np.exp(-q*T) * norm.cdf(d1) - K * exp(-r*T) * norm.cdf(d2)
61 |     logeps = log(0.00001)
62 |     F = S*exp((r-q)*T)
63 |     x = log(K/F)
64 | 
65 |     umax1 = fminbound(heston_f1,0,1000,args=(logeps,v0,T,))
66 |     umax2 = fminbound(heston_f2,0,1000,args=(logeps,v0,vLong,kappa,sigma,rho,T,))
67 |     umax = max(umax1,umax2)
68 |     X = np.linspace(0,umax,1000)
69 |     integrand = lambda k: real(np.exp(-1j*k*x)/(k**2 + 0.25) *(np.exp(-0.5*T*v0*(k**2 + 0.25))-
70 |                      heston_trafo(S,T,r,q,k - 0.5*1j,v0,vLong,kappa,sigma,rho)))
71 |     integral = trapz(integrand(X),x=X)
72 |     val = (BSCall + np.sqrt(F*K)/pi * np.exp(-r*T) * integral)
73 |     return val
74 | 


--------------------------------------------------------------------------------
/neural_network_intro/neural_network_intro_model_setup.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Artificial Neural Networks</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "<div style=\"text-align: right\">(C) <a href=\"https://github.com/niknow\">Nikolai Nowaczyk</a>, <a href=\"https://github.com/Lapsilago\">Jörg Kienitz</a> 2019-2021</div>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stderr",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "Using TensorFlow backend.\n"
 27 |      ]
 28 |     }
 29 |    ],
 30 |    "source": [
 31 |     "%matplotlib widget\n",
 32 |     "import matplotlib.pyplot as plt\n",
 33 |     "import numpy as np\n",
 34 |     "from keras.models import Sequential\n",
 35 |     "from keras.layers import Dense\n",
 36 |     "from IPython.display import SVG"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "# Mathematical Definitions"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "**Definition (neural network):** A *neural network* $\\operatorname{NN}$ is a tuple $\\operatorname{NN}=(A_l, b_l, \\sigma_l)_{1 \\leq l \\leq N_l}$ defined by\n",
 51 |     "* a numer $n_i$ of *inputs*,\n",
 52 |     "* a number $n_o$ of *outputs*\n",
 53 |     "* a number $N_L$ of *layers* and\n",
 54 |     "* for each layer $1 \\leq l \\leq N_L$ \n",
 55 |     "  * a number $n_l$ of *neurons* (or *units*),\n",
 56 |     "  * a matrix $A_{l} = (A_{l;ij}) \\in \\mathbb{R}^{n_{l-1} \\times n_l}$ and a vector $b_l = b_{l;i} \\in \\mathbb{R}^{n_l}$ of *weights* such that $n_0 = n_i$, $n_{N_L}=n_o$ and\n",
 57 |     "  * an *activation function* $\\sigma_l:\\mathbb{R} \\to \\mathbb{R}$.\n",
 58 |     "For any $1 \\leq l \\leq N_L$, the tuple $(A_l, b_l, \\sigma_l)$ is called a *layer*. For $l=N_L$, the layer is called *output layer* and for $1 \\leq l<N_L$, the layer is called *hidden layer*."
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "**Definition (feed forward):** Let $\\operatorname{NN}=(A_l, b_l, \\sigma_l)_{1 \\leq l \\leq N_l}$  be a neural network. Then for each $1 \\leq l \\leq N_l$, we define a function \n",
 66 |     "$$F_l:\\mathbb{R}^{n_{l-1}} \\to \\mathbb{R}^{n_l}, \\qquad v \\mapsto \\sigma_l(v^T A_l + b_l),$$\n",
 67 |     "where we employ the convention that $\\sigma_l$ is applied in every component.\n",
 68 |     "The composition $F:\\mathbb{R}^{n_i} \\to \\mathbb{R}^{n_o}$, $F := F_{N_L} \\circ \\ldots \\circ F_2 \\circ F_1$ is called the *feed forward* of  $\\operatorname{NN}$. Any set of inputs $x \\in \\mathbb{R}^{n_i}$ is called an *input layer*."
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "**Warning:** The formulation $F_l(v) = \\sigma_l(A_l v + b_l)$ is also very common. Conceptually, this doesn't make a difference as long as the dimensions of $A_l$ are then chosen accordingly. The conventions in this notebook are consistent with ``keras``."
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "# Graphical Illustration"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 15,
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "data": {
 92 |       "application/vnd.jupyter.widget-view+json": {
 93 |        "model_id": "e53ab03a95c642c180d737be58b0c9f8",
 94 |        "version_major": 2,
 95 |        "version_minor": 0
 96 |       },
 97 |       "text/plain": [
 98 |        "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to  previous…"
 99 |       ]
100 |      },
101 |      "metadata": {},
102 |      "output_type": "display_data"
103 |     }
104 |    ],
105 |    "source": [
106 |     "fig, ax = plt.subplots(figsize=(7, 5))\n",
107 |     "ax.axis(\"equal\")\n",
108 |     "xs = 2\n",
109 |     "ys = 1.5\n",
110 |     "radius = 0.5\n",
111 |     "fsize = 20\n",
112 |     "\n",
113 |     "def add_circle(x, y, label, color='r'):\n",
114 |     "    c1 = plt.Circle((x, y), radius, alpha=0.2, color=color, zorder=1)\n",
115 |     "    c2 = plt.Circle((x, y), radius, color='k', fill=False, zorder=1)\n",
116 |     "    ax.add_artist(c1)\n",
117 |     "    ax.add_artist(c2)\n",
118 |     "    ax.text(x-radius/2, y-radius/3, label, size=fsize)\n",
119 |     "    \n",
120 |     "def add_line(x1, x2, y1, y2):\n",
121 |     "    ax.plot([x1, x2], [y1, y2], alpha=0.1, color='k', zorder=-1)\n",
122 |     "    \n",
123 |     "for unit in (1, 0, -1):\n",
124 |     "    add_circle(0, unit * ys, \"$x_{%s}$\" % str(3 - unit - 1), color='green')\n",
125 |     "for hlayer in (1,2,3):\n",
126 |     "    for unit in (-2, -1, 0, 1, 2):\n",
127 |     "        add_circle(hlayer * xs, unit * ys, \"$v_{%s;%s}$\" % (str(hlayer),str(5 - unit - 1)), color='blue')\n",
128 |     "for unit in (-1, 1):\n",
129 |     "    add_circle(4*xs, unit * ys, \"$y_{%s}$\" % (\"1\" if unit==1 else \"2\"), color='red')\n",
130 |     "for iunit in (-1, 0, 1):\n",
131 |     "    for hunit in (-2, -1, 0, 1, 2):\n",
132 |     "        add_line(0, xs, iunit*ys, hunit*ys)\n",
133 |     "for hlayer in (1, 2):\n",
134 |     "    for unita in (-2, -1, 0, 1, 2):\n",
135 |     "        for unitb in (-2, -1, 0, 1, 2):\n",
136 |     "            add_line(hlayer*xs, (hlayer+1)*xs, unita*ys, unitb*ys)\n",
137 |     "for hunit in (-2, -1, 0, 1, 2):\n",
138 |     "    for ounit in (-1, 1):\n",
139 |     "        add_line(3*xs, 4*xs, hunit*ys, ounit*ys)\n",
140 |     "ax.text(0, -4, \"Input\", size=fsize)\n",
141 |     "ax.text(1.7*xs, -4, \"Hidden\", size=fsize)\n",
142 |     "ax.text(3.7*xs, -4, \"Output\", size=fsize)\n",
143 |     "        \n",
144 |     "ax.set_xlim([0,5])\n",
145 |     "ax.set_ylim([-4,4])\n",
146 |     "_ = plt.axis('off')"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "Neural networks can be visualized like in the above picture: This shows a network $(A_l, b_l, \\sigma_l)_{1 \\leq l \\leq N_L}$ with a total of $N_L=4$ layers, i.e. $3$ layers are hidden. Notice that the input layer is just a visualization of the input and is not part of the actual network topology. \n",
154 |     "\n",
155 |     "The links between the nodes visualize the feedforward. This can be seen as follows: The input layer as $n_i=3$ units, i.e. any $x \\in \\mathbb{R}^3$ can be used as an input layer. The first hidden layer as $n_1=5$ units, i.e. the first function in the feedforward is a map $F_1: \\mathbb{R}^3 \\to \\mathbb{R}^5$, $x \\mapsto F_1(x) = \\sigma(xA_1 + b_1)$. If we unwind the definition of the matrix-vector multiplication and compute the components $v_{1;i}$ of the result $v_1 := F_1(x) $, we obtain for any $1 \\leq i \\leq 5$\n",
156 |     "$$ v_{1;i} = \\sigma_1 \\Big{(} \\sum_{j=1}^{3}{x_j A_{1;ij}}  \\Big{)}$$\n",
157 |     "For the first component $i=1$, we can see that it depends on all the inputs, i.e. on $x_1, x_2, x_3$. This is visualized by the $3$ lines connecting the respective units. The same holds for the other components. \n",
158 |     "The computation of the feedforward now proceeds through the second hidden layers in the same fashion as from the input layer into the first hidden layer, i.e. the computed outputs of the first layer $v_{1} \\in \\mathbb{R}^5$ can now be though of as an input to the second layer. Here, the feedforward function is $F_2:\\mathbb{R}^5 \\to \\mathbb{R}^5$ and the layer computes $v_2 := F_2(v_1)$ again for every component $1 \\leq i \\leq 5$ via\n",
159 |     "$$ v_{2;i} = \\sigma_2 \\Big{(} \\sum_{j=1}^{5}{x_j A_{2;ij}}  \\Big{)}$$\n",
160 |     "The computation for $F_3:\\mathbb{R}^5 \\to \\mathbb{R}^5$ is analogous and we obtain $v_3 := F_3(v_2)$. Finally, the output layer applies $F_4:\\mathbb{R}^5 \\to \\mathbb{R}^2$ and we obtain $v_4 := F_4(v_3)$. The output layer computes this just like the hidden layer. By definition the result of the output layer is the final result of the complete computation, i.e. $y:= F(x) = v_4 = F_4(v_3)$."
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "# Implementation in Keras"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "Neural Networks can be conveniently set up in Python using `keras`. In order to train them, a backend is required. We chose `tensorflow` here."
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "markdown",
179 |    "metadata": {},
180 |    "source": [
181 |     "## Setting up the model"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {},
187 |    "source": [
188 |     "We setup the neural network from the previous section in `keras`."
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 3,
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "model = Sequential() # initializes the model whith no layers yet\n",
198 |     "model.add(Dense(units=5, input_dim=3, activation='sigmoid')) # first hidden layer needs to know the input dimension\n",
199 |     "model.add(Dense(units=5, activation='sigmoid')) # second hidden layer\n",
200 |     "model.add(Dense(units=5, activation='sigmoid')) # third hidden layer\n",
201 |     "model.add(Dense(units=2)) # output layer (uses linear activation here as a default)\n",
202 |     "model.compile(optimizer='adam', loss='MAE') # this creates the actual model (optimizer and loss metric are needed for training later)"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "## Inspecting the model"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 4,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "_________________________________________________________________\n",
222 |       "Layer (type)                 Output Shape              Param #   \n",
223 |       "=================================================================\n",
224 |       "dense_1 (Dense)              (None, 5)                 20        \n",
225 |       "_________________________________________________________________\n",
226 |       "dense_2 (Dense)              (None, 5)                 30        \n",
227 |       "_________________________________________________________________\n",
228 |       "dense_3 (Dense)              (None, 5)                 30        \n",
229 |       "_________________________________________________________________\n",
230 |       "dense_4 (Dense)              (None, 2)                 12        \n",
231 |       "=================================================================\n",
232 |       "Total params: 92\n",
233 |       "Trainable params: 92\n",
234 |       "Non-trainable params: 0\n",
235 |       "_________________________________________________________________\n"
236 |      ]
237 |     }
238 |    ],
239 |    "source": [
240 |     "model.summary() # provides neat summary about the network topology and the parameters"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 16,
246 |    "metadata": {},
247 |    "outputs": [],
248 |    "source": [
249 |     "# keras can also plot the model automatically (Graphviz needs to be installed and in PATH)\n",
250 |     "# from keras.utils.vis_utils import model_to_dot\n",
251 |     "# SVG(model_to_dot(model).create(prog='dot', format='svg'))"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "## Inspecting the weights"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": 6,
264 |    "metadata": {},
265 |    "outputs": [
266 |     {
267 |      "data": {
268 |       "text/plain": [
269 |        "[array([[ 0.7183232 ,  0.38307422,  0.0776127 ,  0.8320963 , -0.09762895],\n",
270 |        "        [-0.2450003 , -0.85496855, -0.35251206,  0.66085297, -0.47060898],\n",
271 |        "        [-0.52232635, -0.4491664 ,  0.41039938, -0.19148451, -0.11118329]],\n",
272 |        "       dtype=float32),\n",
273 |        " array([0., 0., 0., 0., 0.], dtype=float32),\n",
274 |        " array([[-0.23155713, -0.24680787, -0.0723573 , -0.66734   ,  0.7170485 ],\n",
275 |        "        [-0.6729239 , -0.5296908 ,  0.09631431, -0.06348813, -0.04985815],\n",
276 |        "        [-0.573444  , -0.5506638 , -0.39181206,  0.7385675 ,  0.5457883 ],\n",
277 |        "        [-0.6523951 , -0.69977224, -0.66549873,  0.57492626, -0.5063471 ],\n",
278 |        "        [ 0.28198433, -0.39915782,  0.2621491 ,  0.0493477 ,  0.69479597]],\n",
279 |        "       dtype=float32),\n",
280 |        " array([0., 0., 0., 0., 0.], dtype=float32),\n",
281 |        " array([[-0.6755266 ,  0.60235417, -0.13710588, -0.2865331 ,  0.5920446 ],\n",
282 |        "        [ 0.14386547,  0.43755114,  0.71060646,  0.20056444, -0.24053401],\n",
283 |        "        [-0.76083595,  0.14162123,  0.4353727 , -0.21385908, -0.25838703],\n",
284 |        "        [ 0.01410168,  0.640501  , -0.1288696 , -0.11130595, -0.02846223],\n",
285 |        "        [-0.25267512,  0.2775594 ,  0.54000807,  0.40621924,  0.7530713 ]],\n",
286 |        "       dtype=float32),\n",
287 |        " array([0., 0., 0., 0., 0.], dtype=float32),\n",
288 |        " array([[ 0.5556489 , -0.34421366],\n",
289 |        "        [-0.5555906 , -0.02127665],\n",
290 |        "        [-0.8151509 ,  0.861356  ],\n",
291 |        "        [-0.21263468,  0.13752174],\n",
292 |        "        [-0.8979237 ,  0.4667455 ]], dtype=float32),\n",
293 |        " array([0., 0.], dtype=float32)]"
294 |       ]
295 |      },
296 |      "execution_count": 6,
297 |      "metadata": {},
298 |      "output_type": "execute_result"
299 |     }
300 |    ],
301 |    "source": [
302 |     "# after the model is set up the weights are initialized with random values\n",
303 |     "model.get_weights() # notice that the shape of the matrices and vectors are as expected"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "markdown",
308 |    "metadata": {},
309 |    "source": [
310 |     "## Feedforward"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": 7,
316 |    "metadata": {},
317 |    "outputs": [
318 |     {
319 |      "data": {
320 |       "text/plain": [
321 |        "array([-1.3470876,  0.7427888], dtype=float32)"
322 |       ]
323 |      },
324 |      "execution_count": 7,
325 |      "metadata": {},
326 |      "output_type": "execute_result"
327 |     }
328 |    ],
329 |    "source": [
330 |     "# we create an input layer\n",
331 |     "x = np.array([1,2,3])\n",
332 |     "X = x.reshape(1,3) # because usually ouputs are computed for multiple inputs, the input layer needs an additional axis here\n",
333 |     "Y = model.predict(X) # computes the outputs for all of the current inputs\n",
334 |     "y = Y[0] # currently, we only have one input\n",
335 |     "y"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "markdown",
340 |    "metadata": {},
341 |    "source": [
342 |     "## Reproduce the Computation"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "markdown",
347 |    "metadata": {},
348 |    "source": [
349 |     "In this section, we manually reproduce the computation of `y` from `X`."
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "code",
354 |    "execution_count": 8,
355 |    "metadata": {},
356 |    "outputs": [],
357 |    "source": [
358 |     "def sigmoid(x):\n",
359 |     "    return 1 / (1 + np.exp(-x))"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "code",
364 |    "execution_count": 9,
365 |    "metadata": {},
366 |    "outputs": [
367 |     {
368 |      "name": "stdout",
369 |      "output_type": "stream",
370 |      "text": [
371 |       "[-1.34708779  0.74278881]\n"
372 |      ]
373 |     }
374 |    ],
375 |    "source": [
376 |     "A = model.get_weights()[::2] # extracting the matrix weights\n",
377 |     "b = model.get_weights()[1::2] # extracting the vector weights\n",
378 |     "\n",
379 |     "# compute the feedforward\n",
380 |     "v = [sigmoid(x @ A[0] + b[0])]\n",
381 |     "v.append(sigmoid(v[0] @ A[1] + b[1]))\n",
382 |     "v.append(sigmoid(v[1] @ A[2] + b[2]))\n",
383 |     "v.append(v[2] @ A[3] + b[3])\n",
384 |     "print(v[3])\n",
385 |     "\n",
386 |     "# result is in the last vector, which should agree with the keras computation (up to numerical errors)\n",
387 |     "np.testing.assert_array_almost_equal(y,v[3])"
388 |    ]
389 |   }
390 |  ],
391 |  "metadata": {
392 |   "kernelspec": {
393 |    "display_name": "heroku",
394 |    "language": "python",
395 |    "name": "heroku"
396 |   },
397 |   "language_info": {
398 |    "codemirror_mode": {
399 |     "name": "ipython",
400 |     "version": 3
401 |    },
402 |    "file_extension": ".py",
403 |    "mimetype": "text/x-python",
404 |    "name": "python",
405 |    "nbconvert_exporter": "python",
406 |    "pygments_lexer": "ipython3",
407 |    "version": "3.6.12"
408 |   },
409 |   "toc": {
410 |    "base_numbering": 1,
411 |    "nav_menu": {},
412 |    "number_sections": true,
413 |    "sideBar": true,
414 |    "skip_h1_title": false,
415 |    "title_cell": "Table of Contents",
416 |    "title_sidebar": "Contents",
417 |    "toc_cell": false,
418 |    "toc_position": {},
419 |    "toc_section_display": true,
420 |    "toc_window_display": false
421 |   }
422 |  },
423 |  "nbformat": 4,
424 |  "nbformat_minor": 2
425 | }
426 | 


--------------------------------------------------------------------------------
/newton_gradient_backprop/adjoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Adjoint Method & Backpropagation</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "<div style=\"text-align: right\">(C) <a href=\"https://github.com/niknow\">Nikolai Nowaczyk</a>, <a href=\"https://github.com/Lapsilago\">Jörg Kienitz</a> 2019-2021</div>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "The adjoint method is a technique to speed up the calculation of derivatives for complicated functions, which can we written as a decomposition of simpler functions. It is widely used in applied mathematics such as oceanography and mathematical finance. We recap the method and show how it is related to the [backpropagation algorithm of neural networks](backpropagation.ipynb). "
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "# Derivatives and the Chain Rule"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "The key ingredient to the adjoint method is the chain rule.\n",
 36 |     "\n",
 37 |     "**Theorem (chain rule):** Let $f_1:\\mathbb{R}^{n_0} \\to \\mathbb{R}^{n_1}$ and $f_2:\\mathbb{R}^{n_1} \\to \\mathbb{R}^{n_2}$ be two differentiable funcions and $f := f_2 \\circ f_1: \\mathbb{R}^{n_0} \\to \\mathbb{R}^{n_2}$ be the composition. Then $f$ is differentiable and\n",
 38 |     "\\begin{align}\n",
 39 |     "    \\forall x \\in \\mathbb{R}^{n_0}: \\nabla f(x) = \\nabla f_2 (f_1(x)) \\bullet \\nabla f_1(x),\n",
 40 |     "\\end{align}\n",
 41 |     "where $\\bullet$ denotes the ordinary matrix product. \n",
 42 |     "\n",
 43 |     "Remark: This theorem holds analogously on functions, which are only defined on open subsets."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "# Forward method\n",
 51 |     "\n",
 52 |     "Of course, the chain rule can be applied successively: Assume we are given a scalar function, which is a result of a complicated compisition of functions, i.e. we are given functions $f_l:\\mathbb{R}^{n_{l-1}} \\to \\mathbb{R}^{n_l}$ and $g:\\mathbb{R}^{n_L} \\to \\mathbb{R}$ and we want to calculate the derivative of the composition\n",
 53 |     "\\begin{align}\n",
 54 |     "    F := g \\circ f_L \\circ \\ldots \\circ f_1: \\mathbb{R}^{n_0} \\to \\mathbb{R}\n",
 55 |     "\\end{align}\n",
 56 |     "at some point $x_0 \\in \\mathbb{R}^{n_0}$. Then the straight-forward way to calculate this would be to proceed in two steps:\n",
 57 |     "\n",
 58 |     "1. Starting from $x_0$ successively compute\n",
 59 |     "\\begin{align}\n",
 60 |     "    x_l := f_l(x_{l-1})\n",
 61 |     "\\end{align}\n",
 62 |     "for $l=1, \\ldots, L$ and finally  $y := g(x_L)$. \n",
 63 |     "\n",
 64 |     "2. Compute all the derivatives $\\nabla f_l (x_{l-1})$ for $l=1, \\ldots, L$ and $\\nabla g(x_L)$ and then starting from $D_1 := \\nabla f_1 (x_0)$ sucessively compute\n",
 65 |     "\\begin{align}\n",
 66 |     "    D_l := \\nabla f_l(x_{l}) \\bullet D_{l-1}\n",
 67 |     "\\end{align}\n",
 68 |     "and finally $D := \\nabla g (x_L) \\bullet D_L$.\n",
 69 |     "\n",
 70 |     "Because of the chain rule\n",
 71 |     "\\begin{align}\n",
 72 |     "    \\nabla F(x_0) = \\nabla g(x_L) \\bullet \\nabla f_L(x_{l-1}) \\bullet \\ldots \\bullet \\nabla f_1(x_0)\n",
 73 |     "\\end{align}\n",
 74 |     "this *forward method* correctly computes $\\nabla F(x_0)$."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "## Complexity\n",
 82 |     "The foward method computes $\\nabla F(x_0)$ by a sequence of matrix multiplications starting on the right of the above equation. That means that in each step when $D_l$ is calculated from $D_{l-1}$ a matrix multiplication needs to be executed, which in a naive implementation has complexity $\\mathcal{O}(n^3)$. \n",
 83 |     "\n",
 84 |     "**Key insight:** Because the last function $g$ in the chain is scalar valued, the derivative $\\nabla g(x_L)$ is a vector and thus $\\nabla F(x_0) = \\nabla g(x_L) \\bullet D_L$ is also a vector. Thus, computing the matrix products by the foward method just to arrive at a vector is not very efficient."
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "# Adjoint Method\n",
 92 |     "The adjoint method computes the $\\nabla F(x_0)$ using the exact same equation of the chain rule but not evaluated from right to left, but from left to right. Mathematically, one often formulates that by computing the adjoint equation instead, which in $\\mathbb{R}^n$ simply amounts to taking the transpose:\n",
 93 |     "\n",
 94 |     "\\begin{align}\n",
 95 |     "    \\nabla F(x_0)^{\\top} = \\nabla f_1(x_0)^{\\top} \\bullet \\ldots \\bullet \\nabla f_L(x_{L-1})^{\\top} \\bullet \\nabla g(x_L)^{\\top} \n",
 96 |     "\\end{align}\n",
 97 |     "\n",
 98 |     "Algorithmically, this means that the adjoint method starts by computing $V_{L+1} := \\nabla g(x_L)^{\\top}$ and then computes\n",
 99 |     "\n",
100 |     "\\begin{align*}\n",
101 |     "    V_l := \\nabla f_l(x_{l-1})^{\\top} \\bullet V_{l+1}\n",
102 |     "\\end{align*}\n",
103 |     "\n",
104 |     "backwards from $l=L, \\ldots, 1$. \n",
105 |     "\n",
106 |     "This is a sequence of matrix-vector multiplications, which only as complexity $\\mathcal{O}(n^2)$ and is thus much faster."
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "# Adjoint Method and Backpropagation\n",
114 |     "\n",
115 |     "The forward method and the adjoint method relate to neural networks as follows:\n",
116 |     "\n",
117 |     "Let $\\operatorname{NN} = (A_l, b_l, \\sigma_l)_{1 \\leq l \\leq L}$ be a neural network with $L$ layers. Its feed-forward function can be written as a composition $F = F_L \\circ \\ldots \\circ F_1$ and for each $l$, we have $F_l = \\sigma_l \\circ f_{A_l, b_l}$. Here, for any matrix $A$ and $b$ of compatible dimensions, we denote by $f_{A,b}$ the function $v \\mapsto Av + b$. \n",
118 |     "\n",
119 |     "For any fixed input $x=:a_0$, we can compute the feed-forward via\n",
120 |     "\\begin{align}\n",
121 |     "    z_l := f_{A_l, b_l}(a_{l-1}), && a_l := \\sigma_l(z_l)\n",
122 |     "\\end{align}\n",
123 |     "for $l=1, \\ldots, L$. This is exactly the first step of the forward method above."
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "Now, for that fixed input $x$ also fix a cost function $C$. Then the function $C \\circ F$ is exactly of the shape above, i.e. it is a composition\n",
131 |     "\\begin{align}\n",
132 |     "    C\\circ F &= (C \\circ \\sigma_L) \\circ (f_{A_L, b_L} \\circ \\sigma_{L-1}) \\circ \\ldots \\sigma_2 \\circ (f_{A_2, b_2} \\circ \\sigma_1) \\circ f_{A_1, b_1} \\\\\n",
133 |     "    &=: g \\circ h_L \\circ \\ldots h_2 \\circ f_{A_1, b_1},\n",
134 |     "\\end{align}\n",
135 |     "where $g := C \\circ \\sigma_L$ and $h_l := f_{A_{l+1}, b_{l+1}} \\circ \\sigma_{l}$. Thus, its transposed derivative is given via\n",
136 |     "\\begin{align}\n",
137 |     "    \\nabla (C\\circ F)(x)^{\\top} = \\nabla f_{A_1, b_1}(a_0)^{\\top} \\bullet \\nabla h_2(z_2)^{\\top} \\ldots \\bullet \\nabla h_{L-1}(z_{L-1})^{\\top} \\bullet \\nabla g(z_L)^{\\top}\n",
138 |     "\\end{align}\n",
139 |     "and can be computed via the adjoint method by a backwards sequence of vectors $V_l$, where\n",
140 |     "\\begin{align}\n",
141 |     "    V_{L} & = \\nabla g(z_L)^{\\top} = \\nabla \\sigma_L(z_L)^{\\top} \\bullet \\nabla C(a_L)^{\\top}\n",
142 |     "\\end{align}\n",
143 |     "and for all $l=L-1, \\ldots, 2$\n",
144 |     "\\begin{align}\n",
145 |     "    V_{l} = \\nabla h_l(z_l)^{\\top} \\bullet V_{l+1} \n",
146 |     "    = \\nabla(f_{A_{l+1},b_{l+1}} \\circ \\sigma_{l})(z_l)^{\\top} \\bullet V_{l+1} \n",
147 |     "    = \\nabla \\sigma_{l}(z_l)^{\\top} \\bullet A_{l+1}^{\\top} \\bullet V_{l+1}\n",
148 |     "\\end{align}\n",
149 |     "and finally\n",
150 |     "\\begin{align}\n",
151 |     "    V_1 = \\nabla f_{A_1, b_1}(a_0)^{\\top} \\bullet V_2 = A_1^{\\top} \\bullet V_2.\n",
152 |     "\\end{align}"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "In the [chapter on backpropagation](backpropagation.ipynb), we also computed the derivative $\\nabla(C\\circ F)$. There, we defined the functions $G_l := C \\circ F_L \\circ \\ldots \\circ F_{l+1} \\circ \\sigma_l$ and have shown that their gradients $\\varepsilon_l := \\nabla G_l(z_l)$ satisfy \n",
160 |     "\\begin{align*}\n",
161 |     "   \\varepsilon_L &= \\nabla \\sigma_L(z_L)^{\\top} \\bullet \\nabla C(a_L)^{\\top}, \\\\\n",
162 |     "   \\varepsilon_{l} &=  \\nabla \\sigma_{l}(z_l)^{\\top} \\bullet A_{l+1}^{\\top} \\bullet \\varepsilon_{l+1}.\n",
163 |     "\\end{align*}\n",
164 |     "Thus, \n",
165 |     "\\begin{align*}\n",
166 |     "    \\forall 2 \\leq l \\leq L: \\varepsilon_l = V_l,\n",
167 |     "\\end{align*}\n",
168 |     "i.e. the backpropagation lemma is just a special case of the adjoint method."
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {},
174 |    "source": [
175 |     "# Conclusion\n",
176 |     "\n",
177 |     "1. The [Newton's method](newton.ipynb) finds roots of non-linear functions $F$ and the algorithm needs $\\nabla F$. It can be used for optimization by finding roots of $\\nabla F$. This requires $\\nabla ^2 F$ though, which might not always be conveniently available.\n",
178 |     "\n",
179 |     "2. [Gradien Descent](gradient_descent.ipynb) minimizes a function $F$ by using only its first order derivative $\\nabla F$.\n",
180 |     "\n",
181 |     "3. [Backpropagation](backpropagation.ipynb) is a method to efficiently compute the gradient $\\nabla_{\\Theta}(C\\circ F_{\\Theta}(x))$ of the cost function of feed-foward of a neural network with respect to its weights $\\Theta$. This gradient is needed to minimize the cost function via gradient descent. Backpropagation computes this gradient by first computing $\\nabla _x (C \\circ F_{\\Theta})(x)$, the cost of the feed-forward with respect to its input, via a backwards recursion.\n",
182 |     "\n",
183 |     "4. The adjoint method is a general method to compute derivatives of complicated compositions of functions via a backwards recursion. The backpropagation method is a special case of the adjoint method.\n",
184 |     "\n",
185 |     "Notice that as a consequence, we obtain that a neural network whose feed-forward function $F$ has been trained to compute the price of a derivative, also computes the sensitivities of that price using the adjoint method along the way automatically when it runs its backpropagation."
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "# References\n",
193 |     "\n",
194 |     "* Giles, Glasserman: Smoking Adjoints\n",
195 |     "* Leclerq, Lian, Schneider: [Fast Monte Carlo Bermudan Greeks](https://www.risk.net/derivatives/interest-rate-derivatives/1500258/fast-monte-carlo-bermudan-greeks), RISK 07/2009\n",
196 |     "* Kienitz, Nowaczyk: [Affine Recursion Problem and a General Framework for Adjoint Methods for Calculating Sensitivities for Financial Instruments](https://ssrn.com/abstract=1957082), SSRN 11/2011"
197 |    ]
198 |   }
199 |  ],
200 |  "metadata": {
201 |   "kernelspec": {
202 |    "display_name": "Python 3",
203 |    "language": "python",
204 |    "name": "python3"
205 |   },
206 |   "language_info": {
207 |    "codemirror_mode": {
208 |     "name": "ipython",
209 |     "version": 3
210 |    },
211 |    "file_extension": ".py",
212 |    "mimetype": "text/x-python",
213 |    "name": "python",
214 |    "nbconvert_exporter": "python",
215 |    "pygments_lexer": "ipython3",
216 |    "version": "3.9.1"
217 |   },
218 |   "toc": {
219 |    "base_numbering": 1,
220 |    "nav_menu": {},
221 |    "number_sections": true,
222 |    "sideBar": true,
223 |    "skip_h1_title": false,
224 |    "title_cell": "Table of Contents",
225 |    "title_sidebar": "Contents",
226 |    "toc_cell": false,
227 |    "toc_position": {},
228 |    "toc_section_display": true,
229 |    "toc_window_display": false
230 |   }
231 |  },
232 |  "nbformat": 4,
233 |  "nbformat_minor": 2
234 | }
235 | 


--------------------------------------------------------------------------------
/newton_gradient_backprop/backpropagation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Derivation of the Backpropagation Algorithm</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Assume we are given a neural network $\\operatorname{NN}$ with feed forward $F = F_{\\Theta}:\\mathbb{R}^{n_i} \\to \\mathbb{R}^{n_o}$, where $\\Theta$ is the collection of the weights in all the layers. If we want to train this network using [gradient descent](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/newton_gradient_backprop/gradient_descent.ipynb), we need to calculate the derivative $\\nabla_{\\Theta} F_{\\Theta}$. Because $F = F_L \\circ \\ldots F_1$ is a composition of the various feed forwards of the layers and each layer has some weights, computing this derivative is not entirely trivial.\n",
 15 |     "\n",
 16 |     "Backpropagation is an algorithm that is based on a clever computation of the derivative $\\nabla_{\\Theta}F_{\\Theta}$, which - as the name might suggest - starts from the back of the network, i.e. the output layer $F_L$ and then works its way backwards to the first layer.\n",
 17 |     "\n",
 18 |     "In this notebook, we provide the mathematical foundations of backpropagations and derive the key equations."
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "# Recall Definition & Notation for Neural Networks"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "In order to pin down the precise equations for backpropagation, we first have to pin down the definition of neural network. Even for multilayer perceptrons (MLPs), there are various formulations of them in the literature. We will use the following:\n",
 33 |     "\n",
 34 |     "**Definition (neural network):** A *neural network* $\\operatorname{NN}$ is a tuple $\\operatorname{NN}=(A_l, b_l, \\sigma_l)_{1 \\leq l \\leq L}$ defined by\n",
 35 |     "* a numer $n_i$ of *inputs*,\n",
 36 |     "* a number $n_o$ of *outputs*\n",
 37 |     "* a number $L$ of *layers* and\n",
 38 |     "* for each layer $1 \\leq l \\leq L$ \n",
 39 |     "  * a number $n_l$ of *neurons* (or *units*),\n",
 40 |     "  * a matrix $A_l \\in \\mathbb{R}^{n_{l} \\times n_{l-1}}$ and a vector $b_l \\in \\mathbb{R}^{n_l}$ of *weights* such that $n_0 = n_i$, $n_{L}=n_o$ and\n",
 41 |     "  * an *activation function* $\\sigma_l:\\mathbb{R} \\to \\mathbb{R}$.\n",
 42 |     "\n",
 43 |     "For any $1 \\leq l \\leq L$, the tuple $(A_l, b_l, \\sigma_l)$ is called a *layer*. For $l=L$, the layer is called *output layer* and for $1 \\leq l< L$, the layer is called *hidden layer*. We denote by $\\Theta_l := (b_l, A_l) \\in \\mathbb{R}^{(n_l+1) \\times n_{l-1}}$ the total weights of layer $l$ and set $\\Theta := (\\Theta_1, \\ldots, \\Theta_L)$.\n",
 44 |     "\n",
 45 |     "A graphical representation of the layers can be found in the [introduction to MLPs](https://nbviewer.jupyter.org/github/niknow/machine-learning-examples/blob/master/neural_network_intro/neural_network_intro_model_setup.ipynb). "
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "# Feed Forward\n",
 53 |     "The *feed forward* of a neural network is the process of feeding an input data sample into the network and computing the output."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "The following notation will be convenient:\n",
 61 |     "\n",
 62 |     "**Definition (affine linear map):** Let $A \\in \\mathbb{R}^{m \\times n}$ be a matrix and $b \\in \\mathbb{R}^{m}$ be a vector. Then we denote by\n",
 63 |     "\\begin{align*}\n",
 64 |     "    f_{A,b}:\\mathbb{R}^{n} \\to \\mathbb{R}^m, && v \\mapsto Av + b\n",
 65 |     "\\end{align*}\n",
 66 |     "the *affine linear map with parameters $A$ and $b$*."
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "**Definition (feed forward function):** Let $\\operatorname{NN}=(A_l, b_l, \\sigma_l)_{1 \\leq l \\leq L}$ be a neural network. Then for each $1 \\leq l \\leq L$, we define a function \n",
 74 |     "\\begin{align*}\n",
 75 |     "F_l := \\sigma_l \\circ f_{A_l, b_l}: \\mathbb{R}^{n_{l-1}} \\to \\mathbb{R}^{n_l}, && v \\mapsto \\sigma_l(A_lv + b_l),\n",
 76 |     "\\end{align*}\n",
 77 |     "\n",
 78 |     "where we employ the convention that $\\sigma_l$ is applied in every component.\n",
 79 |     "The composition $F:= F_{\\Theta}:\\mathbb{R}^{n_i} \\to \\mathbb{R}^{n_o}$, $F_{\\Theta} := F_L \\circ \\ldots \\circ F_2 \\circ F_1$ is called the *feed forward function* of  $\\operatorname{NN}$. Any set of inputs $x \\in \\mathbb{R}^{n_i}$ is called an *input layer*."
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "**Algorithm (feed forward):** The feed forward of a neural network on an input $x \\in \\mathbb{R}^{n_i}$ is simply the evaluation of the feed forward function $F$ on $x$, i.e. the computation of $y=F(x)$. As $F$ is a composition of the various $F_l$, this evaluation is computed by evaluating the $F_l$ one by one feeding the input foward through the network as follows:\n",
 87 |     "\n",
 88 |     "\\begin{align}\n",
 89 |     "    a_0 &:= x \\in \\mathbb{R}^{n_i} \\\\\n",
 90 |     "    z_1 & := f_{A_1, b_1}(a_0) = A_1 a_0 + b_1 \\in \\mathbb{R}^{n_1} \\\\\n",
 91 |     "    a_1 & := \\sigma_1(z_1) \\in \\mathbb{R}^{n_1} \\\\\n",
 92 |     "    z_2 & := f_{A_2, b_2}(a_1) = A_2 a_1 + b_2 \\in \\mathbb{R}^{n_2} \\\\\n",
 93 |     "    a_2 & := \\sigma_2(z_2) \\in \\mathbb{R}^{n_2} \\\\\n",
 94 |     "    & \\vdots \\\\\n",
 95 |     "    z_l &:= f_{A_l, b_l}(a_{l-1}) = A_l a_{l-1} + b_l \\in \\mathbb{R}^{n_l}\\\\\n",
 96 |     "    a_l &:= \\sigma_l(z_{l}) \\in \\mathbb{R}^{n_l}\\\\\n",
 97 |     "    & \\vdots \\\\\n",
 98 |     "    z_L &:= f_{A_L, b_L}(a_{L-1}) = A_L a_{L-1} + b_L \\in \\mathbb{R}^{n_L} \\\\\n",
 99 |     "    a_L &:= \\sigma_L(z_L) \\in \\mathbb{R}^{n_L} \\\\\n",
100 |     "    y &:= a_L \\in \\mathbb{R}^{n_o}\n",
101 |     "\\end{align}"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "# Backpropagation"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "## Cost Functions"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "The final result $y=F(x)$ of the feed-forward depends on all the weights in all the layers. In supervised learning, we are typically given a labeled training set $(x_1, y_1), \\ldots, (x_N, y_N)$, $x_k \\in \\mathbb{R}^{n_i}$, $y_k \\in \\mathbb{R}^{n_o}$, and we are interested in how well the network fits the data set, i.e. how close the $F(x_k)$ are to the given $y_k$. In order to measure this, we need a *cost function* $J$ that measures the distance between the vector of vectors $(F(x_1), \\ldots, F(x_N))$ and $(y_1, \\ldots, y_N)$. While in theory, this function can have arbitrary shape, the most common way to chose it, is to choose a cost function $C_k$, which only measures the distance between $F(x_k)$ and $y_k$, and then aggregate these to the total cost via\n",
123 |     "\\begin{align*}\n",
124 |     "    J_{\\Theta}(x_1, \\ldots, x_N, y_1, \\ldots, y_N) = \\frac{1}{N} \\sum_{k=1}^{N}{C_k(F_{\\Theta}(x_k))}\n",
125 |     "\\end{align*}\n",
126 |     "One of the most common choices for the cost function is $C_k(y) := \\|y - y_k\\|^2$, i.e. to choose the least squares. "
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "When training the neural network we want to minimize the cost function $J_{\\Theta}$ by changing the parameters $\\Theta$ - usually via gradient descent. Obviously, gradient descent requires the gradient of the function it is trying to minimize. The big advantage of assuming that the cost function $J_{\\Theta}$ can be written as a sum of cost functions $C_k$ is that instead of having to compute the gradient $\\nabla_{\\Theta} J_{\\Theta}(x_1, \\ldots, x_N, y_1, \\ldots, y_N)$, we can compute the gradients $\\nabla_{\\Theta}C_k(F_{\\Theta}(x_k))$ separately. Thus, instead of working on the whole training set, we will restrict our attention to a single sample $(x,y)$ with $x \\in \\mathbb{R}^{n_i}$ and $y \\in \\mathbb{R}^{n_o}$. Our aim is to compute the gradient of a single cost function $C$ on that sample, i.e. to compute\n",
134 |     "\\begin{align*}\n",
135 |     "    \\nabla_{\\Theta}(C \\circ F_{\\Theta})(x)).\n",
136 |     "\\end{align*}\n",
137 |     "This means, we assume that\n",
138 |     "\\begin{align*}\n",
139 |     "    C:\\mathbb{R}^{n_o} \\to \\mathbb{R}, && a \\mapsto C(a)\n",
140 |     "\\end{align*}\n",
141 |     "is a differentiable function."
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "## Reminder of Calculus: Nabla, Grad and Chain Rule"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "To derive the backpropagation algorithm, we employ the following notation from calculus: \n",
156 |     "\n",
157 |     "**Nabla:** For any differentiable function $g:\\mathbb{R}^{n} \\to \\mathbb{R}^{m}$ and any $x \\in \\mathbb{R}^n$, we denote by $\\nabla g(x) \\in \\mathbb{R}^{m \\times n}$ the matrix of partial derivatives, i.e. \n",
158 |     "\\begin{align*}\n",
159 |     "    (\\nabla g(x))_{ij}) = \\partial_{x_j} g_i\n",
160 |     "\\end{align*}\n",
161 |     "In particular, for a function $g: \\mathbb{R}^n \\to \\mathbb{R}$, we denote by $\\nabla g(x) \\in \\mathbb{R}^{1 \\times n}$ the row vector of partial derivatives.\n",
162 |     "\n",
163 |     "**Gradient:** For a differentiable function $g: \\mathbb{R}^n \\to \\mathbb{R}$ and an $x \\in \\mathbb{R}^n$, we denote by $\\operatorname{grad}(x) \\in \\mathbb{R}^{n \\times 1}$ the column vector of partial derivatives, i.e.\n",
164 |     "\\begin{align*}\n",
165 |     "    \\operatorname{grad} g(x) = \\nabla g(x)^{\\top}\n",
166 |     "\\end{align*}\n",
167 |     "\n",
168 |     "We generally regard $\\mathbb{R}^n$ as a space of column vectors.\n",
169 |     "\n",
170 |     "**Transpose:** For any matrix $A \\in \\mathbb{R}^{m \\times n}$, we denote its transpose by $A^{\\top} \\in \\mathbb{R}^{n \\times m}$.\n",
171 |     "\n",
172 |     "**Chain Rule:** For two differentiable functions $g:\\mathbb{R}^n \\to \\mathbb{R}^m$ and $h:\\mathbb{R}^m \\to \\mathbb{R}^{k}$, the derivative of the composition $h \\circ g$ is related to the derivative of the components via\n",
173 |     "\\begin{align*}\n",
174 |     "    \\forall x \\in \\mathbb{R}^n: \\nabla(h \\circ g)(x) = \\nabla h(g(x)) \\bullet \\nabla g(x),\n",
175 |     "\\end{align*}\n",
176 |     "where $\\bullet$ denotes the matrix product."
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "## Plan of Attack\n",
184 |     "In order to compute the gradient $\\nabla_{\\Theta}(C(F_\\Theta(x))$ , we will proceed in two steps:\n",
185 |     "1. Compute $\\nabla_x (C(F_{\\Theta}(x))$ step by step working backwards through the network\n",
186 |     "2. Relate the result to $\\nabla_\\Theta (C(F_{\\Theta}(x))$ "
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "## Backwards Recursion\n",
194 |     "The key idea to execute the first step is the following insight: The function $F_{\\Theta} = F_L \\circ \\ldots \\circ F_1$ is a complex composition of many functions $F_l$. Thus, computing $\\nabla F$ requires a lot of applications of the chain rule. However, computing only the last gradient $F_L$ is easy. Therefore, the idea is to work backwards by computing the derivatives of increasingly comples compositions. To that end, the following definition is helpful.\n",
195 |     "\n",
196 |     "**Definition:** Let $\\operatorname{NN}$ be a neural network with feed-forward function $F = F_{\\Theta} = F_L \\circ \\ldots \\circ F_1$ and $C$ be a const function for a single sample. We define the functions\n",
197 |     "\\begin{align*}\n",
198 |     "    G_l := C \\circ F_L \\circ \\ldots \\circ F_{l+1} \\circ \\sigma_l : \\mathbb{R}^{n_l} \\to \\mathbb{R}\n",
199 |     "\\end{align*}\n",
200 |     "for $1 \\leq l \\leq L$.\n",
201 |     "\n",
202 |     "The main insight into these function is the following\n",
203 |     "\n",
204 |     "**Lemma:** Let $G_l$ be as a above and assume that $z_l$ are computed via feed-forward as above. Then the sequence of error terms\n",
205 |     "\\begin{align*}\n",
206 |     "    \\varepsilon_l := \\operatorname{grad} G_l(z_l) \\in \\mathbb{R}^{n_l}\n",
207 |     "\\end{align*}\n",
208 |     "satisfies the backward recusion\n",
209 |     "\\begin{align*}\n",
210 |     "    \\varepsilon_L = \\nabla \\sigma_L (z_L) \\bullet \\operatorname{grad} C(a_L), && \\varepsilon_l = \\nabla \\sigma_l (z_l) \\bullet A_{l+1}^{\\top} \\bullet \\varepsilon_{l+1}.\n",
211 |     "\\end{align*}\n",
212 |     "\n",
213 |     "**Proof:** For $l=L$, this follows from the definitions and the chain rule as\n",
214 |     "\\begin{align*}\n",
215 |     "    \\nabla G_L(z_L)\n",
216 |     "    = \\nabla (C \\circ \\sigma_L)(z_L)\n",
217 |     "    = \\nabla C(\\sigma_L(z_L)) \\bullet \\nabla \\sigma_L(z_L)\n",
218 |     "\\end{align*}\n",
219 |     "and thus\n",
220 |     "\\begin{align*}\n",
221 |     "    \\varepsilon_L = \\operatorname{grad} G_L (z_L) = (\\nabla G_L(z_L))^{\\top} = \\nabla \\sigma_L(z_L) \\bullet \\operatorname{grad} C(\\sigma_L(z_L)).\n",
222 |     "\\end{align*}\n",
223 |     "Here, we use the above mentioned convention that we identify the scalar function $\\sigma_l:\\mathbb{R} \\to \\mathbb{R}$ with the vector valued function $\\sigma_l:\\mathbb{R}^{n_l} \\to \\mathbb{R}^{n_l}$, $v \\mapsto (\\sigma(v_1), \\ldots, \\sigma(v_{n_l}))$. Thus, the derivative of this vector valued function is given as a diagonal matrix $\\nabla \\sigma_l (v)$, where the diagonal is given by $\\sigma'(v_1), \\ldots, \\sigma'(v_{n_l})$. Thus, this matrix is symmetric, i.e. $\\nabla \\sigma_l (v) = \\nabla \\sigma_l (v)^{\\top}$.\n",
224 |     "\n",
225 |     "For $l+1 \\to l$, notice that by definition, the funtions $G_l$ satisfy\n",
226 |     "\\begin{align*}\n",
227 |     "    G_l &= C \\circ F_L \\circ \\ldots \\circ F_{l+2} \\circ F_{l+1} \\circ \\sigma_l \\\\\n",
228 |     "        &= C \\circ F_L \\circ \\ldots \\circ F_{l+2} \\circ \\sigma_{l+1} \\circ f_{A_{l+1},b_{l+1}} \\circ \\sigma_l  \\\\\n",
229 |     "        &= G_{l+1} \\circ f_{A_{l+1},b_{l+1}} \\circ \\sigma_l  \\\\\n",
230 |     "\\end{align*}\n",
231 |     "\n",
232 |     "Thus,\n",
233 |     "\\begin{align*}\n",
234 |     "    \\nabla G_l(z_l) & = \\nabla G_{l+1}(f_{A_{l+1},b_{l+1}}(\\sigma_l(z_l))) \\bullet \\nabla f_{A_{l+1},b_{l+1}}(\\sigma_l(z_l)) \\bullet \\nabla \\sigma_l(z_l)   \\\\\n",
235 |     "    &= \\nabla G_{l+1}(z_{l+1})) \\bullet A_{l+1} \\bullet \\nabla \\sigma_l(z_l),\n",
236 |     "\\end{align*}\n",
237 |     "which implies\n",
238 |     "\\begin{align*}\n",
239 |     "    \\varepsilon_l \n",
240 |     "    = \\operatorname{grad} G_l(z_l)\n",
241 |     "    = \\nabla G_l(z_l)^{\\top} \n",
242 |     "    = \\nabla \\sigma_l(z_l) \\bullet A_{l+1}^{\\top} \\bullet \\varepsilon_{l+1}.\n",
243 |     "\\end{align*}"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "## Backwards Gradient Computation\n",
251 |     "\n",
252 |     "Finally, we use the result of the previous lemma to compute the derivative $\\nabla_{\\Theta}(F_{\\Theta}(x))$.\n",
253 |     "\n",
254 |     "**Theorem (backpropagation):** Let $\\operatorname{NN} = (\\Theta_l \\sigma_l)_{1 \\leq l \\leq L}$, $\\Theta_l=(A_l, b_l)$, be an MLP and $x \\in \\mathbb{R}^{n_i}$ be an input. Let $C:\\mathbb{R}^{n_o} \\to \\mathbb{R}$ be a differentiable cost function. Let $(\\varepsilon_l)_{1 \\leq l \\leq L}$ be the sequence of error terms of the previous lemma. Then\n",
255 |     "\\begin{align*}\n",
256 |     "    \\operatorname{grad}_{b_l}(C(F_{\\Theta}(x))) &= \\varepsilon_{l} \\\\\n",
257 |     "    \\operatorname{grad}_{A_l}(C(F_{\\Theta}(x))) &= a_{l-1} \\varepsilon_{l}^{\\top}\n",
258 |     "\\end{align*}\n",
259 |     "where $a_l$ is defined as above (feed forward).\n",
260 |     "\n",
261 |     "**Proof:** Analogously to the previous lemma, we define the functions\n",
262 |     "\\begin{align*}\n",
263 |     "    G_{A,b}^l := C \\circ F_L \\circ \\ldots \\circ F_{l+1} \\circ \\sigma_l \\circ f_{A,b}: \\mathbb{R}^{n_{l-1}} \\to \\mathbb{R}\n",
264 |     "\\end{align*}\n",
265 |     "By construction $G_{A,b}^l = G_l \\circ f_{A,b}$. Therefore,\n",
266 |     "\\begin{align*}\n",
267 |     "    \\nabla_b (G_{A_l,b}^l(a_{l-1}))(b_l) = \\nabla G_l (f_{A_l,b_l}(a_{l-1})) \\bullet \\nabla_b f_{A_l, b}(b_l) = \\nabla G_l(z_l),\n",
268 |     "\\end{align*}\n",
269 |     "as $\\nabla b f_{A,b}$ is the identity matrix. Therefore, \n",
270 |     "\\begin{align*}\n",
271 |     "    \\nabla_{b_l} C(F_{\\Theta}(x)) \n",
272 |     "    & = \\nabla _b(C \\circ F_L \\circ \\ldots \\circ F_1(x))(b_l) \\\\\n",
273 |     "    & = \\nabla_b(G_{A_l,b} \\circ F_{l-1} \\circ \\ldots \\circ F_1(x))(b_l)\\\\\n",
274 |     "    &= \\nabla G_l(z_l) = \\varepsilon_l,\n",
275 |     "\\end{align*}\n",
276 |     "which implies the first claim. \n",
277 |     "\n",
278 |     "To see the second, notice that as a function of $A$, we have $f_{\\_,b_l}(a_{l-1}):\\mathbb{R}^{n_l \\times n_{l-1}} \\to \\mathbb{R}^{n_l}$ and hence analogously, $G_{\\_,b}^l(a_{l-1}) = (G_l \\circ f_{\\_,b})(a_{l-1}):\\mathbb{R}^{n_l \\times n_{l-1}} \\to \\mathbb{R}$. Thence, we can calculate in coordinates using the chain rule\n",
279 |     "\n",
280 |     "\\begin{align*}\n",
281 |     "    \\frac{\\partial( G_{\\_,b}^l(a_{l-1}))(A_l)}{\\partial A_{\\nu \\mu}} \n",
282 |     "    =\\sum_{k=1}^{n_l}{\\nabla G_l}(f_{A_l,b_l}(a_{l-1}))_k \\frac{\\partial (A a_{l-1}+b)(A_l)_k}{\\partial A_{\\nu \\mu}}\n",
283 |     "    =\\sum_{k=1}^{n_l}{ \\nabla G_l(z_l)_k \\delta_{\\nu k} a_{l-1;\\mu} }\n",
284 |     "    =\\varepsilon_{l;\\nu} a_{l-1;\\mu}\n",
285 |     "    =(\\varepsilon_{l} a_{l-1}^{\\top})_{\\nu \\mu}.\n",
286 |     "\\end{align*}"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {},
292 |    "source": [
293 |     "## Algorithm\n",
294 |     "Putting everything together, the backpropagation algorithm works as follows:\n",
295 |     "\n",
296 |     "**Algorithm (backpropagation):**\n",
297 |     "\n",
298 |     "**Inputs:**\n",
299 |     "* A neural network $\\operatorname{NN} = (A_l, b_l, \\sigma_l)_{1 \\leq l \\leq L}$,\n",
300 |     "* a single input $x \\in \\mathbb{R}^{n_i}$,\n",
301 |     "* a cost function $C:\\mathbb{R}^{n_o} \\to \\mathbb{R}$ for that input.\n",
302 |     "\n",
303 |     "**Outputs:**\n",
304 |     "The gradients\n",
305 |     "* $\\nabla_{b_l}(C(F_{\\Theta}(x))$ and \n",
306 |     "* $\\nabla_{A_l}(C(F_{\\Theta}(x))$.\n",
307 |     "\n",
308 |     "**Steps:**\n",
309 |     "1. Compute the feed forward $F_{\\Theta}(x)$\n",
310 |     "  * Initialize: $a_0 := x$\n",
311 |     "  * For $l=1, \\ldots, L$:\n",
312 |     "    * $a_l := f_{A_l,b_l}(a_{l-1})$\n",
313 |     "    * $z_l := \\sigma_l(a_l)$\n",
314 |     "2. Compute the errors $\\varepsilon_l$:\n",
315 |     "  * Initialize: $\\varepsilon_L := \\nabla \\sigma_L(z_L) \\operatorname{grad}C(a_L)$\n",
316 |     "  * For $L=l-1, \\ldots, 1$: $\\varepsilon_l := \\nabla \\sigma_l(z_l) A_{l+1}^{\\top} \\varepsilon_{l+1}$.\n",
317 |     "3. Compute the gradients: For $l=1, \\ldots, L$ (or in any order):\n",
318 |     "  * $\\operatorname{grad}_{b_l}(C(F_{\\Theta}(x)) = \\varepsilon_l$\n",
319 |     "  * $\\operatorname{grad}_{A_l}(C(F_{\\Theta}(x)) = a_{l-1} \\varepsilon_l^{\\top}$\n",
320 |     "\n",
321 |     "In case, we have multiple training samples $x_i$ (which we usually have), the above is repeated on every training sample and then the gradient of the total cost function $J$ is given as the average of the gradients over the samples. "
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "markdown",
326 |    "metadata": {},
327 |    "source": [
328 |     "# References\n",
329 |     "There are various other sources on backpropagations you might find helpful (list not exhaustive):\n",
330 |     "\n",
331 |     "* http://neuralnetworksanddeeplearning.com/chap2.html\n",
332 |     "* https://brilliant.org/wiki/backpropagation/\n",
333 |     "* https://datascience.stackexchange.com/questions/44703/how-does-gradient-descent-and-backpropagation-work-together\n",
334 |     "* https://stackoverflow.com/questions/47416861/backward-propagation-in-keras"
335 |    ]
336 |   }
337 |  ],
338 |  "metadata": {
339 |   "kernelspec": {
340 |    "display_name": "Python 3 (ipykernel)",
341 |    "language": "python",
342 |    "name": "python3"
343 |   },
344 |   "language_info": {
345 |    "codemirror_mode": {
346 |     "name": "ipython",
347 |     "version": 3
348 |    },
349 |    "file_extension": ".py",
350 |    "mimetype": "text/x-python",
351 |    "name": "python",
352 |    "nbconvert_exporter": "python",
353 |    "pygments_lexer": "ipython3",
354 |    "version": "3.9.18"
355 |   },
356 |   "toc": {
357 |    "base_numbering": 1,
358 |    "nav_menu": {},
359 |    "number_sections": true,
360 |    "sideBar": true,
361 |    "skip_h1_title": false,
362 |    "title_cell": "Table of Contents",
363 |    "title_sidebar": "Contents",
364 |    "toc_cell": false,
365 |    "toc_position": {},
366 |    "toc_section_display": true,
367 |    "toc_window_display": false
368 |   }
369 |  },
370 |  "nbformat": 4,
371 |  "nbformat_minor": 2
372 | }
373 | 


--------------------------------------------------------------------------------
/newton_gradient_backprop/gradient_descent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Gradient Descent</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "<div style=\"text-align: right\">(C) <a href=\"https://github.com/niknow\">Nikolai Nowaczyk</a>, <a href=\"https://github.com/Lapsilago\">Jörg Kienitz</a> 2019-2021</div>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "%matplotlib widget\n",
 24 |     "import matplotlib.pyplot as plt\n",
 25 |     "import numpy as np\n",
 26 |     "import ipywidgets as wd\n",
 27 |     "from mpl_toolkits.mplot3d import Axes3D\n",
 28 |     "from matplotlib import cm"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "Let $f:\\mathbb{R}^m \\to \\mathbb{R}$ be a differentiable function. Gradient descent is a class of methods to find the minimum\n",
 36 |     "$$ \\min_{x \\in \\mathbb{R}^m}{f(x)}$$\n",
 37 |     "\n",
 38 |     "One method to find a minimum is to find a zero of the gradient $\\operatorname{grad} f$, for example via Newton's method. However, this requires the computation of the Hessian of $f$, which can be quite costly. Gradient descent is an alternative method, which only requires the computation of the gradient of $f$, but not the Hessian."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "## Basic Algorithm\n",
 46 |     "The basic idea of gradient descent is simple: Start with an arbitrary guess $x_0 \\in \\mathbb{R}^m$ and then recursively descent into the direction negative gradient:\n",
 47 |     "$$ x_{n+1} = x_n - \\alpha_n \\operatorname{grad} f(x_n)$$\n",
 48 |     "Here, the $\\alpha_n \\in \\mathbb{R}$ are a choice of *step sizes*. This method is motivated by the below mathematical background."
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "## Mathematical Background\n",
 56 |     "The key principles behind gradient descent rely on the following simple insights:\n",
 57 |     "\n",
 58 |     "**Lemma**: Let $f:\\mathbb{R}^m \\to \\mathbb{R}$ be a differentiable function and $x_0 \\in \\mathbb{R}^m$. \n",
 59 |     "1. For any $v \\in \\mathbb{R}^m$, the directional derivative $\\partial _v f$ satisfies\n",
 60 |     "\\begin{align*}\n",
 61 |     "    \\lim_{t \\searrow 0}\\frac{f(x_0 + tv)}{t} = \\partial_v f(x_0) =\\nabla f(x_0) v = \\langle \\nabla f(x_0)^{\\top}, v \\rangle \n",
 62 |     "    = \\langle \\operatorname{grad} f(x_0), v \\rangle \n",
 63 |     "\\end{align*}\n",
 64 |     "2. The negative gradient is the direction of steepest descent, i.e.\n",
 65 |     "\\begin{align*}\n",
 66 |     "    \\underset{\\|v\\|=1}{\\operatorname{argmin}}\\partial_v f(x_0) = - \\frac{\\nabla f(x_0)}{\\|\\nabla f(x_0)\\|},\n",
 67 |     "\\end{align*}\n",
 68 |     "provided $\\nabla f(x_0) \\neq 0$.\n",
 69 |     "\n",
 70 |     "**Proof**: The first claim is the definition of directional derivative and an application of the chain rule. To see the second claim, denote by $\\vartheta$ the angle between $\\nabla f(x_0)$ and a $v \\in \\mathbb{R}^m$, $\\|v\\|=1$. By definition of the angle\n",
 71 |     "$$ \\cos(\\vartheta) = \\langle \\nabla f(x_0)^{\\top}, v \\rangle \\|\\nabla f(x_0)\\| ,$$\n",
 72 |     "thus\n",
 73 |     "$$\\partial_v f(x_0) = \\langle \\nabla f(x_0)^{\\top}, v \\rangle = \\frac{\\cos(\\vartheta)}{\\|\\nabla f(x_0)\\|}, $$\n",
 74 |     "which becomes smallest when $\\cos(\\vartheta)=-1$, i.e. when $v = -\\frac{\\nabla f(x_0)}{\\|\\nabla f(x_0)\\|}$.\n",
 75 |     "\n",
 76 |     "**Interpretation:** This means that at any point $x_0 \\in \\mathbb{R}^m$ descending an infinitesimal step into the direction of $-\\operatorname{grad} f(x_0)$ decreases $f$ most. The problem with that view is that 1) in reality, one needs to chose a finite step size and 2) this only yields a local minimum of $f$."
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "## Example"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 2,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "def f(x):\n",
 93 |     "    return x[0]**2 + x[1]**2\n",
 94 |     "\n",
 95 |     "def df(x):\n",
 96 |     "    return 2*(x[0]+x[1])\n",
 97 |     "\n",
 98 |     "def gradient_descent_sequence(x0, f, df, N):\n",
 99 |     "    X = np.zeros((N+1, x0.shape[0]))\n",
100 |     "    X[0] = x0\n",
101 |     "    alpha=0.1\n",
102 |     "    for n in range(N):\n",
103 |     "        X[n+1] = X[n] - alpha * df(X[n])\n",
104 |     "    return X"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 3,
110 |    "metadata": {},
111 |    "outputs": [
112 |     {
113 |      "name": "stderr",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "D:\\Apps\\Anaconda3\\envs\\heroku\\lib\\site-packages\\matplotlib\\__init__.py:942: MatplotlibDeprecationWarning: nbagg.transparent is deprecated and ignored. Use figure.facecolor instead.\n",
117 |       "  mplDeprecation)\n"
118 |      ]
119 |     },
120 |     {
121 |      "data": {
122 |       "application/vnd.jupyter.widget-view+json": {
123 |        "model_id": "4bae68a51a404ae08486c7669289a11c",
124 |        "version_major": 2,
125 |        "version_minor": 0
126 |       },
127 |       "text/plain": [
128 |        "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to  previous…"
129 |       ]
130 |      },
131 |      "metadata": {},
132 |      "output_type": "display_data"
133 |     },
134 |     {
135 |      "data": {
136 |       "application/vnd.jupyter.widget-view+json": {
137 |        "model_id": "ec07f46d7b5c4f9d8a9da13719ecc23b",
138 |        "version_major": 2,
139 |        "version_minor": 0
140 |       },
141 |       "text/plain": [
142 |        "interactive(children=(IntSlider(value=1, description='N', max=20, min=1), Output()), _dom_classes=('widget-int…"
143 |       ]
144 |      },
145 |      "metadata": {},
146 |      "output_type": "display_data"
147 |     }
148 |    ],
149 |    "source": [
150 |     "x = np.linspace(-1, 1, 20)\n",
151 |     "y = np.linspace(-1, 1, 20)\n",
152 |     "Z = np.array([[f(np.array([xx, yy])) for xx in x] for yy in y])\n",
153 |     "\n",
154 |     "X, Y = np.meshgrid(x, y)\n",
155 |     "\n",
156 |     "fig = plt.figure()\n",
157 |     "ax = fig.add_subplot(111, projection='3d')\n",
158 |     "\n",
159 |     "@wd.interact(N=wd.IntSlider(min=1, max=20, value=1))\n",
160 |     "def plot_gradient_descent(N):\n",
161 |     "    ax.clear()\n",
162 |     "    ax.plot_wireframe(X, Y, Z, alpha=0.8, cmap=cm.coolwarm)\n",
163 |     "    Xgd = gradient_descent_sequence(np.array([1, 1]), f, df, N)\n",
164 |     "    ax.plot(Xgd[:, 0], Xgd[:, 1], np.array([f(xx) for xx in Xgd]), marker='x', color='k')\n",
165 |     "    plt.show()"
166 |    ]
167 |   }
168 |  ],
169 |  "metadata": {
170 |   "kernelspec": {
171 |    "display_name": "heroku",
172 |    "language": "python",
173 |    "name": "heroku"
174 |   },
175 |   "language_info": {
176 |    "codemirror_mode": {
177 |     "name": "ipython",
178 |     "version": 3
179 |    },
180 |    "file_extension": ".py",
181 |    "mimetype": "text/x-python",
182 |    "name": "python",
183 |    "nbconvert_exporter": "python",
184 |    "pygments_lexer": "ipython3",
185 |    "version": "3.6.12"
186 |   },
187 |   "toc": {
188 |    "base_numbering": 1,
189 |    "nav_menu": {},
190 |    "number_sections": true,
191 |    "sideBar": true,
192 |    "skip_h1_title": false,
193 |    "title_cell": "Table of Contents",
194 |    "title_sidebar": "Contents",
195 |    "toc_cell": false,
196 |    "toc_position": {},
197 |    "toc_section_display": true,
198 |    "toc_window_display": false
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 2
203 | }
204 | 


--------------------------------------------------------------------------------
/newton_gradient_backprop/newton.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Newton's Method</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "<div style=\"text-align: right\">(C) <a href=\"https://github.com/niknow\">Nikolai Nowaczyk</a>, <a href=\"https://github.com/Lapsilago\">Jörg Kienitz</a> 2019-2021</div>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "%matplotlib widget\n",
 24 |     "import matplotlib.pyplot as plt\n",
 25 |     "import numpy as np\n",
 26 |     "from numpy import polyder, poly1d\n",
 27 |     "import ipywidgets as wd\n",
 28 |     "from scipy.optimize import newton"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "Newton's method is a standard algorithm to solve non-linear equations, i.e. equations of the form\n",
 36 |     "$$ F(x)=0, $$\n",
 37 |     "where $F:\\mathbb{R}^m \\to \\mathbb{R}^m$ is a non-linear function. Solving such a non-linear equation can be very difficult in general, yet solving linear equations is easy and well understood. The idea of Newton's method is to assume that $F$ is differentiable and thus reducing the problem of solving a non-linear equation to solving a linear equation by approximating $F$ by its differential $\\nabla F$. This only yields an approximate solution, but when applied iteratively, one can hope to converge to the exact solution"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "# The 1D case"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "We first consider the one-dimensional case of a function $f:\\mathbb{R} \\to \\mathbb{R}$ and we have to assume that $f$ is differentiable at least once. We also assume that we are given a start value $x_0$. The tangent to the graph of $f$ through the point $(x_0, f(x_0))$ is given by\n",
 52 |     "$$t_0(x) = f'(x_0)(x-x_0) + f(x_0)$$\n",
 53 |     "The root $x_1$ of the tangent can be found easily as\n",
 54 |     "$$ 0 = t_0(x_1) = f'(x_0)(x_1-x_0) + f(x_0) \\Longleftrightarrow x_1 = x_0 - \\frac{f(x_0)}{f'(x_0)} $$\n",
 55 |     "Thus, we take $x_1$ as our improved guess of the root of $f$. If we are not yet happy with that approximation, we can successively continue this, which yields the following recursively defined sequence, called the **Newton's method sequence**:\n",
 56 |     "\n",
 57 |     "\\begin{align}\n",
 58 |     "    x_{n+1} = x_n - \\frac{f(x_n)}{f'(x_n)}\n",
 59 |     "\\end{align}"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "## Example: Polynomials"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "As an example we consider 3rd order polynomials, i.e. the functions of the form\n",
 74 |     "$$ p(x) = c_3 x^3 + c_2 x^2 +  c_1 x^1 + c_0 $$\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 2,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "def newton_1d_poly(c, x0, N):\n",
 84 |     "    \"\"\"\n",
 85 |     "    Calculates the Newton's sequence for a polynomial with coefficients c=(c_0, \\ldots, c_d) at initial value x0.\n",
 86 |     "    \n",
 87 |     "    params:\n",
 88 |     "    c : numpy array with coefficients c[i] corresponds to c_i\n",
 89 |     "    x0: initial value\n",
 90 |     "    N : number of iterations\n",
 91 |     "    \n",
 92 |     "    returns: numpy array x with x_0, \\ldots, x_N elements of the Newton sequence\n",
 93 |     "    \"\"\"\n",
 94 |     "    x = np.zeros(N+1)\n",
 95 |     "    x[0] = x0\n",
 96 |     "    c = c[::-1] # poly1d and polyder assume that coefficients are in decreasing order\n",
 97 |     "    p = poly1d(c)\n",
 98 |     "    dp = poly1d(polyder(c))\n",
 99 |     "    for n in range(N):\n",
100 |     "        x[n+1] = x[n] - p(x[n]) / dp(x[n])\n",
101 |     "    return x"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 6,
107 |    "metadata": {},
108 |    "outputs": [
109 |     {
110 |      "data": {
111 |       "application/vnd.jupyter.widget-view+json": {
112 |        "model_id": "16ffa9bc19804c7daeba0955b3c602e0",
113 |        "version_major": 2,
114 |        "version_minor": 0
115 |       },
116 |       "text/plain": [
117 |        "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to  previous…"
118 |       ]
119 |      },
120 |      "metadata": {},
121 |      "output_type": "display_data"
122 |     },
123 |     {
124 |      "data": {
125 |       "application/vnd.jupyter.widget-view+json": {
126 |        "model_id": "31d3292062ba4901bed9bc351d7282ed",
127 |        "version_major": 2,
128 |        "version_minor": 0
129 |       },
130 |       "text/plain": [
131 |        "interactive(children=(FloatSlider(value=2.0, description='c0', max=3.0, min=-3.0), FloatSlider(value=1.0, desc…"
132 |       ]
133 |      },
134 |      "metadata": {},
135 |      "output_type": "display_data"
136 |     }
137 |    ],
138 |    "source": [
139 |     "fig_netwton, ax_newton = plt.subplots()\n",
140 |     "\n",
141 |     "@wd.interact(c0=wd.FloatSlider(min=-3, max=3, value=2),\n",
142 |     "             c1=wd.FloatSlider(min=-3, max=3, value=1),\n",
143 |     "             c2=wd.FloatSlider(min=-3, max=3, value=2),\n",
144 |     "             c3=wd.FloatSlider(min=-3, max=3, value=0.5),\n",
145 |     "             x0=wd.FloatSlider(min=-5, max=5, value=-3),\n",
146 |     "             N=wd.IntSlider(min=1, max=20, value=1),\n",
147 |     "             xlim_min=wd.FloatSlider(min=-10, max=0, value=-7),\n",
148 |     "             xlim_max=wd.FloatSlider(min=0, max=10, value=5),\n",
149 |     "             ylim_min=wd.FloatSlider(min=-50, max=0, value=-10),\n",
150 |     "             ylim_max=wd.FloatSlider(min=0, max=50, value=20),\n",
151 |     "            )\n",
152 |     "def plot_1d_example(c0, c1, c2, c3, x0, N, xlim_min, xlim_max, ylim_min, ylim_max):\n",
153 |     "    ax_newton.clear()\n",
154 |     "    c = np.array([c0, c1, c2, c3])\n",
155 |     "    xgrid = np.linspace(xlim_min, xlim_max, 100)\n",
156 |     "    ygrid = poly1d(c[::-1])(xgrid)\n",
157 |     "    x = newton_1d_poly(c, x0, N)\n",
158 |     "    fig_netwton.suptitle('Newton\\'s Method: Iteration %i' % N)\n",
159 |     "    ax_newton.plot(xgrid, ygrid, label='f')\n",
160 |     "    ax_newton.axhline(y=0, color='k')\n",
161 |     "    ax_newton.scatter(x, np.zeros_like(x), marker='o', facecolors='none', edgecolors='r')\n",
162 |     "    ax_newton.scatter(x[N], 0, marker='x', color='r')\n",
163 |     "    ax_newton.plot(xgrid,poly1d(polyder(c[::-1]))(x[N-1])*(xgrid -x[N-1]) + poly1d(c[::-1])(x[N-1]), color='g', label='tangent')\n",
164 |     "    ax_newton.set_xlim([xlim_min, xlim_max])\n",
165 |     "    ax_newton.set_ylim([ylim_min, ylim_max])\n",
166 |     "    ax_newton.set_xlabel('x')\n",
167 |     "    ax_newton.set_ylabel('y')\n",
168 |     "    ax_newton.legend()\n",
169 |     "    plt.show()\n",
170 |     "    print(\"Solution (current guess): %f\" % x[N])\n",
171 |     "    print(\"Solution (scipy): %f\" % newton(poly1d(c[::-1]), x0))"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {},
177 |    "source": [
178 |     "## Example: Square Root"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "markdown",
183 |    "metadata": {},
184 |    "source": [
185 |     "One of the most popular examples of the Newton's method is the calculation of the square root of a number $a \\in \\mathbb{R}_{>0}$. This can be expressed as the root of $f_a:\\mathbb{R}_{>0} \\to \\mathbb{R}$, $x \\mapsto 1 - \\frac{a}{x^2}$. We choose $x_0 := a$ as the initial guess and the Newton sequence in this case is given by\n",
186 |     "$$ x_{n+1} \n",
187 |     "= x_n - \\frac{f_a(x_n)}{f_a'(x_n)}\n",
188 |     "= x_n - \\frac{1-\\frac{a}{x_n^2}}{2ax_n^{-3}}\n",
189 |     "= x_n - \\frac{x_n^3}{2a} + \\frac{x_n}{2}\n",
190 |     "= \\frac{x_n}{2}\\Big( 3 - \\frac{x_n^2}{a} \\Big)$$\n",
191 |     "This sequence is called *Heron's method* and converges quite quickly."
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 7,
197 |    "metadata": {},
198 |    "outputs": [
199 |     {
200 |      "data": {
201 |       "application/vnd.jupyter.widget-view+json": {
202 |        "model_id": "4e354d218191417fa5613d08e78a25a9",
203 |        "version_major": 2,
204 |        "version_minor": 0
205 |       },
206 |       "text/plain": [
207 |        "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to  previous…"
208 |       ]
209 |      },
210 |      "metadata": {},
211 |      "output_type": "display_data"
212 |     },
213 |     {
214 |      "data": {
215 |       "application/vnd.jupyter.widget-view+json": {
216 |        "model_id": "7a847a01691d422c9a5752a32204de07",
217 |        "version_major": 2,
218 |        "version_minor": 0
219 |       },
220 |       "text/plain": [
221 |        "interactive(children=(FloatSlider(value=2.0, description='a', max=3.0, min=1.0), IntSlider(value=10, descripti…"
222 |       ]
223 |      },
224 |      "metadata": {},
225 |      "output_type": "display_data"
226 |     }
227 |    ],
228 |    "source": [
229 |     "fig_root, ax_root = plt.subplots()\n",
230 |     "\n",
231 |     "@wd.interact(a=wd.FloatSlider(min=1, max=3, value=2),\n",
232 |     "             N=wd.IntSlider(min=1, max=100, value=10))\n",
233 |     "def plot_square_root(a, N):\n",
234 |     "    fig_root.suptitle('Heron\\'s method for a=%0.2f' % a)\n",
235 |     "    ax_root.clear()\n",
236 |     "    x = np.zeros(N+1)\n",
237 |     "    x[0] = a\n",
238 |     "    for n in range(N):\n",
239 |     "        x[n+1] = x[n]/2*(3 - x[n]**2/a)\n",
240 |     "    ax_root.plot(range(N+1), x, label='heron approx')\n",
241 |     "    ax_root.axhline(y=np.sqrt(a), color='k', label='exact')\n",
242 |     "    ax_root.set_ylim([0, 3])\n",
243 |     "    ax_root.set_xlabel('num iterations')\n",
244 |     "    ax_root.set_ylabel('value')\n",
245 |     "    ax_root.legend()\n",
246 |     "    plt.show()"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "# Multi-Dimensional Newton's Method\n",
254 |     "\n",
255 |     "The Newton's method can be applied to multivariate functions $F:\\mathbb{R}^m \\to \\mathbb{R}^m$ in the same fashion. The Newton sequence in this case is given by\n",
256 |     "\\begin{align}\n",
257 |     "    x_{n+1} = x_n - \\nabla F(x_n)^{-1} F(x_n).\n",
258 |     "\\end{align}"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": 8,
264 |    "metadata": {},
265 |    "outputs": [],
266 |    "source": [
267 |     "def F(x):\n",
268 |     "    return np.array([0.2 * x[0]**2 + x[1] - 1, \n",
269 |     "                     0.5*x[1]**3 + x[0] + 1])\n",
270 |     "\n",
271 |     "def dF(x):\n",
272 |     "    return np.array([[0.4 * x[0], 1],\n",
273 |     "                     [1, 1.5*x[1]**2]])\n",
274 |     "\n",
275 |     "def newton_naive(F, dF, x0, N):\n",
276 |     "    X = np.zeros((N+1, x0.shape[0]))\n",
277 |     "    X[0, :] = x0\n",
278 |     "    for n in range(N):\n",
279 |     "        X[n+1, :] = X[n, :] - np.linalg.inv(dF(X[n, :])) @ F(X[n, :]) \n",
280 |     "    return X"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 9,
286 |    "metadata": {},
287 |    "outputs": [],
288 |    "source": [
289 |     "X, Y = np.meshgrid(np.linspace(0, 5, 20), \n",
290 |     "                   np.linspace(-4, 1, 20))\n",
291 |     "X = X.flatten()\n",
292 |     "Y = Y.flatten()\n",
293 |     "ZX, ZY = np.array(np.array([F(np.array([x, y])) for x,y in zip(X,Y)])).T"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 10,
299 |    "metadata": {},
300 |    "outputs": [
301 |     {
302 |      "data": {
303 |       "application/vnd.jupyter.widget-view+json": {
304 |        "model_id": "3b6c41fe915f40d68287d56788c81f4d",
305 |        "version_major": 2,
306 |        "version_minor": 0
307 |       },
308 |       "text/plain": [
309 |        "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to  previous…"
310 |       ]
311 |      },
312 |      "metadata": {},
313 |      "output_type": "display_data"
314 |     },
315 |     {
316 |      "data": {
317 |       "application/vnd.jupyter.widget-view+json": {
318 |        "model_id": "5bf075b42dd24891b9e50031fb6a5b15",
319 |        "version_major": 2,
320 |        "version_minor": 0
321 |       },
322 |       "text/plain": [
323 |        "interactive(children=(FloatSlider(value=2.1, description='x0x', max=5.0, min=-5.0), FloatSlider(value=-1.1, de…"
324 |       ]
325 |      },
326 |      "metadata": {},
327 |      "output_type": "display_data"
328 |     }
329 |    ],
330 |    "source": [
331 |     "fig_netwton2d, ax_newton2d = plt.subplots()\n",
332 |     "fig_netwton2d.suptitle(\"Newton 2D\")\n",
333 |     "\n",
334 |     "@wd.interact(x0x=wd.FloatSlider(min=-5, max=5, value=2.1),\n",
335 |     "             x0y=wd.FloatSlider(min=-5, max=5, value=-1.1),\n",
336 |     "             N=wd.IntSlider(min=1, max=100, value=1))\n",
337 |     "def plot_newton(x0x, x0y, N):\n",
338 |     "    x0 = np.array([x0x, x0y])\n",
339 |     "    ax_newton2d.clear()\n",
340 |     "    ax_newton2d.quiver(X, Y, ZX, ZY)\n",
341 |     "    Xnwt = newton_naive(F, dF, x0, N)\n",
342 |     "    ax_newton2d.scatter(Xnwt[:,0], Xnwt[:,1], marker='o', facecolors='none', edgecolors='r')\n",
343 |     "    ax_newton2d.set_xlim([0, 5])\n",
344 |     "    ax_newton2d.set_ylim([-4, 1])\n",
345 |     "    plt.show()\n",
346 |     "    print(\"Solution (current guess): \" + str(Xnwt[N, :]))\n",
347 |     "    print(\"Solution (scipy):  \" + str(newton(F, x0, maxiter=100)))"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "# Netwon's Method for Optimization\n",
355 |     "Newton's method can be used for optimization: If one wants to find a solution to\n",
356 |     "$$ \\min_{x \\in \\mathbb{R}}{f(x)} $$\n",
357 |     "then a neccessary condition for a local optimum $x^*$ is $f'(x^*)=0$. Thus by applying Newton's method to $f'$ one can find a local optimum of $f$. The Newton sequence is then given by\n",
358 |     "$$ x_{n+1} = x_n - \\frac{f'(x_n)}{f''(x_n)}. $$\n",
359 |     "As we can see, this requires $f$ to be twice differentiable and requires the computation of second order derivatives. This also applies in multiple dimensions: If $f:\\mathbb{R}^m \\to \\mathbb{R}$, then a necessary condition for $x^*$ to be a local optimum is that $\\nabla F: \\mathbb{R}^m \\to \\mathbb{R}^m$ vanishes at $x^*$. The Newton sequence requires the computation of the Hessian $\\nabla^2 F: \\mathbb{R}^m \\to \\mathbb{R}^{m \\times m}$:\n",
360 |     "$$ x_{n+1} = x_n - \\nabla^2 F(x_n)^{-1} \\nabla F(x_n)$$\n",
361 |     "For that reason, other methods such as *gradient descent* are sometimes used."
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {},
367 |    "source": [
368 |     "# Properties of the Newton's Method\n",
369 |     "\n",
370 |     "**1. Local Quadratic Convergence:** The Newton sequence converges quadratically against the root in a local neighbourhood around the root. Thus, the choice of a good starting value is key. In particular, if $F$ has multiple roots, Newton's method finds one root, not all of them.\n",
371 |     "\n",
372 |     "**2. Singluar Jacobian:** If the Jacobian $\\nabla F$ fails to be invertible, the computation of the next element in the sequence becomes problematic.\n",
373 |     "\n",
374 |     "**3. Cycles:** For certain values, the sequence can become periodic, thus failing to converge.\n",
375 |     "\n",
376 |     "**4. Computational Complexities:** Calculating the full Jacobian $\\nabla F$ and solving the resulting system of linear equations in every step can be quite intense computationally.\n",
377 |     "\n",
378 |     "The last step can be addressed by replacing the full Jacobian $\\nabla F$ by an approximation. Those methods are called *Quasi-Newton methods*. "
379 |    ]
380 |   }
381 |  ],
382 |  "metadata": {
383 |   "kernelspec": {
384 |    "display_name": "heroku",
385 |    "language": "python",
386 |    "name": "heroku"
387 |   },
388 |   "language_info": {
389 |    "codemirror_mode": {
390 |     "name": "ipython",
391 |     "version": 3
392 |    },
393 |    "file_extension": ".py",
394 |    "mimetype": "text/x-python",
395 |    "name": "python",
396 |    "nbconvert_exporter": "python",
397 |    "pygments_lexer": "ipython3",
398 |    "version": "3.6.12"
399 |   },
400 |   "toc": {
401 |    "base_numbering": 1,
402 |    "nav_menu": {},
403 |    "number_sections": true,
404 |    "sideBar": true,
405 |    "skip_h1_title": false,
406 |    "title_cell": "Table of Contents",
407 |    "title_sidebar": "Contents",
408 |    "toc_cell": false,
409 |    "toc_position": {},
410 |    "toc_section_display": true,
411 |    "toc_window_display": false
412 |   }
413 |  },
414 |  "nbformat": 4,
415 |  "nbformat_minor": 2
416 | }
417 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy==1.14.5
 2 | matplotlib==2.2.2
 3 | ipywidgets==7.6.3
 4 | scikit-learn==0.21.2
 5 | scipy==1.5.4
 6 | GPy==1.9.9
 7 | ipympl==0.6.3
 8 | keras==2.2.2
 9 | tensorflow==1.10.0
10 | voila
11 | 


--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.6.12


--------------------------------------------------------------------------------
/start.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<p style=\"font-size:30px; text-align:center\"><b>Machine Learning in Finance</b></p>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "<div style=\"text-align: right\">(C) <a href=\"https://github.com/niknow\">Nikolai Nowaczyk</a>, <a href=\"https://github.com/Lapsilago\">Jörg Kienitz</a> 2021</div>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "<style>\n",
 22 |     "body {\n",
 23 |     "    counter-reset: h1\n",
 24 |     "}\n",
 25 |     "\n",
 26 |     "h1 {\n",
 27 |     "    counter-reset: h2\n",
 28 |     "}\n",
 29 |     "\n",
 30 |     "h2 {\n",
 31 |     "    counter-reset: h3\n",
 32 |     "}\n",
 33 |     "\n",
 34 |     "h3 {\n",
 35 |     "    counter-reset: h4\n",
 36 |     "}\n",
 37 |     "\n",
 38 |     "h1:before {\n",
 39 |     "    counter-increment: h1;\n",
 40 |     "    content: counter(h1) \". \"\n",
 41 |     "}\n",
 42 |     "\n",
 43 |     "h2:before {\n",
 44 |     "    counter-increment: h2;\n",
 45 |     "    content: counter(h1) \".\" counter(h2) \". \"\n",
 46 |     "}\n",
 47 |     "\n",
 48 |     "h3:before {\n",
 49 |     "    counter-increment: h3;\n",
 50 |     "    content: counter(h1) \".\" counter(h2) \".\" counter(h3) \". \"\n",
 51 |     "}\n",
 52 |     "\n",
 53 |     "h4:before {\n",
 54 |     "    counter-increment: h4;\n",
 55 |     "    content: counter(h1) \".\" counter(h2) \".\" counter(h3) \".\" counter(h4) \". \"\n",
 56 |     "}\n",
 57 |     "</style>"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "# Regression"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "## [Linear Regression](regression_revisited/regression_revisited.ipynb)\n",
 72 |     "A recap of linear regression - a core fundamental of machine learning."
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "## [Local Regression](local_regression/local_regression.ipynb)\n",
 80 |     "Local regression is a refinement of linear regression that adapts the model at each point of the prediction."
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "## [Gaussian Process Regression](gaussian_process_regression/gaussian_process_regression.ipynb)\n",
 88 |     "An advanced regression technique that produces not only predictions, but also confidence bounds around them."
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## [Dynamically Controlled Kernel Estimation (DCKE)](dynamically_controlled_kernel_estimation/dynamically_controlled_kernel_estimation.ipynb)\n",
 96 |     "A combination of local regression, control variates and Gaussian process regression to estimate conditional expectations. The method is model free, data-driven an particularly suited for financial applications."
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "# Neural Network Topologies"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "## [Multilayer Perceptrons (MLP)](neural_network_intro/neural_network_intro_model_setup.ipynb)\n",
111 |     "Introduction to the most common form of artificial neural networks (ANN)."
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "## [Long-Term-Short-Term-Memory networks (LSTM)](lstm_intro/lstm_intro.ipynb)\n",
119 |     "Introduction to LSTMs, a popular form of recurrent neural networks (RNNs)."
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "# Network Training & Optimization Techniques"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "## [Newton's Method](newton_gradient_backprop/newton.ipynb)\n",
134 |     "A recap of the Newton's method."
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "## [Gradient Descent](newton_gradient_backprop/gradient_descent.ipynb)\n",
142 |     "Mathematical foundations and basics of gradient descent."
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "## [Backpropagation](newton_gradient_backprop/backpropagation.ipynb)\n",
150 |     "Derivation of the backpropagation algorithm."
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "## [Adjoint Method](newton_gradient_backprop/adjoint.ipynb)\n",
158 |     "Relationship between backpropagation and the adjoint method."
159 |    ]
160 |   }
161 |  ],
162 |  "metadata": {
163 |   "kernelspec": {
164 |    "display_name": "heroku",
165 |    "language": "python",
166 |    "name": "heroku"
167 |   },
168 |   "language_info": {
169 |    "codemirror_mode": {
170 |     "name": "ipython",
171 |     "version": 3
172 |    },
173 |    "file_extension": ".py",
174 |    "mimetype": "text/x-python",
175 |    "name": "python",
176 |    "nbconvert_exporter": "python",
177 |    "pygments_lexer": "ipython3",
178 |    "version": "3.6.12"
179 |   },
180 |   "toc": {
181 |    "base_numbering": 1,
182 |    "nav_menu": {},
183 |    "number_sections": true,
184 |    "sideBar": true,
185 |    "skip_h1_title": false,
186 |    "title_cell": "Table of Contents",
187 |    "title_sidebar": "Contents",
188 |    "toc_cell": false,
189 |    "toc_position": {},
190 |    "toc_section_display": true,
191 |    "toc_window_display": false
192 |   }
193 |  },
194 |  "nbformat": 4,
195 |  "nbformat_minor": 2
196 | }
197 | 


--------------------------------------------------------------------------------