├── .gitignore
├── LICENSE
├── README.md
├── conf.py
├── data
    ├── Lagrange_multiplier.png
    ├── airports.csv
    ├── leslie.png
    ├── markov.png
    ├── milkmaid.png
    ├── mystery.txt
    ├── names.csv
    ├── pagerank.png
    ├── pubmed.pic
    ├── q4.png
    ├── q5.dot~
    ├── q5.png
    ├── spectral.png
    └── vecs.png
├── exams
    ├── Midterm01.ipynb
    ├── Midterm01_Scratch.ipynb
    ├── Midterm01_Solutions.ipynb
    ├── Midterm02.ipynb
    ├── Midterm02_Q4revision.ipynb
    ├── Midterm02_Solutions.ipynb
    ├── MockExam01.ipynb
    ├── MockExam01_Solutions.ipynb
    ├── MockExam02.ipynb
    ├── MockExam02_Solutions.ipynb
    ├── MockExam03.ipynb
    ├── MockExam03_Solutions.ipynb
    ├── Q1.npy
    ├── Q2.npy
    ├── Q3.png
    ├── Q5.npy
    ├── mtcars.png
    └── plots.png
├── homework
    ├── HW06.ipynb
    ├── Homework01.ipynb
    ├── Homework02.ipynb
    ├── Homework03.ipynb
    ├── Homework04.ipynb
    ├── Homework05.ipynb
    ├── figs
    │   ├── 7_1.png
    │   ├── 7_2A.png
    │   ├── 7_2B.png
    │   ├── 7_3.png
    │   └── 7_4.png
    ├── forest_fire.mp4
    └── forest_fire.png
├── index.rst
├── labs
    ├── Lab01.ipynb
    ├── Lab01_Solutions.ipynb
    ├── Lab02.ipynb
    ├── Lab02_Solutions.ipynb
    ├── Lab03.ipynb
    ├── Lab03_Solutions.ipynb
    ├── Lab04.ipynb
    ├── Lab04_Solutions.ipynb
    ├── Lab05.ipynb
    ├── Lab05_Solutions.ipynb
    ├── Lab06.ipynb
    ├── Lab06_Solutions.ipynb
    ├── Lab07.ipynb
    ├── Lab07_Solutions.ipynb
    ├── Lab08.ipynb
    ├── Lab08_Solutions.ipynb
    └── figs
    │   └── elephant.jpg
├── notebooks
    ├── S01_Jupyter_and_Python_Annotated.ipynb
    ├── S02_Text_Annotated.ipynb
    ├── S03_Numpy_Annotated.ipynb
    ├── S04_Using_Pandas_Annotated.ipynb
    ├── S05_Graphics_Annotated.ipynb
    ├── S06_Functional_Annotated.ipynb
    ├── S07 Introduction to Statistical Computing.ipynb
    ├── S07A_Scalars_Annotated.ipynb
    ├── S07B_Vectors_Annotated.ipynb
    ├── S07C_Matrices_Annotated.ipynb
    ├── S07D_Sparse_Matrices_Annotated.ipynb
    ├── S08A_Matrices_Linear_Combinations_Annotated.ipynb
    ├── S08B_Sovling_Linear_Equations_Annotated.ipynb
    ├── S08C_Least_Squares.ipynb
    ├── S08D_PCA.ipynb
    ├── S08E_SVD.ipynb
    ├── S08F_LinearAlgebraExamples.ipynb
    ├── S08G_Linear_Algebra_Application_Exercises.ipynb
    ├── S08H_Linear_Algebra_Applications.ipynb
    ├── S08_Linear_Algebra_Review.ipynb
    ├── S09A_Root_Finding.ipynb
    ├── S09B_Optimization.ipynb
    ├── S09C_Optimization_Algorithms.ipynb
    ├── S09D_Optimization_Examples.ipynb
    ├── S09E_Optimization_Line_Search.ipynb
    ├── S09F_Least_Squares_Optimization.ipynb
    ├── S09G_Gradient_Descent_Optimization.ipynb
    ├── S09H_Constrained_Optimization.ipynb
    ├── S10A_Parallel_Porgramming.ipynb
    ├── S10B_Multicore_Parallelism.ipynb
    ├── S10C_IPyParallel.ipynb
    ├── S11A_Overview_numba_cython.ipynb
    ├── S11B_Numba.ipynb
    ├── S11C_Cython.ipynb
    ├── S12_CPP.ipynb
    ├── S13_pybind11.ipynb
    ├── S14A_Random_Variables.ipynb
    ├── S14B_Probabilisitc_Programming.ipynb
    ├── S14C_Monte_Carlo_Methods.ipynb
    ├── S14D_Monte_Carlo_Integration.ipynb
    ├── S15A_MarkovChains.ipynb
    ├── S15B_MCMC.ipynb
    ├── S15C_HMC.ipynb
    ├── S16A_PyMC3.ipynb
    ├── S16B_PyMC3.ipynb
    ├── S16C_PyMC3.ipynb
    ├── S16D_PyMC3.ipynb
    ├── S16E_PyStan.ipynb
    ├── S16F_TF.ipynb
    ├── data
    │   ├── HtWt.csv
    │   ├── cal_house.npy
    │   ├── radon.csv
    │   └── targt.npy
    └── figs
    │   ├── Lagrange_multiplier.png
    │   ├── agents.png
    │   ├── alap.png
    │   ├── asap.png
    │   ├── auto_diff.png
    │   ├── bb1.png
    │   ├── bb2.png
    │   ├── bb3.png
    │   ├── box_loop.png
    │   ├── data_flow.png
    │   ├── elim1.png
    │   ├── elim2.png
    │   ├── inference.png
    │   ├── mcmc.png
    │   ├── operations.png
    │   ├── pagerank.png
    │   ├── score_distribution.png
    │   ├── slice.jpg
    │   └── spectral.png
└── project
    └── template.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | .DS_Store
  2 | foo.py
  3 | 
  4 | *.cpp
  5 | *.o
  6 | *.exe
  7 | *.out
  8 | hello*
  9 | 
 10 | # Byte-compiled / optimized / DLL files
 11 | __pycache__/
 12 | *.py[cod]
 13 | *$py.class
 14 | 
 15 | # C extensions
 16 | *.so
 17 | 
 18 | # Distribution / packaging
 19 | .Python
 20 | build/
 21 | develop-eggs/
 22 | dist/
 23 | downloads/
 24 | eggs/
 25 | .eggs/
 26 | lib/
 27 | lib64/
 28 | parts/
 29 | sdist/
 30 | var/
 31 | wheels/
 32 | pip-wheel-metadata/
 33 | share/python-wheels/
 34 | *.egg-info/
 35 | .installed.cfg
 36 | *.egg
 37 | MANIFEST
 38 | 
 39 | # PyInstaller
 40 | #  Usually these files are written by a python script from a template
 41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 42 | *.manifest
 43 | *.spec
 44 | 
 45 | # Installer logs
 46 | pip-log.txt
 47 | pip-delete-this-directory.txt
 48 | 
 49 | # Unit test / coverage reports
 50 | htmlcov/
 51 | .tox/
 52 | .nox/
 53 | .coverage
 54 | .coverage.*
 55 | .cache
 56 | nosetests.xml
 57 | coverage.xml
 58 | *.cover
 59 | *.py,cover
 60 | .hypothesis/
 61 | .pytest_cache/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Django stuff:
 68 | *.log
 69 | local_settings.py
 70 | db.sqlite3
 71 | db.sqlite3-journal
 72 | 
 73 | # Flask stuff:
 74 | instance/
 75 | .webassets-cache
 76 | 
 77 | # Scrapy stuff:
 78 | .scrapy
 79 | 
 80 | # Sphinx documentation
 81 | docs/_build/
 82 | 
 83 | # PyBuilder
 84 | target/
 85 | 
 86 | # Jupyter Notebook
 87 | .ipynb_checkpoints
 88 | 
 89 | # IPython
 90 | profile_default/
 91 | ipython_config.py
 92 | 
 93 | # pyenv
 94 | .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 | 
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 | 
110 | # SageMath parsed files
111 | *.sage.py
112 | 
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 | 
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 | 
126 | # Rope project settings
127 | .ropeproject
128 | 
129 | # mkdocs documentation
130 | /site
131 | 
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 | 
137 | # Pyre type checker
138 | .pyre/
139 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 cliburn
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # STA 663 (Spring 2020) Syllabus
  2 | 
  3 | ## Synopsis
  4 | 
  5 | STA 663: Computational Statistics and Statistical Computing
  6 | 
  7 | This course is designed for graduate research students who need to analyze complex data sets, and/or implement efficient statistical algorithms from the literature. We focus on the following analytical skills:
  8 | 
  9 | Functional programming in Python: Python is a dynamic programming language that is increasingly dominant in many scientific domains such as data science, computer vision and deep learning. Modern parallel and distributed programming tools such as Spark and TensorFlow encourage a functional programming style that emphasizes use of pure functions and lazy evaluation. The course will develop fluency in the Python language and the standard scientific libraries `numpy`, `scipy`, `pnadas`, `matplotlib` and `seaborn`, and showcase functional idioms for numerical algorithms.
 10 | 
 11 | Statistical algorithms: Statisticians need to understand the methods they use, so that the methods can be used appropriately and extended if necessary. Using Python, we will study common numerical algorithms used in statistical model construction and fitting, starting with the basic tools for solving numerical problems, and moving on to statistical inference using optimization and simulation strategies. Algorithmic concepts covered include matrix decompositions, solution of linear systems, optimization, interpolation, clustering, numerical and Monte Carlo integration, MCMC samplers and probabilistic deep learning.
 12 | 
 13 | Improving performance: With real-world data being generated at an ever-increasing clip, we also need to be concerned with computational performance, so that we can complete our calculations in a reasonable time or handle data that is too large to fit into memory. To do so, we need to understand how to evaluate the performance of different data structures and algorithms, language-specific idioms for efficient data processing, native code compilation, and exploit resources for parallel computing. One rapidly evolving area are statistical approaches that capitalizes on the high-performance libraries originally developed for deep learning.
 14 | 
 15 | The capstone project involves the creation of an optimized Python package implementing a statistical algorithm from the research literature.
 16 | 
 17 | ## Learning objectives
 18 | 
 19 | - Develop fluency in Python for scientific computing
 20 | - Explain how common statistical algorithms work
 21 | - Construct models using probabilistic programming
 22 | - Implement, test, optimize, and package a statistical algorithm
 23 | 
 24 | Note: The syllabus is aspirational and is likely to be adjusted over the semester depending on how fast we are able to cover the material.
 25 | 
 26 | ## Administration
 27 | 
 28 | ### Office hours 
 29 | 
 30 | - Cliburn: Thursday 4-5 PM at 11078 Hock Suite 1102
 31 | - Zixi Wang:  2-4 pm Thursday at 203B Old Chemistry
 32 | - Chudi Zhong: 9:30-11:30am Tuesday at 203B Old Chemistry
 33 | 
 34 | ### Grading
 35 | 
 36 | - Homework 40%
 37 | - Midterm 1 15%
 38 | - Midterm 2 15%
 39 | - Project 30%
 40 | 
 41 | ### Point range for letter grade
 42 | 
 43 | - A 94 - 100
 44 | - B 85 - 93
 45 | - C 70 - 84
 46 | - D Below 70
 47 | 
 48 | Grades will be based on rounded scores.
 49 | 
 50 | ### Module 1: Develop fluency in Python for scientific computing
 51 | 
 52 | #### 1. Jupyter and Python
 53 | 
 54 | - Introduction to Jupyter
 55 | - Using Markdown
 56 | - Magic functions
 57 | - REPL
 58 | - Data types
 59 | - Operators
 60 | - Collections
 61 | - Functions and methods
 62 | - Control flow
 63 | - Packages and namespace
 64 | - Coding style
 65 | - Understanding error messages
 66 | - Getting help
 67 | - Saving and exporting Jupyter notebooks
 68 | 
 69 | #### 2. Text 
 70 | 
 71 | - The string package
 72 | - String methods
 73 | - Regular expressions
 74 | - Loading and saving text files
 75 | - Context managers
 76 | - Dealing with encoding errors
 77 | 
 78 | #### 3. Numerics
 79 | 
 80 | - Issues with floating point numbers
 81 | - The `math` package
 82 | - Constructing `numpy` arrays
 83 | - Indexing
 84 | - Splitting and merging arrays
 85 | - Universal functions - transforms and reductions
 86 | - Broadcasting rules
 87 | - Masking
 88 | - Sparse matrices with `scipy.sparse`
 89 | 
 90 | #### 4. Data manipulation
 91 | 
 92 | - Series and DataFrames in `pandas`
 93 | - Creating, loading and saving DataFrames
 94 | - Basic information
 95 | - Indexing
 96 | - Method chaining
 97 | - Selecting rows and columns
 98 | - Transformations
 99 | - Aggregate functions
100 | - Split-apply-combine
101 | - Window functions
102 | - Hierarchical indexing
103 | 
104 | #### 5. Graphics
105 | 
106 | - Grammar of graphics
107 | - Graphics from the group up with `matplotlib`
108 | - Statistical visualizations with `seaborn`
109 | 
110 | #### 6. Functional programming in Python
111 | 
112 | - Writing a custom function
113 | - Pure functions
114 | - Anonymous functions
115 | - Lazy evaluation
116 | - Higher-order functions
117 | - Decorators
118 | - Partial application
119 | - Using operator
120 | - Using `functional`
121 | - Using `itertools`
122 | - Pipelines with `toolz`
123 | 
124 | ### Midterm 1 (31 Jan 2020 08:15 - 09:45)
125 | 
126 | ### Module 2: Explain how common statistical algorithms work
127 | 
128 | #### 7. Data structures, algorithms and complexity
129 | 
130 | - Sequence and mapping containers
131 | - Using collections
132 | - Sorting
133 | - Priority queues
134 | - Working with recursive algorithms
135 | - Tabling and dynamic programing
136 | - Time and space complexity
137 | - Measuring time
138 | - Measuring space
139 | 
140 | #### 8. Solving linear equations
141 | 
142 | - Solving Ax = bAx=b
143 | - Gaussian elimination and LR decomposition
144 | - Symmetric matrices and Cholesky decomposition
145 | - Geometry of the normal equations
146 | - Gradient descent to solve linear equations
147 | - Using `scipy.linalg`
148 | 
149 | #### 9. Singular Value Decomposition
150 | 
151 | - Change of basis
152 | - Spectral decomposition
153 | - Geometry of spectral decomposition
154 | - The four fundamental subspaces of linear algebra
155 | - The SVD
156 | - Geometry of spectral decomposition
157 | - SVD and low rank approximation
158 | - Using `scipy.linalg`
159 | 
160 | #### 10. Optimization I
161 | 
162 | - Root finding
163 | - Univariate optimization
164 | - Geometry and calculus of optimization
165 | - Gradient descent
166 | - Batch, mini-batch and stochastic variants
167 | - Improving gradient descent
168 | - Root finding and univariate optimization with `scipy.optim`
169 | 
170 | #### 11. Optimization II
171 | 
172 | - Nelder-Mead (Zeroth order method)
173 | - Line search methods
174 | - Trust region methods
175 | - IRLS
176 | - Lagrange multipliers, KKT and constrained optimization
177 | - Multivariate optimization with `scipy.optim`
178 | 
179 | #### 12. Dimension reduction
180 | 
181 | - Matrix factorization - PCA and SVD, MMF
182 | - Optimization methods - MDS and t-SNE
183 | - Using `sklearn.decomposition` and `sklearn.manifold`
184 |   
185 | #### 13. Interpolation
186 | 
187 | - Polynomial
188 | - Spline
189 | - Gaussian process
190 | - Using `scipy.interpolate`
191 | 
192 | #### 14. Clustering
193 | 
194 | - Partitioning (k-means)
195 | - Hierarchical (agglomerative Hierarchical Clustering)
196 | - Density based (dbscan, mean-shift)
197 | - Model based (GMM)
198 | - Self-organizing maps
199 | - Cluster initialization
200 | - Cluster evaluation
201 | - Cluster alignment (Munkres)
202 | - Using `skearn.cluster`
203 | 
204 | ### Midterm 2
205 | 
206 | ### Revised Schedule
207 | 
208 | Mon 4:40 - 5:55 PM EST Zoom: https://zoom.us/j/900920288
209 | Wed 4:40 - 5:55 PM EST Zoom: https://zoom.us/j/395651734 
210 | 
211 | ## Module 3: Making code faster
212 | 
213 | #### Mar 23 Parallel programming
214 | 
215 | - Parallel, concurrent, distributed
216 | - Synchronous and asynchronous calls
217 | - Threads and processes
218 | - Shared memory programming pitfalls: deadlock and race conditions
219 | - Embarrassingly parallel programs with `concurrent.futures` and `multiprocessing`
220 | - Using `ipyparallel` for interactive parallelization
221 | 
222 | #### Mar 25 JIT and AOT code optimization
223 | 
224 | - Source code, machine code, runtime
225 | - Interpreted vs compiled code
226 | - Static vs dynamic typing
227 | - The costs of dynamic typing
228 | - Vectorization in interpreted languages
229 | - JIT compilation with `numba`
230 | - AOT compilation with `cython`
231 | 
232 | #### Mar 27 Midterm 2
233 | 
234 | #### Mar 30 Introduction to modern C++
235 | 
236 | - Hello world
237 | - Headers and source files
238 | - Compiling and executing a C++ program
239 | - Using `make`
240 | - Basic types and type declaration
241 | - Loops and conditional execution
242 | - I/O
243 | - Functions
244 | - Template functions
245 | - Anonymous functions
246 | 
247 | #### Apr 01 Wrapping C++ for use in Python
248 | 
249 | - Using STL containers
250 | - Using STL algorithms
251 | - Numeric libraries for C++
252 | - Hello world with `pybind11`
253 | - Wrapping a function with `pybind11`
254 | - Integration with `eigen`
255 | 
256 | #### Apr 03 Lab
257 | 
258 | ### Module 4: Probabilistic Programming
259 | 
260 | #### Apr 06 Random numbers and Monte Carlo methods
261 | 
262 | - Working with probability distributions
263 | - Where do random numbers in the computer come from?
264 | - Sampling form data
265 | - Bootstrap
266 | - Permutation
267 | - Leave-one-out
268 | - Likelihood and MLE
269 | - Using `random`,  `np.random` and  `scipy.statistics`
270 | 
271 | #### Apr 08 Review of Markov Chain Monte Carlo (MCMC)
272 | 
273 | - Bayes theorem and integration
274 | - Numerical integration (quadrature)
275 | - MCMC concepts
276 | - Makrov chains
277 | - Metropolis-Hastings random walk
278 | - Gibbs sampler
279 | - Hamiltonian systems
280 | - Integration of Hamiltonian system dynamics
281 | - Energy and probability distributions
282 | - HMC
283 | - NUTS
284 | 
285 | #### Apr 10 Lab
286 | 
287 | #### Apr 13 PyMC and PyStan
288 | 
289 | - Multi-level Bayesian models
290 | - Using daft to draw plate diagrams
291 | - Using `pymc3`
292 | - Using `pystan`
293 | 
294 | #### Apr 15 TensorFlow Probability
295 | 
296 | - TensorFlow basics
297 | - Distributions and transformations
298 | - Building probabilistic models with `Tfp`
299 | - Basic concepts of deep learning
300 | - Probabilistic deep learning
301 | 


--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'STA663-2020'
21 | copyright = '2020, Cliburn  Chan'
22 | author = 'Cliburn  Chan'
23 | 
24 | # The full version, including alpha/beta/rc tags
25 | release = '1.0'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 |     'nbsphinx',
35 |     'sphinx.ext.mathjax',
36 | ]
37 | 
38 | # Add any paths that contain templates here, relative to this directory.
39 | templates_path = ['_templates']
40 | 
41 | # List of patterns, relative to source directory, that match files and
42 | # directories to ignore when looking for source files.
43 | # This pattern also affects html_static_path and html_extra_path.
44 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
45 | 
46 | 
47 | # -- Options for HTML output -------------------------------------------------
48 | 
49 | # The theme to use for HTML and HTML Help pages.  See the documentation for
50 | # a list of builtin themes.
51 | #
52 | html_theme = 'alabaster'
53 | 
54 | # Add any paths that contain custom static files (such as style sheets) here,
55 | # relative to this directory. They are copied after the builtin static files,
56 | # so a file named "default.css" will overwrite the builtin "default.css".
57 | html_static_path = ['_static']
58 | 
59 | 
60 | nbsphinx_allow_errors = True
61 | 


--------------------------------------------------------------------------------
/data/Lagrange_multiplier.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/Lagrange_multiplier.png


--------------------------------------------------------------------------------
/data/leslie.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/leslie.png


--------------------------------------------------------------------------------
/data/markov.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/markov.png


--------------------------------------------------------------------------------
/data/milkmaid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/milkmaid.png


--------------------------------------------------------------------------------
/data/mystery.txt:
--------------------------------------------------------------------------------
 1 | ﻿Intensive blood-glucose control with sulphonylureas or insulin compared with
 2 |       conventional treatment and risk of complications in patients with type 2 diabetes
 3 | BACKGROUND: Improved blood-glucose control decreases the progression of diabetic 
 4 |       microvascular disease, but the effect on macrovascular complications is unknown. 
 5 |       There is concern that sulphonylureas may increase cardiovascular mortality in
 6 |       patients with type 2 diabetes and that high insulin concentrations may enhance
 7 |       atheroma formation. We compared the effects of intensive blood-glucose control
 8 |       with either sulphonylurea or insulin and conventional treatment on the risk of
 9 |       microvascular and macrovascular complications in patients with type 2 diabetes in
10 |       a randomised controlled trial. METHODS: 3867 newly diagnosed patients with type 2
11 |       diabetes, median age 54 years (IQR 48-60 years), who after 3 months' diet
12 |       treatment had a mean of two fasting plasma glucose (FPG) concentrations of
13 |       6.1-15.0 mmol/L were randomly assigned intensive policy with a sulphonylurea
14 |       (chlorpropamide, glibenclamide, or glipizide) or with insulin, or conventional
15 |       policy with diet. The aim in the intensive group was FPG less than 6 mmol/L. In
16 |       the conventional group, the aim was the best achievable FPG with diet alone;
17 |       drugs were added only if there were hyperglycaemic symptoms or FPG greater than
18 |       15 mmol/L. Three aggregate endpoints were used to assess differences between
19 |       conventional and intensive treatment: any diabetes-related endpoint (sudden
20 |       death, death from hyperglycaemia or hypoglycaemia, fatal or non-fatal myocardial 
21 |       infarction, angina, heart failure, stroke, renal failure, amputation [of at least
22 |       one digit], vitreous haemorrhage, retinopathy requiring photocoagulation,
23 |       blindness in one eye, or cataract extraction); diabetes-related death (death from
24 |       myocardial infarction, stroke, peripheral vascular disease, renal disease,
25 |       hyperglycaemia or hypoglycaemia, and sudden death); all-cause mortality. Single
26 |       clinical endpoints and surrogate subclinical endpoints were also assessed. All
27 |       analyses were by intention to treat and frequency of hypoglycaemia was also
28 |       analysed by actual therapy. FINDINGS: Over 10 years, haemoglobin A1c (HbA1c) was 
29 |       7.0% (6.2-8.2) in the intensive group compared with 7.9% (6.9-8.8) in the
30 |       conventional group--an 11% reduction. There was no difference in HbA1c among
31 |       agents in the intensive group. Compared with the conventional group, the risk in 
32 |       the intensive group was 12% lower (95% CI 1-21, p=0.029) for any diabetes-related
33 |       endpoint; 10% lower (-11 to 27, p=0.34) for any diabetes-related death; and 6%
34 |       lower (-10 to 20, p=0.44) for all-cause mortality. Most of the risk reduction in 
35 |       the any diabetes-related aggregate endpoint was due to a 25% risk reduction
36 |       (7-40, p=0.0099) in microvascular endpoints, including the need for retinal
37 |       photocoagulation. There was no difference for any of the three aggregate
38 |       endpoints between the three intensive agents (chlorpropamide, glibenclamide, or
39 |       insulin). Patients in the intensive group had more hypoglycaemic episodes than
40 |       those in the conventional group on both types of analysis (both p<0.0001). The
41 |       rates of major hypoglycaemic episodes per year were 0.7% with conventional
42 |       treatment, 1.0% with chlorpropamide, 1.4% with glibenclamide, and 1.8% with
43 |       insulin. Weight gain was significantly higher in the intensive group (mean 2.9
44 |       kg) than in the conventional group (p<0.001), and patients assigned insulin had a
45 |       greater gain in weight (4.0 kg) than those assigned chlorpropamide (2.6 kg) or
46 |       glibenclamide (1.7 kg). INTERPRETATION: Intensive blood-glucose control by either
47 |       sulphonylureas or insulin substantially decreases the risk of microvascular
48 |       complications, but not macrovascular disease, in patients with type 2
49 |       diabetes.(ABSTRACT TRUNCATED)
50 | 


--------------------------------------------------------------------------------
/data/pagerank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/pagerank.png


--------------------------------------------------------------------------------
/data/q4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/q4.png


--------------------------------------------------------------------------------
/data/q5.dot~:
--------------------------------------------------------------------------------
 1 | 
 2 | digraph g {
 3 |     a -> a [label = 1]
 4 |     a -> b [label = 2]
 5 |     a -> c [label = 3]
 6 |     b -> c [label = 4]
 7 |     c -> a [label = 1]
 8 |     c -> b [label = 2]
 9 |     c -> d [label = 3]
10 |     d -> e [label = 4]
11 |     e -> a [label = 5]
12 | }
13 | 


--------------------------------------------------------------------------------
/data/q5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/q5.png


--------------------------------------------------------------------------------
/data/spectral.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/spectral.png


--------------------------------------------------------------------------------
/data/vecs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/data/vecs.png


--------------------------------------------------------------------------------
/exams/Midterm01.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is a closed book exam. You can use the `help()` function, and the `?` prefix or suffix but are restricted to a SINGLE browser tab.\n",
 12 |     "- All necessary imports are provided. You should not need to import any other packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": null,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "%matplotlib inline\n",
 23 |     "import matplotlib.pyplot as plt\n",
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "import string\n",
 27 |     "import collections\n",
 28 |     "import seaborn as sns\n",
 29 |     "from functools import reduce\n",
 30 |     "sns.set_context('notebook', font_scale=1.5)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "**1**. 20 points\n",
 38 |     "\n",
 39 |     "A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 × 99.\n",
 40 |     "\n",
 41 |     "Find the largest palindrome made from the product of two 3-digit numbers.\n",
 42 |     "\n",
 43 |     "(Source: Project Euler Problem 4)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "\n",
 53 |     "\n",
 54 |     "\n"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "**2**. 20 points\n",
 62 |     "\n",
 63 |     "Count the number of each unique word longer than 3 characters in the nursery rhyme given, ignoring case and removing punctuation.\n",
 64 |     "\n",
 65 |     "```text\n",
 66 |     "Humpty Dumpty sat on a wall,\n",
 67 |     "Humpty Dumpty had a great fall;\n",
 68 |     "All the king's horses and all the king's men\n",
 69 |     "Couldn't put Humpty together again.\n",
 70 |     "```"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "s = '''Humpty Dumpty sat on a wall,\n",
 80 |     "Humpty Dumpty had a great fall;\n",
 81 |     "All the king's horses and all the king's men\n",
 82 |     "Couldn't put Humpty together again.'''"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "\n",
 92 |     "\n",
 93 |     "\n"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "**3**. 20 points\n",
101 |     "\n",
102 |     "Create a DataFrame containing the standardized data for each row feature - that is, every row has mean 0 and standardized deviation 1. The DataFrame should have the same row and column names as the original."
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "df = pd.DataFrame(\n",
112 |     "    dict(ann=(23,67,1.6), bob=(24,92,1.7), dan=(30,89,1.8)), \n",
113 |     "    index=['age', 'wt', 'ht'])"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": [
122 |     "\n",
123 |     "\n",
124 |     "\n"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "**4**. 20 points\n",
132 |     "\n",
133 |     "Download the `mtcars` data set as a Dataframe from https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\n",
134 |     "\n",
135 |     "and create the following panel of strip plots\n",
136 |     "\n",
137 |     "![](./mtcars.png)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": [
146 |     "url = 'https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv'"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "metadata": {},
153 |    "outputs": [],
154 |    "source": [
155 |     "\n",
156 |     "\n",
157 |     "\n"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "**5**. 20 points\n",
165 |     "\n",
166 |     "Given \n",
167 |     "\n",
168 |     "```\n",
169 |     "y = [ 1.70222116,  1.95783341, -0.51612387, -2.06639513, -2.41302321,\n",
170 |     "     -0.73439023,  1.84515596,  11.58990399, -1.68093127, -1.31909487]\n",
171 |     "y_pred = [ 1., 0.54030231, -0.41614684, -0.9899925 , -0.65364362,\n",
172 |     "          0.28366219,  0.96017029, 0.75390225, -0.14550003, -0.91113026]\n",
173 |     "```\n",
174 |     "\n",
175 |     "find the residual sum of squares (RSS) - i.e. the sum of squared difference between `y` and `y_pred` , excluding any residual whose absolute value is greater or equal to 3. The residual or error is the difference between `y` and `y_pred`. Use only anonymous functions and the higher order functions map, reduce and filter to do so.\n",
176 |     "\n",
177 |     "Do not use any of the following\n",
178 |     "\n",
179 |     "- for loops\n",
180 |     "- comprehensions or generator expressions\n",
181 |     "- the sum function\n",
182 |     "- numpy"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "y = [ 1.70222116,  1.95783341, -0.51612387, -2.06639513, -2.41302321,\n",
192 |     "     -0.73439023,  1.84515596,  11.58990399, -1.68093127, -1.31909487]\n",
193 |     "y_pred = [ 1., 0.54030231, -0.41614684, -0.9899925 , -0.65364362,\n",
194 |     "          0.28366219,  0.96017029, 0.75390225, -0.14550003, -0.91113026]"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": null,
200 |    "metadata": {},
201 |    "outputs": [],
202 |    "source": [
203 |     "\n",
204 |     "\n",
205 |     "\n"
206 |    ]
207 |   }
208 |  ],
209 |  "metadata": {
210 |   "kernelspec": {
211 |    "display_name": "Python 3",
212 |    "language": "python",
213 |    "name": "python3"
214 |   },
215 |   "language_info": {
216 |    "codemirror_mode": {
217 |     "name": "ipython",
218 |     "version": 3
219 |    },
220 |    "file_extension": ".py",
221 |    "mimetype": "text/x-python",
222 |    "name": "python",
223 |    "nbconvert_exporter": "python",
224 |    "pygments_lexer": "ipython3",
225 |    "version": "3.8.2"
226 |   }
227 |  },
228 |  "nbformat": 4,
229 |  "nbformat_minor": 2
230 | }
231 | 


--------------------------------------------------------------------------------
/exams/Midterm01_Scratch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is a closed book exam. You can use the `help()` function, and the `?` prefix or suffix but are restricted to a SINGLE browser tab.\n",
 12 |     "- All necessary imports are provided. You should not need to import any other packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": null,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "%matplotlib inline\n",
 23 |     "import matplotlib.pyplot as plt\n",
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "import string\n",
 27 |     "import collections\n",
 28 |     "import seaborn as sns\n",
 29 |     "from functools import reduce\n",
 30 |     "sns.set_context('notebook', font_scale=1.5)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "**1**. 20 points\n",
 38 |     "\n",
 39 |     "A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 × 99.\n",
 40 |     "\n",
 41 |     "Find the largest palindrome made from the product of two 3-digit numbers.\n",
 42 |     "\n",
 43 |     "(Source: Project Euler Problem 4)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "- Loop over 3 digit numbers only\n",
 51 |     "- Check for palindrome should work for all numbers"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": []
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "**2**. 20 points\n",
 66 |     "\n",
 67 |     "Count the number of each unique word longer than 3 characters in the nursery rhyme given, ignoring case and removing punctuation.\n",
 68 |     "\n",
 69 |     "```text\n",
 70 |     "Humpty Dumpty sat on a wall,\n",
 71 |     "Humpty Dumpty had a great fall;\n",
 72 |     "All the king's horses and all the king's men\n",
 73 |     "Couldn't put Humpty together again.\n",
 74 |     "```"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "s = '''Humpty Dumpty sat on a wall,\n",
 84 |     "Humpty Dumpty had a great fall;\n",
 85 |     "All the king's horses and all the king's men\n",
 86 |     "Couldn't put Humpty together again.'''"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "- No special notes\n",
 94 |     "- Points deducted if solution is overly complex"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": []
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "**3**. 20 points\n",
109 |     "\n",
110 |     "Create a DataFrame containing the standardized data for each row feature - that is, every row has mean 0 and standardized deviation 1. The DataFrame should have the same row and column names as the original."
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "df = pd.DataFrame(\n",
120 |     "    dict(ann=(23,67,1.6), bob=(24,92,1.7), dan=(30,89,1.8)), \n",
121 |     "    index=['age', 'wt', 'ht'])"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "- This is simple if you use broadcasting rules\n",
129 |     "- Points deducted for row-by-row manual operations"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": []
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "**Alternative solution**"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": null,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": []
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "**4**. 20 points\n",
158 |     "\n",
159 |     "Download the `mtcars` data set as a Dataframe from https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\n",
160 |     "\n",
161 |     "and create the following panel of strip plots\n",
162 |     "\n",
163 |     "![](./mtcars.png)"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": null,
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": [
172 |     "url = 'https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv'"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {},
178 |    "source": [
179 |     "- If you don't use `melt` the solution is tedious\n",
180 |     "- If constructed manually, need to check\n",
181 |     "    - titles\n",
182 |     "    - axis labels\n",
183 |     "    - same vertical scale\n",
184 |     "    - color mapping"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": []
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "**5**. 20 points\n",
199 |     "\n",
200 |     "Given \n",
201 |     "\n",
202 |     "```\n",
203 |     "y = [ 1.70222116,  1.95783341, -0.51612387, -2.06639513, -2.41302321,\n",
204 |     "     -0.73439023,  1.84515596,  11.58990399, -1.68093127, -1.31909487]\n",
205 |     "y_pred = [ 1., 0.54030231, -0.41614684, -0.9899925 , -0.65364362,\n",
206 |     "          0.28366219,  0.96017029, 0.75390225, -0.14550003, -0.91113026]\n",
207 |     "```\n",
208 |     "\n",
209 |     "find the residual sum of squares (RSS) - i.e. the sum of squared difference between `y` and `y_pred` , excluding any residual whose absolute value is greater or equal to 3. The residual or error is the difference between `y` and `y_pred`. Use only anonymous functions and the higher order functions map, reduce and filter to do so.\n",
210 |     "\n",
211 |     "Do not use any of the following\n",
212 |     "\n",
213 |     "- for loops\n",
214 |     "- comprehensions or generator expressions\n",
215 |     "- the sum function\n",
216 |     "- numpy"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {},
223 |    "outputs": [],
224 |    "source": [
225 |     "y = [ 1.70222116,  1.95783341, -0.51612387, -2.06639513, -2.41302321,\n",
226 |     "     -0.73439023,  1.84515596,  11.58990399, -1.68093127, -1.31909487]\n",
227 |     "y_pred = [ 1., 0.54030231, -0.41614684, -0.9899925 , -0.65364362,\n",
228 |     "          0.28366219,  0.96017029, 0.75390225, -0.14550003, -0.91113026]"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "metadata": {},
234 |    "source": [
235 |     "- Minor deduction for use of zip\n",
236 |     "- Remember to filter"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {},
243 |    "outputs": [],
244 |    "source": []
245 |   }
246 |  ],
247 |  "metadata": {
248 |   "kernelspec": {
249 |    "display_name": "Python 3",
250 |    "language": "python",
251 |    "name": "python3"
252 |   },
253 |   "language_info": {
254 |    "codemirror_mode": {
255 |     "name": "ipython",
256 |     "version": 3
257 |    },
258 |    "file_extension": ".py",
259 |    "mimetype": "text/x-python",
260 |    "name": "python",
261 |    "nbconvert_exporter": "python",
262 |    "pygments_lexer": "ipython3",
263 |    "version": "3.7.4"
264 |   }
265 |  },
266 |  "nbformat": 4,
267 |  "nbformat_minor": 2
268 | }
269 | 


--------------------------------------------------------------------------------
/exams/Midterm02.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is an OPEN book exam.\n",
 12 |     "- Import any module you need from the standard Python library, `numpy`, `scipy`, `pandas`, `matplotlib` or `seaborn`. Do not use any other 3rd party packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "**1**. 20 points\n",
 21 |     "\n",
 22 |     "- Load the matrix stored in `Q1.npy` into `A` and use factorize using SVD (5 points)\n",
 23 |     "- Find the condition number of `A` from the SVD (5 points)\n",
 24 |     "- Generate the matrix that is the best rank 3 approximation to `A` (5 points)\n",
 25 |     "- Determine how many singular vectors would be needed to explain at least 95% of the variance of the original matrix `A` (5 points)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "\n",
 35 |     "\n",
 36 |     "\n"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "**2**. 20 points\n",
 44 |     "\n",
 45 |     "- Load the matrix in `Q2.npy` - this consists of two columns representing the x and y coordinates of 10 points\n",
 46 |     "- Find the equation of the circle that best fits these points (15 points)\n",
 47 |     "- Plot the points and fitted circle (5 points)\n",
 48 |     "\n",
 49 |     "Hint: You need to estimate the center of the circle and its radius."
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "\n",
 59 |     "\n",
 60 |     "\n"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "**3**. 20 points\n",
 68 |     "\n",
 69 |     "The figure below shows the current population of Durham, Chapel Hill and Raleigh. Arrows show fractions that move between cities each year.\n",
 70 |     "\n",
 71 |     "- What are the population sizes of the 3 cities after 3 years have passed? (5 points)\n",
 72 |     "- Find the steady state population of the 3 cities by solving a linear system. (15 points)\n",
 73 |     "\n",
 74 |     "Assume no births, deaths or any other fluxes other than those shown.\n",
 75 |     "\n",
 76 |     "![img](Q3.png)\n"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "\n",
 86 |     "\n",
 87 |     "\n"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "**4**. 20 points\n",
 95 |     "\n",
 96 |     "Consider the following matrix\n",
 97 |     "\n",
 98 |     "```\n",
 99 |     "A = np.array([\n",
100 |     "    [5,1,2],\n",
101 |     "    [1,5,3],\n",
102 |     "    [2,3,5]\n",
103 |     "])\n",
104 |     "```\n",
105 |     "\n",
106 |     "- Find the characteristic polynomial of the matrix `A` (5 points)\n",
107 |     "- Find the eigenvalues using the companion matrix method `C` (5 points)\n",
108 |     "- Find a matrix `P` such that $A = P C P^{-1}$ (10 points)"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "\n",
118 |     "\n",
119 |     "\n"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "**5**. 20 points\n",
127 |     "\n",
128 |     "The file `Q5.npy` contains the x and y coordinates in cols 1 and 2 respectively. \n",
129 |     "\n",
130 |     "- Find a cubic polynomial model to fit the data using the normal equations (5 points)\n",
131 |     "- Now solve using mini-batch gradient descent Use a learning rate of 0.0001. Start with a guess of a=1, b=1, c=1, d=1 for the model $ax^3 + bx^2 + cx + d$, and report $(a, b, c, d)$ after 1,000 epochs with a batch size of 5. (15 points)\n",
132 |     "\n",
133 |     "For simplicity, you don't need to randomize the order when choosing data for each batch."
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 37,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "\n",
143 |     "\n",
144 |     "\n"
145 |    ]
146 |   }
147 |  ],
148 |  "metadata": {
149 |   "kernelspec": {
150 |    "display_name": "Python 3",
151 |    "language": "python",
152 |    "name": "python3"
153 |   },
154 |   "language_info": {
155 |    "codemirror_mode": {
156 |     "name": "ipython",
157 |     "version": 3
158 |    },
159 |    "file_extension": ".py",
160 |    "mimetype": "text/x-python",
161 |    "name": "python",
162 |    "nbconvert_exporter": "python",
163 |    "pygments_lexer": "ipython3",
164 |    "version": "3.7.7"
165 |   }
166 |  },
167 |  "nbformat": 4,
168 |  "nbformat_minor": 2
169 | }
170 | 


--------------------------------------------------------------------------------
/exams/Midterm02_Q4revision.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is an OPEN book exam.\n",
 12 |     "- Import any module you need from the standard Python library, `numpy`, `scipy`, `pandas`, `matplotlib` or `seaborn`. Do not use any other 3rd party packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "**1**. 20 points\n",
 21 |     "\n",
 22 |     "- Load the matrix stored in `Q1.npy` into `A` and use factorize using SVD (5 points)\n",
 23 |     "- Find the condition number of `A` from the SVD (5 points)\n",
 24 |     "- Generate the matrix that is the best rank 3 approximation to `A` (5 points)\n",
 25 |     "- Determine how many singular vectors would be needed to explain at least 95% of the variance of the original matrix `A` (5 points)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "\n",
 35 |     "\n",
 36 |     "\n"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "**2**. 20 points\n",
 44 |     "\n",
 45 |     "- Load the matrix in `Q2.npy` - this consists of two columns representing the x and y coordinates of 10 points\n",
 46 |     "- Find the equation of the circle that best fits these points (15 points)\n",
 47 |     "- Plot the points and fitted circle (5 points)\n",
 48 |     "\n",
 49 |     "Hint: You need to estimate the center of the circle and its radius."
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "\n",
 59 |     "\n",
 60 |     "\n"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "**3**. 20 points\n",
 68 |     "\n",
 69 |     "The figure below shows the current population of Durham, Chapel Hill and Raleigh. Arrows show fractions that move between cities each year.\n",
 70 |     "\n",
 71 |     "- What are the population sizes of the 3 cities after 3 years have passed? (5 points)\n",
 72 |     "- Find the steady state population of the 3 cities by solving a linear system. (15 points)\n",
 73 |     "\n",
 74 |     "Assume no births, deaths or any other fluxes other than those shown.\n",
 75 |     "\n",
 76 |     "![img](Q3.png)\n"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "\n",
 86 |     "\n",
 87 |     "\n"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "**4**. 20 points\n",
 95 |     "\n",
 96 |     "Consider the following matrix\n",
 97 |     "\n",
 98 |     "```\n",
 99 |     "A = np.array([\n",
100 |     "    [5,2],\n",
101 |     "    [2,5]\n",
102 |     "])\n",
103 |     "```\n",
104 |     "\n",
105 |     "- Find the characteristic polynomial of the matrix `A` (5 points)\n",
106 |     "- Find the eigenvalues using the companion matrix method `C` (5 points)\n",
107 |     "- Find a matrix `P` such that $A = P C P^{-1}$ (10 points)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "\n",
117 |     "\n",
118 |     "\n"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "**5**. 20 points\n",
126 |     "\n",
127 |     "The file `Q5.npy` contains the x and y coordinates in cols 1 and 2 respectively. \n",
128 |     "\n",
129 |     "- Find a cubic polynomial model to fit the data using the normal equations (5 points)\n",
130 |     "- Now solve using mini-batch gradient descent Use a learning rate of 0.0001. Start with a guess of a=1, b=1, c=1, d=1 for the model $ax^3 + bx^2 + cx + d$, and report $(a, b, c, d)$ after 1,000 epochs with a batch size of 5. (15 points)\n",
131 |     "\n",
132 |     "For simplicity, you don't need to randomize the order when choosing data for each batch."
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 37,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "\n",
142 |     "\n",
143 |     "\n"
144 |    ]
145 |   }
146 |  ],
147 |  "metadata": {
148 |   "kernelspec": {
149 |    "display_name": "Python 3",
150 |    "language": "python",
151 |    "name": "python3"
152 |   },
153 |   "language_info": {
154 |    "codemirror_mode": {
155 |     "name": "ipython",
156 |     "version": 3
157 |    },
158 |    "file_extension": ".py",
159 |    "mimetype": "text/x-python",
160 |    "name": "python",
161 |    "nbconvert_exporter": "python",
162 |    "pygments_lexer": "ipython3",
163 |    "version": "3.7.7"
164 |   }
165 |  },
166 |  "nbformat": 4,
167 |  "nbformat_minor": 2
168 | }
169 | 


--------------------------------------------------------------------------------
/exams/MockExam01.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is a closed book exam. You can use the `help()` function, and the `?` prefix or suffix but are restricted to a SINGLE browser tab.\n",
 12 |     "- All necessary imports are provided. You should not need to import any other packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "%matplotlib inline\n",
 23 |     "import matplotlib.pyplot as plt\n",
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "import string\n",
 27 |     "import collections\n",
 28 |     "import seaborn as sns\n",
 29 |     "from functools import reduce\n",
 30 |     "sns.set_context('notebook', font_scale=1.5)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "**1**. 20 points\n",
 38 |     "\n",
 39 |     "The four adjacent digits in the 1000-digit number that have the greatest product are 9 × 9 × 8 × 9 = 5832.\n",
 40 |     "\n",
 41 |     "```\n",
 42 |     "73167176531330624919225119674426574742355349194934\n",
 43 |     "96983520312774506326239578318016984801869478851843\n",
 44 |     "85861560789112949495459501737958331952853208805511\n",
 45 |     "12540698747158523863050715693290963295227443043557\n",
 46 |     "66896648950445244523161731856403098711121722383113\n",
 47 |     "62229893423380308135336276614282806444486645238749\n",
 48 |     "30358907296290491560440772390713810515859307960866\n",
 49 |     "70172427121883998797908792274921901699720888093776\n",
 50 |     "65727333001053367881220235421809751254540594752243\n",
 51 |     "52584907711670556013604839586446706324415722155397\n",
 52 |     "53697817977846174064955149290862569321978468622482\n",
 53 |     "83972241375657056057490261407972968652414535100474\n",
 54 |     "82166370484403199890008895243450658541227588666881\n",
 55 |     "16427171479924442928230863465674813919123162824586\n",
 56 |     "17866458359124566529476545682848912883142607690042\n",
 57 |     "24219022671055626321111109370544217506941658960408\n",
 58 |     "07198403850962455444362981230987879927244284909188\n",
 59 |     "84580156166097919133875499200524063689912560717606\n",
 60 |     "05886116467109405077541002256983155200055935729725\n",
 61 |     "71636269561882670428252483600823257530420752963450\n",
 62 |     "```\n",
 63 |     "\n",
 64 |     "Find the thirteen adjacent digits in the 1000-digit number that have the greatest product. What is the value of this product?\n",
 65 |     "\n",
 66 |     "(Source: Project Euler Problem 8)"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "\n",
 76 |     "\n",
 77 |     "\n"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "**2**. 20 points\n",
 85 |     "\n",
 86 |     "Count the unique words in s1, s2, and s3 combined after removing punctuation and converting to lower case. "
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 8,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "s1 = 'Twinkle, twinkle, little star, How I wonder what you are. Up above the world so high, Like a diamond in the sky. Twinkle, twinkle, little star, How I wonder what you are!'\n",
 96 |     "s2 = 'Jack and Jill went up the hill, To fetch a pail of water. Jack fell down and broke his crown, And Jill came tumbling after.'\n",
 97 |     "s3 = 'Oh, do you know the muffin man, The muffin man, the muffin man, Oh, do you know the muffin man, That lives on Drury Lane?'"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 9,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "\n",
107 |     "\n",
108 |     "\n"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "**3**. 20 points\n",
116 |     "\n",
117 |     "Generate a DataFrame of word counts for each \"document\" from problem 2. Show the top 3 rows with the largest row sums."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "\n",
127 |     "\n",
128 |     "\n"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "**4**. 20 points\n",
136 |     "\n",
137 |     "Using the data provided, recreate this plot.\n",
138 |     "\n",
139 |     "- Top row is plotted with `matplotllib` functions `plot` and `hist`\n",
140 |     "- Bottom row is plotted with `seaborn` functions `kdeplot` and `regplot`\n",
141 |     "\n",
142 |     "![](plots.png)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 17,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "n = 100\n",
152 |     "xs = np.linspace(0, 4*np.pi, n)\n",
153 |     "ys = np.sin(xs) + np.random.normal(0, 0.5, n)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "\n",
163 |     "\n",
164 |     "\n"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "**5** 20 points\n",
172 |     "\n",
173 |     "Sum the nested list using only anonymous functions, and one or more of map, reduce and filter."
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 19,
179 |    "metadata": {},
180 |    "outputs": [],
181 |    "source": [
182 |     "xxs = [[1,2,3], [4,5,6], [7,8,9]]"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "\n",
192 |     "\n",
193 |     "\n"
194 |    ]
195 |   }
196 |  ],
197 |  "metadata": {
198 |   "kernelspec": {
199 |    "display_name": "Python 3",
200 |    "language": "python",
201 |    "name": "python3"
202 |   },
203 |   "language_info": {
204 |    "codemirror_mode": {
205 |     "name": "ipython",
206 |     "version": 3
207 |    },
208 |    "file_extension": ".py",
209 |    "mimetype": "text/x-python",
210 |    "name": "python",
211 |    "nbconvert_exporter": "python",
212 |    "pygments_lexer": "ipython3",
213 |    "version": "3.7.4"
214 |   }
215 |  },
216 |  "nbformat": 4,
217 |  "nbformat_minor": 2
218 | }
219 | 


--------------------------------------------------------------------------------
/exams/MockExam02.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is a closed book exam. You can use the `help()` function, and the `?` prefix or suffix but are restricted to a SINGLE browser tab.\n",
 12 |     "- All necessary imports are provided. You should not need to import any other packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "**1**. (20 points)\n",
 21 |     "\n",
 22 |     "- Find the matrix $A$ that results in rotating the standard vectors in $\\mathbb{R}^2$ by 30 degrees counter-clockwise and stretches $e_1$ by a factor of 3 and contracts $e_2$ by a factor of $0.5$. \n",
 23 |     "- What is the inverse of this matrix? How you find the inverse should reflect your understanding.\n",
 24 |     "\n",
 25 |     "The effects of the matrix $A$ and $A^{-1}$ are shown in the figure below:\n",
 26 |     "\n",
 27 |     "![image](../data/vecs.png)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": []
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "**2**. (20 points)\n",
 42 |     "\n",
 43 |     "- Given the DNA sequence below, create a $4 \\times 4$ transition matrix $A$ where $A[i,j]$ is the probability of the base $j$ appearing immediately after base $i$. Note that a *base* is one of the four letters `a`, `c`, `t` or `g`. The letters below should be treated as a single sequence, broken into separate lines just for formatting purposes. You should check that row probabilities sum to 1. \n",
 44 |     "- Find the steady state distribution of the 4 bases from the row stochastic transition matrix - that is the, the values of $x$ for which $x^TA = x$ (You can solve this as a set of linear equations). Hint: you need to add a constraint on the values of $x$. \n",
 45 |     "\n",
 46 |     "```\n",
 47 |     "gggttgtatgtcacttgagcctgtgcggacgagtgacacttgggacgtgaacagcggcggccgatacgttctctaagatc\n",
 48 |     "ctctcccatgggcctggtctgtatggctttcttgttgtgggggcggagaggcagcgagtgggtgtacattaagcatggcc\n",
 49 |     "accaccatgtggagcgtggcgtggtcgcggagttggcagggtttttgggggtggggagccggttcaggtattccctccgc\n",
 50 |     "gtttctgtcgggtaggggggcttctcgtaagggattgctgcggccgggttctctgggccgtgatgactgcaggtgccatg\n",
 51 |     "gaggcggtttggggggcccccggaagtctagcgggatcgggcttcgtttgtggaggagggggcgagtgcggaggtgttct\n",
 52 |     "```"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": null,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": []
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "**3**. (20 points) \n",
 67 |     "\n",
 68 |     "We observe some data points $(x_i, y_i)$, and believe that an appropriate model for the data is that\n",
 69 |     "\n",
 70 |     "$$\n",
 71 |     "f(x) = ax^2 + bx^3 + c\\sin{x}\n",
 72 |     "$$\n",
 73 |     "\n",
 74 |     "with some added noise. Find optimal values of the parameters $\\beta = (a, b, c)$ that minimize $\\Vert y - f(x) \\Vert^2$\n",
 75 |     "\n",
 76 |     "using gradient descent with RMSProp (no bias correction) and starting with an initial value of $\\beta = \\begin{bmatrix}1 & 1 & 1\\end{bmatrix}$. Use a learning rate of 0.01 and 10,000 iterations. This should take a few seconds to complete. (25 points)\n",
 77 |     "\n",
 78 |     "Plot the data and fitted curve using `matplotlib`.\n",
 79 |     "\n",
 80 |     "Data\n",
 81 |     "```\n",
 82 |     "x = array([ 3.4027718 ,  4.29209002,  5.88176277,  6.3465969 ,  7.21397852,\n",
 83 |     "        8.26972154, 10.27244608, 10.44703778, 10.79203455, 14.71146298])\n",
 84 |     "y = array([ 25.54026428,  29.4558919 ,  58.50315846,  70.24957254,\n",
 85 |     "        90.55155435, 100.56372833,  91.83189927,  90.41536733,\n",
 86 |     "        90.43103028,  23.0719842 ])\n",
 87 |     "```"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": []
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "metadata": {},
100 |    "source": [
101 |     "**4**. (20 points)\n",
102 |     "\n",
103 |     "Consider the following system of equations:\n",
104 |     "\n",
105 |     "$$\\begin{align*}\n",
106 |     "2x_1& - x_2&    +x_1    &=& 6\\\\\n",
107 |     "-x_1& +2x_2& -  x_3 &=& 2\\\\\n",
108 |     " x_1   &  -x_2& + x_3 &=& 1\n",
109 |     "\\end{align*}$$\n",
110 |     "\n",
111 |     "1. Write the system in matrix form $Ax=b$ and define these in numpy or scipy.\n",
112 |     "2. Show that $A$ is positive-definite\n",
113 |     "3. Use the appropriate matrix decomposition function in numpy and back-substitution to solve the system. Remember to use the structure of the problem to determine the appropriate decomposition."
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": []
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "**5**. (20 points)\n",
128 |     "\n",
129 |     "Let\n",
130 |     "\n",
131 |     "$A = \\left(\\begin{matrix}2 & -1 &1\\\\-1& 2& -1 \\\\1&-1& 1\n",
132 |     "\\end{matrix}\\right) \\;\\;\\;\\;\\;\\;\\textrm{ and }\\;\\;\\;\\;\\;\\; v = \\left(\\begin{matrix}1 \\\\ 1 \\\\2\\end{matrix}\\right)$\n",
133 |     "\n",
134 |     "Find $w$ such that $w$ is conjugate to $v$ under $A$. You may use *basic* linear algebra in scipy or numpy - i.e. matrix products."
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": []
143 |   }
144 |  ],
145 |  "metadata": {
146 |   "kernelspec": {
147 |    "display_name": "Python 3",
148 |    "language": "python",
149 |    "name": "python3"
150 |   },
151 |   "language_info": {
152 |    "codemirror_mode": {
153 |     "name": "ipython",
154 |     "version": 3
155 |    },
156 |    "file_extension": ".py",
157 |    "mimetype": "text/x-python",
158 |    "name": "python",
159 |    "nbconvert_exporter": "python",
160 |    "pygments_lexer": "ipython3",
161 |    "version": "3.8.2"
162 |   }
163 |  },
164 |  "nbformat": 4,
165 |  "nbformat_minor": 4
166 | }
167 | 


--------------------------------------------------------------------------------
/exams/MockExam03.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Instructions\n",
  8 |     "\n",
  9 |     "**When taking this exam, you agree to follow the Duke Honor Code.**\n",
 10 |     "\n",
 11 |     "- This is a closed book exam. You can use the `help()` function, and the `?` prefix or suffix but are restricted to a SINGLE browser tab.\n",
 12 |     "- All necessary imports are provided. You should not need to import any other packages.\n",
 13 |     "- Answer all 5 questions."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "**1**. (20 points)\n",
 21 |     "\n",
 22 |     "- Write a function to find where f(x) crosses zero using a bisection algorithm. The bisection algorithm works in the following way - given the left and right boundary, it checks if the crossing is between left and mid-point, or between the right and mid-point. If the former, the mid-point is set as the new right boundary and so on. In other words the interval containing the crossing is halved at each step. Stop when the length of the interval is less than 1e-6, and return the left and right boundaries at that point.\n",
 23 |     "    - The function should have the signature `bisect(f, left=0, right=1, tol=1e-6)`\n",
 24 |     "    - Use `bisect` to find the left and right intervals containing a zero (root) of the function $x^3 + 4x^2 - 1$ between 0 and 1.\n",
 25 |     "    - <font color=red>Hint</font>: If the crossing is between left and right, the function evaluated at left and right must have opposite signs. (20 points)\n",
 26 |     "\n",
 27 |     "- Plot the function together with a horizontal line for $y=0$ and a red circle at the root whose coordinates are (`left`, 0). The x-limits of the plot should be 0 and 1."
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": []
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "**2**. (20 points)\n",
 42 |     "\n",
 43 |     "Implement a Python function to find the minimum using the Newton-Raphson method. Use it to find a minimum of $x^3 + 4x^2 -3 = x$ given the initial location $x_0 = -1$. Do not use any library methods apart from those in `np` - the idea is to develop the algorithm using only basic Python language constructs."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 6,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": []
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "**3**. (20 points)\n",
 58 |     "\n",
 59 |     "We observe some data points $(x_i, y_i)$, and believe that an appropriate model for the data is that\n",
 60 |     "\n",
 61 |     "$$\n",
 62 |     "f(x) = ax^2 + bx^3 + c\\sin{x}\n",
 63 |     "$$\n",
 64 |     "\n",
 65 |     "with some added noise. Find optimal values of the parameters $\\beta = (a, b, c)$ that minimize $\\Vert y - f(x) \\Vert^2$ using gradient descent with RMSprop (no bias correction) and starting with an initial value of $\\beta = \\begin{bmatrix}1 & 1 & 1\\end{bmatrix}$. Use a learning rate of 0.01 and 10,000 iterations. This should take a few seconds to complete. (15 points)\n",
 66 |     "\n",
 67 |     "Plot the data and fitted curve using `matplotlib`. (5 points)\n",
 68 |     "\n",
 69 |     "Data\n",
 70 |     "```\n",
 71 |     "x = array([ 3.4027718 ,  4.29209002,  5.88176277,  6.3465969 ,  7.21397852,\n",
 72 |     "        8.26972154, 10.27244608, 10.44703778, 10.79203455, 14.71146298])\n",
 73 |     "y = array([ 25.54026428,  29.4558919 ,  58.50315846,  70.24957254,\n",
 74 |     "        90.55155435, 100.56372833,  91.83189927,  90.41536733,\n",
 75 |     "        90.43103028,  23.0719842 ])\n",
 76 |     "```"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 10,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "x = np.array([ 3.4027718 ,  4.29209002,  5.88176277,  6.3465969 ,  7.21397852,\n",
 86 |     "        8.26972154, 10.27244608, 10.44703778, 10.79203455, 14.71146298])\n",
 87 |     "y = np.array([ 25.54026428,  29.4558919 ,  58.50315846,  70.24957254,\n",
 88 |     "        90.55155435, 100.56372833,  91.83189927,  90.41536733,\n",
 89 |     "        90.43103028,  23.0719842 ])"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": []
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "**4**. (20 points)\n",
104 |     "\n",
105 |     "Given the set of vectors\n",
106 |     "\n",
107 |     "```\n",
108 |     "v1 = np.array([1,2,3])\n",
109 |     "v2 = np.array([2,4,7])\n",
110 |     "v3 = np.array([1,0,1])\n",
111 |     "```\n",
112 |     "\n",
113 |     "1. Calculate the pairwise Euclidean distance matrix  (5 points)\n",
114 |     "2. Find an orthogonal basis for the space spanned by the vectors $v_1, v_2, v_3$ without using any functions from `numpy.linag` or `scipy.linalg` (5 points)\n",
115 |     "3. Use Gram-Schmidt decomposition to  find an orthogonal basis for the space spanned by the vectors $v_1, v_2, v_3$ where one of the basis vectors must be in the direction of $v_3$. (10 points)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": []
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "**5**. (20 points)\n",
130 |     "\n",
131 |     "A milkmaid is at point A and needs to get to point B. However, she also needs to fill a pail of water from the river en route from A to B. The equation of the river's path is shown in the figure below. What is the minimum distance she has to travel to do this?\n",
132 |     "\n",
133 |     "1. Solve using `scipy.optimize` and constrained minimization (15 points)\n",
134 |     "2. Plot the solution (5 point)\n",
135 |     "\n",
136 |     "![Milkmaid problem](../data/milkmaid.png)\n",
137 |     "\n",
138 |     "Hint: There is a local and a global optimum for this problem, so try different initial conditions."
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": []
147 |   }
148 |  ],
149 |  "metadata": {
150 |   "kernelspec": {
151 |    "display_name": "Python 3",
152 |    "language": "python",
153 |    "name": "python3"
154 |   },
155 |   "language_info": {
156 |    "codemirror_mode": {
157 |     "name": "ipython",
158 |     "version": 3
159 |    },
160 |    "file_extension": ".py",
161 |    "mimetype": "text/x-python",
162 |    "name": "python",
163 |    "nbconvert_exporter": "python",
164 |    "pygments_lexer": "ipython3",
165 |    "version": "3.8.2"
166 |   }
167 |  },
168 |  "nbformat": 4,
169 |  "nbformat_minor": 2
170 | }
171 | 


--------------------------------------------------------------------------------
/exams/Q1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/Q1.npy


--------------------------------------------------------------------------------
/exams/Q2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/Q2.npy


--------------------------------------------------------------------------------
/exams/Q3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/Q3.png


--------------------------------------------------------------------------------
/exams/Q5.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/Q5.npy


--------------------------------------------------------------------------------
/exams/mtcars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/mtcars.png


--------------------------------------------------------------------------------
/exams/plots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/exams/plots.png


--------------------------------------------------------------------------------
/homework/HW06.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Functions and data set to optimize in Q1 and Q2**"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "%matplotlib inline\n",
 17 |     "import matplotlib.pyplot as plt\n",
 18 |     "import numpy as np\n",
 19 |     "from sklearn.datasets import make_blobs"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": null,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "def logistic(x):\n",
 29 |     "    \"\"\"Logistic function.\"\"\"\n",
 30 |     "    return np.exp(x)/(1 + np.exp(x))\n",
 31 |     "\n",
 32 |     "def gd(X, y, beta, alpha, niter):\n",
 33 |     "    \"\"\"Gradient descent algorihtm.\"\"\"\n",
 34 |     "    n, p = X.shape\n",
 35 |     "    Xt = X.T\n",
 36 |     "    for i in range(niter):\n",
 37 |     "        y_pred = logistic(X @ beta)\n",
 38 |     "        epsilon = y - y_pred\n",
 39 |     "        grad = Xt @ epsilon / n\n",
 40 |     "        beta += alpha * grad\n",
 41 |     "    return beta"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "x = np.linspace(-6, 6, 100)\n",
 51 |     "plt.plot(x, logistic(x))\n",
 52 |     "pass"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": null,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "n = 10000\n",
 62 |     "p = 2\n",
 63 |     "X, y = make_blobs(n_samples=n, n_features=p, centers=2, \n",
 64 |     "                  cluster_std=1.05, random_state=23)\n",
 65 |     "X = np.c_[np.ones(len(X)), X]\n",
 66 |     "y = y.astype('float')"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "# initial parameters\n",
 76 |     "niter = 1000\n",
 77 |     "α = 0.01\n",
 78 |     "β = np.zeros(p+1)\n",
 79 |     "\n",
 80 |     "# call gradient descent\n",
 81 |     "β = gd(X, y, β, α, niter)\n",
 82 |     "\n",
 83 |     "# assign labels to points based on prediction\n",
 84 |     "y_pred = logistic(X @ β)\n",
 85 |     "labels = y_pred > 0.5\n",
 86 |     "\n",
 87 |     "# calculate separating plane\n",
 88 |     "sep = (-β[0] - β[1] * X)/β[2]\n",
 89 |     "\n",
 90 |     "plt.scatter(X[:, 1], X[:, 2], c=labels, cmap='winter')\n",
 91 |     "plt.plot(X, sep, 'r-')\n",
 92 |     "pass"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "**1**. 20 points \n",
100 |     "\n",
101 |     "Use `numba` to compile the gradient descent function. \n",
102 |     "\n",
103 |     "- Use the `@vectorize` decorator to create a ufunc version of the logistic function and call this `logistic_numba_cpu` with function signatures of `float64(float64)`. Create another function called `logistic_numba_parallel` by giving an extra argument to the decorator of `target=parallel` (5 points)\n",
104 |     "- For each function, check that the answers are the same as with the original logistic function using  `np.testing.assert_array_almost_equal`. Use `%timeit` to compare the three logistic functions (5 points)\n",
105 |     "- Now use `@jit` to create a JIT_compiled version of the `logistic` and `gd` functions, calling them `logistic_numba` and `gd_numba`. Provide appropriate function signatures to the decorator in each case. (5 points)\n",
106 |     "- Compare the two gradient descent functions `gd` and `gd_numba` for correctness and performance. (5 points)"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "\n",
116 |     "\n",
117 |     "\n"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "**2**. 30 points \n",
125 |     "\n",
126 |     "Use `cython` to compile the gradient descent function. \n",
127 |     "\n",
128 |     "- Cythonize the logistic function as `logistic_cython`. Use the `--annotate` argument to the `cython` magic function to find slow regions. Compare accuracy and performance. The final performance should be comparable to the `numba` cpu version. (10 points)\n",
129 |     "- Now cythonize the gd function as `gd_cython`. This function should use of the cythonized `logistic_cython` as a C function call.  Compare accuracy and performance. The final performance should be comparable to the `numba` cpu version. (20 points)\n",
130 |     "\n",
131 |     "Hints: \n",
132 |     "\n",
133 |     "- Give static types to all variables\n",
134 |     "- Know how to use `def`, `cdef` and `cpdef`\n",
135 |     "- Use Typed MemoryViews\n",
136 |     "- Find out how to transpose a Typed MemoryView to store the transpose of X\n",
137 |     "- Typed MemoryVeiws are not `numpy` arrays - you often have to write explicit loops to operate on them\n",
138 |     "- Use the cython boundscheck, wraparound, and cdivision operators"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "\n",
148 |     "\n",
149 |     "\n"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "**3**. 25 points\n",
157 |     "\n",
158 |     "Implement Newton's method in 1D for root finding. Pass in the function and gradient as generalized function pointers. Use the method to find all roots of the polynomial equation $f(x) = x^3 - 7x - 6$"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "\n",
168 |     "\n",
169 |     "\n"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "**4**. 25 points\n",
177 |     "\n",
178 |     "Use the `armadillo` or `eigen` library to\n",
179 |     "\n",
180 |     "- Generate 10 x-coordinates linearly spaced between 10 and 15\n",
181 |     "- Generate 10 random y-values as $y = 3x^2 - 7x + 2 + \\epsilon$ where $\\epsilon \\sim 10 N(0,1)$\n",
182 |     "- Find the length of $x$ and $y$ and the Euclidean distance between $x$ and $y$\n",
183 |     "- Find the correlation between $x$ and $y$\n",
184 |     "- Solve the linear system to find a quadratic fit for this data"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "\n",
194 |     "\n",
195 |     "\n"
196 |    ]
197 |   }
198 |  ],
199 |  "metadata": {
200 |   "kernelspec": {
201 |    "display_name": "Python 3",
202 |    "language": "python",
203 |    "name": "python3"
204 |   },
205 |   "language_info": {
206 |    "codemirror_mode": {
207 |     "name": "ipython",
208 |     "version": 3
209 |    },
210 |    "file_extension": ".py",
211 |    "mimetype": "text/x-python",
212 |    "name": "python",
213 |    "nbconvert_exporter": "python",
214 |    "pygments_lexer": "ipython3",
215 |    "version": "3.7.7"
216 |   }
217 |  },
218 |  "nbformat": 4,
219 |  "nbformat_minor": 2
220 | }
221 | 


--------------------------------------------------------------------------------
/homework/Homework01.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Homework 01"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "**1**. (25 points)\n",
 15 |     "\n",
 16 |     "The code below gives five \"documents\" with titles in `titles` and text in `contents`. \n",
 17 |     "\n",
 18 |     "- Convert each text into \"words\" by converting to lower case, removing punctuation and splitting on whitespace\n",
 19 |     "- Make a list of all unique \"words\" in any of the texts\n",
 20 |     "- Create an pandas DataFrame whose rows are words, columns are titles, and values are counts of the word in the document\n",
 21 |     "- Add a column `total` that counts the total number of occurrences for each word across all documents\n",
 22 |     "- Show the rows for the 5 most commonly used words"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 1,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "import sklearn\n",
 32 |     "from sklearn.datasets import fetch_20newsgroups\n",
 33 |     "twenty = fetch_20newsgroups(subset='train')\n",
 34 |     "target_names = twenty['target_names']\n",
 35 |     "titles = [target_names[i] for i in twenty['target'][2:7]]\n",
 36 |     "contents = twenty['data'][2:7]"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "\n",
 46 |     "\n",
 47 |     "\n"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "**2**. (75 points)\n",
 55 |     "\n",
 56 |     "A Caesar cipher is a very simple method of encoding and decoding data. The cipher simply replaces characters with the character offset by $k$ places. For example, if the offset is 3, we replace `a` with `d`, `b` with `e` etc. The cipher wraps around so we replace `y` with `b`, `z` with `c` and so on. Punctuation, spaces and numbers are left unchanged.\n",
 57 |     "\n",
 58 |     "- Write a function `encode` that takes as arguments a string and an integer offset and returns the encoded cipher.\n",
 59 |     "- Write a function `decode` that takes as arguments a cipher and an integer offset and returns the decoded string. \n",
 60 |     "- Write a function `auto_decode` that takes as argument a cipher and uses a statistical method to guess the optimal offset to decode the cipher, assuming the original string is in English which has the following letter frequency:\n",
 61 |     "\n",
 62 |     "```python\n",
 63 |     "freq = {\n",
 64 |     " 'a': 0.08167,\n",
 65 |     " 'b': 0.01492,\n",
 66 |     " 'c': 0.02782,\n",
 67 |     " 'd': 0.04253,\n",
 68 |     " 'e': 0.12702,\n",
 69 |     " 'f': 0.02228,\n",
 70 |     " 'g': 0.02015,\n",
 71 |     " 'h': 0.06094,\n",
 72 |     " 'i': 0.06966,\n",
 73 |     " 'j': 0.00153,\n",
 74 |     " 'k': 0.00772,\n",
 75 |     " 'l': 0.04025,\n",
 76 |     " 'm': 0.02406,\n",
 77 |     " 'n': 0.06749,\n",
 78 |     " 'o': 0.07507,\n",
 79 |     " 'p': 0.01929,\n",
 80 |     " 'q': 0.00095,\n",
 81 |     " 'r': 0.05987,\n",
 82 |     " 's': 0.06327,\n",
 83 |     " 't': 0.09056,\n",
 84 |     " 'u': 0.02758,\n",
 85 |     " 'v': 0.00978,\n",
 86 |     " 'w': 0.0236,\n",
 87 |     " 'x': 0.0015,\n",
 88 |     " 'y': 0.01974,\n",
 89 |     " 'z': 0.00074\n",
 90 |     "}\n",
 91 |     "```\n",
 92 |     "\n",
 93 |     "- Encode the following nursery rhyme using a random offset from 10 to 20, then recover the original using `auto_decode`:\n",
 94 |     "\n",
 95 |     "```text\n",
 96 |     "Baa, baa, black sheep,\n",
 97 |     "Have you any wool?\n",
 98 |     "Yes, sir, yes, sir,\n",
 99 |     "Three bags full;\n",
100 |     "One for the master,\n",
101 |     "And one for the dame,\n",
102 |     "And one for the little boy\n",
103 |     "Who lives down the lane.\n",
104 |     "```"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "\n",
114 |     "\n",
115 |     "\n"
116 |    ]
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.6.5"
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 2
140 | }
141 | 


--------------------------------------------------------------------------------
/homework/Homework02.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**1**. (100 points)\n",
  8 |     "\n",
  9 |     "Implement the Drossel and Schwabl forest fire model using the [description](https://en.wikipedia.org/wiki/Forest-fire_model#/media/File:Forest_fire_model.gif) from Wikipedia\n",
 10 |     "\n",
 11 |     "```\n",
 12 |     "The model is defined as a cellular automaton on a grid with Ld cells. L is the sidelength of the grid and d is its dimension. A cell can be empty, occupied by a tree, or burning. The model of Drossel and Schwabl (1992) is defined by four rules which are executed simultaneously:\n",
 13 |     "A burning cell turns into an empty cell\n",
 14 |     "A tree will burn if at least one neighbor is burning\n",
 15 |     "A tree ignites with probability f even if no neighbor is burning\n",
 16 |     "An empty space fills with a tree with probability p\n",
 17 |     "```\n",
 18 |     "\n",
 19 |     "- Parameters for the simulation are\n",
 20 |     "```python\n",
 21 |     "steps = 200  # Number of steps\n",
 22 |     "p = 0.03     # Probability of EMPTY -> TREE\n",
 23 |     "f = p * 0.01 # Probability of TREE -> FIRE\n",
 24 |     "n = 200      # Length of square grid\n",
 25 |     "steps = 200  # Number of simulation steps\n",
 26 |     "```\n",
 27 |     "- You need to write code create an array named `sim` with shape (steps, n, n)\n",
 28 |     "- **Note**: Your code should make use of `numpy` optimizations as much as possible - the simulation itself takes less than a second to complete on my fairly old desktop\n",
 29 |     "- Now create a plot as shown from the `sims` array\n",
 30 |     "![](./forest_fire.png)\n",
 31 |     "- Code to display an animation of the `sims` array is provided\n",
 32 |     "- Your animation should be similar to the one shown\n",
 33 |     "\n",
 34 |     "Hints:\n",
 35 |     "\n",
 36 |     "- For simplicity, assume that the borders of the simulation grid are fixed so they are always empty (the borders are *outside* the 100 by 100 grid you are saving at each step.\n",
 37 |     "- Use marginalization to get the required numbers for the plot of population sizes over time\n",
 38 |     "- Use *views* liberally - they do not incur the cost of a copy\n"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "from IPython.display import Video\n",
 48 |     "\n",
 49 |     "Video(\"forest_fire.mp4\")"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "%matplotlib inline\n",
 59 |     "import matplotlib.pyplot as plt\n",
 60 |     "from matplotlib.colors import ListedColormap\n",
 61 |     "import matplotlib.animation as animation\n",
 62 |     "from IPython.display import HTML\n",
 63 |     "import numpy as np\n",
 64 |     "np.random.seed(123)"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "EMPTY, TREE, BURN = 0, 1, 2"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": null,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "steps = 200  # Number of steps\n",
 83 |     "p = 0.03     # Probability of EMPTY -> TREE\n",
 84 |     "f = p * 0.01 # Probability of TREE -> FIRE\n",
 85 |     "n = 200      # Length of square grid\n",
 86 |     "steps = 200  # Number of simulation steps"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "%%time\n",
 96 |     "\n",
 97 |     "# Your code to generate the `sim` array here."
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "# Your code to make the first plot here"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "# Check movie\n",
114 |     "\n",
115 |     "If your `sim` array is correct, the animation will play upon execution. It takes a bit of time (< 1 minute) to genreate the animation and render to HTML."
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "%%capture\n",
125 |     "\n",
126 |     "fig = plt.figure(figsize=(6,6))\n",
127 |     "im = plt.imshow(sim[0], cmap=ListedColormap(['black', 'green', 'red']), vmin=0, vmax=2)\n",
128 |     "\n",
129 |     "def updatefig(j):\n",
130 |     "    im.set_array(sim[j])\n",
131 |     "    return [im]\n",
132 |     "\n",
133 |     "ani = animation.FuncAnimation(fig, updatefig, frames=range(steps), \n",
134 |     "                              interval=50, blit=True);"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": [
143 |     "%%time\n",
144 |     "\n",
145 |     "HTML(ani.to_jshtml())"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "To write to file if you have ffmpeg instaled\n",
153 |     "\n",
154 |     "```python \n",
155 |     "Writer = animation.writers['ffmpeg']\n",
156 |     "writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)\n",
157 |     "ani.save('forest_fire.mp4', writer=writer)\n",
158 |     "```"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": []
167 |   }
168 |  ],
169 |  "metadata": {
170 |   "kernelspec": {
171 |    "display_name": "Python 3",
172 |    "language": "python",
173 |    "name": "python3"
174 |   },
175 |   "language_info": {
176 |    "codemirror_mode": {
177 |     "name": "ipython",
178 |     "version": 3
179 |    },
180 |    "file_extension": ".py",
181 |    "mimetype": "text/x-python",
182 |    "name": "python",
183 |    "nbconvert_exporter": "python",
184 |    "pygments_lexer": "ipython3",
185 |    "version": "3.7.4"
186 |   }
187 |  },
188 |  "nbformat": 4,
189 |  "nbformat_minor": 2
190 | }
191 | 


--------------------------------------------------------------------------------
/homework/Homework03.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Homework03:  Topic Modeling with Latent Semantic Analysis"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Latent Semantic Analysis (LSA) is a method for finding latent similarities between documents treated as a bag of words by using a low rank approximation. It is used for document classification, clustering and retrieval. For example, LSA can be used to search for prior art given a new patent application. In this homework, we will implement a small library for simple latent semantic analysis as a practical example of the application of SVD. The ideas are very similar to PCA. SVD is also used in recommender systems in an similar fashion (for an SVD-based recommender system library, see [Surpise](http://surpriselib.com). \n",
 15 |     "\n",
 16 |     "We will implement a toy example of LSA to get familiar with the ideas. If you want to use LSA or similar methods for statistical language analysis, the most efficient Python libraries are probably [gensim](https://radimrehurek.com/gensim/) and [spaCy](https://spacy.io) - these also provide an online algorithm - i.e. the training information can be continuously updated. Other useful functions for processing natural language can be found in the [Natural Language Toolkit](http://www.nltk.org/)."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**Note**: The SVD from scipy.linalg performs a full decomposition, which is inefficient since we only need to decompose until we get the first k singluar values. If the SVD from `scipy.linalg` is too slow, please use the `sparsesvd` function from the [sparsesvd](https://pypi.python.org/pypi/sparsesvd/) package to perform SVD instead.  You can install in the usual way with \n",
 24 |     "```\n",
 25 |     "!pip install sparsesvd\n",
 26 |     "```\n",
 27 |     "\n",
 28 |     "Then import the following\n",
 29 |     "```python\n",
 30 |     "from sparsesvd import sparsesvd \n",
 31 |     "from scipy.sparse import csc_matrix \n",
 32 |     "```\n",
 33 |     "\n",
 34 |     "and use as follows\n",
 35 |     "```python\n",
 36 |     "sparsesvd(csc_matrix(M), k=10)\n",
 37 |     "```"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "**Exercise 1 (20 points)**.  Calculating pairwise distance matrices.\n",
 45 |     "\n",
 46 |     "Suppose we want to construct a distance matrix between the rows of a matrix. For example, given the matrix \n",
 47 |     "\n",
 48 |     "```python\n",
 49 |     "M = np.array([[1,2,3],[4,5,6]])\n",
 50 |     "```\n",
 51 |     "\n",
 52 |     "the distance matrix using Euclidean distance as the measure would be\n",
 53 |     "```python\n",
 54 |     "[[ 0.000  1.414  2.828]\n",
 55 |     " [ 1.414  0.000  1.414]\n",
 56 |     " [ 2.828  1.414  0.000]] \n",
 57 |     "```\n",
 58 |     "if $M$ was a collection of column vectors.\n",
 59 |     "\n",
 60 |     "Write a function to calculate the pairwise-distance matrix given the matrix $M$ and some arbitrary distance function. Your functions should have the following signature:\n",
 61 |     "```\n",
 62 |     "def func_name(M, distance_func):\n",
 63 |     "    pass\n",
 64 |     "```\n",
 65 |     "\n",
 66 |     "0. Write a distance function for the Euclidean, squared Euclidean and cosine measures.\n",
 67 |     "1. Write the function using looping for M as a collection of row vectors.\n",
 68 |     "2. Write the function using looping for M as a collection of column vectors.\n",
 69 |     "3. Wrtie the function using broadcasting for M as a collection of row vectors.\n",
 70 |     "4. Write the function using broadcasting for M as a collection of column vectors. \n",
 71 |     "\n",
 72 |     "For 3 and 4, try to avoid using transposition (but if you get stuck, there will be no penalty for using transposition). Check that all four functions give the same result when applied to the given matrix $M$."
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": []
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "**Exercise 2 (20 points)**. "
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "**Exercise 2 (20 points)**. Write 3 functions to calculate the term frequency (tf), the inverse document frequency (idf) and the product (tf-idf). Each function should take a single argument `docs`, which is a dictionary of (key=identifier, value=document text) pairs, and return an appropriately sized array. Convert '-' to ' ' (space), remove punctuation, convert text to lowercase and split on whitespace to generate a collection of terms from the document text.\n",
 96 |     "\n",
 97 |     "- tf = the number of occurrences of term $i$ in document $j$\n",
 98 |     "- idf = $\\log \\frac{n}{1 + \\text{df}_i}$ where $n$ is the total number of documents and $\\text{df}_i$ is the number of documents in which term $i$ occurs.\n",
 99 |     "\n",
100 |     "Print the table of tf-idf values for the following document collection\n",
101 |     "\n",
102 |     "```\n",
103 |     "s1 = \"The quick brown fox\"\n",
104 |     "s2 = \"Brown fox jumps over the jumps jumps jumps\"\n",
105 |     "s3 = \"The the the lazy dog elephant.\"\n",
106 |     "s4 = \"The the the the the dog peacock lion tiger elephant\"\n",
107 |     "\n",
108 |     "docs = {'s1': s1, 's2': s2, 's3': s3, 's4': s4}\n",
109 |     "```"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {
116 |     "collapsed": true
117 |    },
118 |    "outputs": [],
119 |    "source": []
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "**Exercise 3 (20 points)**. \n",
126 |     "\n",
127 |     "1. Write a function that takes a matrix $M$ and an integer $k$ as arguments, and reconstructs a reduced matrix using only the $k$ largest singular values. Use the `scipy.linagl.svd` function to perform the decomposition. This is the least squares approximation to the matrix $M$ in $k$ dimensions.\n",
128 |     "\n",
129 |     "2. Apply the function you just wrote to the following term-frequency matrix for a set of $9$ documents using $k=2$ and print the reconstructed matrix $M'$.\n",
130 |     "```\n",
131 |     "M = np.array([[1, 0, 0, 1, 0, 0, 0, 0, 0],\n",
132 |     "       [1, 0, 1, 0, 0, 0, 0, 0, 0],\n",
133 |     "       [1, 1, 0, 0, 0, 0, 0, 0, 0],\n",
134 |     "       [0, 1, 1, 0, 1, 0, 0, 0, 0],\n",
135 |     "       [0, 1, 1, 2, 0, 0, 0, 0, 0],\n",
136 |     "       [0, 1, 0, 0, 1, 0, 0, 0, 0],\n",
137 |     "       [0, 1, 0, 0, 1, 0, 0, 0, 0],\n",
138 |     "       [0, 0, 1, 1, 0, 0, 0, 0, 0],\n",
139 |     "       [0, 1, 0, 0, 0, 0, 0, 0, 1],\n",
140 |     "       [0, 0, 0, 0, 0, 1, 1, 1, 0],\n",
141 |     "       [0, 0, 0, 0, 0, 0, 1, 1, 1],\n",
142 |     "       [0, 0, 0, 0, 0, 0, 0, 1, 1]])\n",
143 |     "```\n",
144 |     "\n",
145 |     "3. Calculate the pairwise correlation matrix for the original matrix M and the reconstructed matrix using $k=2$ singular values (you may use [scipy.stats.spearmanr](http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html) to do the calculations). Consider the fist 5 sets of documents as one group $G1$ and the last 4 as another group $G2$ (i.e. first 5 and last 4 columns). What is the average within group correlation for $G1$, $G2$ and the average cross-group correlation for G1-G2 using either $M$ or $M'$. (Do not include self-correlation in the within-group calculations.)."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {
152 |     "collapsed": true
153 |    },
154 |    "outputs": [],
155 |    "source": []
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "**Exercise 4 (40 points)**. Clustering with LSA\n",
162 |     "\n",
163 |     "1. Begin by loading a PubMed database of selected article titles using 'pickle'. With the following:\n",
164 |     "```import pickle\n",
165 |     "docs = pickle.load(open('data/pubmed.pic', 'rb'))```\n",
166 |     "\n",
167 |     "    Create a tf-idf matrix for every term that appears at least once in any of the documents. What is the shape of the tf-idf matrix? \n",
168 |     "\n",
169 |     "2. Perform SVD on the tf-idf matrix to obtain $U \\Sigma V^T$ (often written as $T \\Sigma D^T$ in this context with $T$ representing the terms and $D$ representing the documents). If we set all but the top $k$ singular values to 0, the reconstructed matrix is essentially $U_k \\Sigma_k V_k^T$, where $U_k$ is $m \\times k$, $\\Sigma_k$ is $k \\times k$ and $V_k^T$ is $k \\times n$. Terms in this reduced space are represented by $U_k \\Sigma_k$ and documents by $\\Sigma_k V^T_k$. Reconstruct the matrix using the first $k=10$ singular values.\n",
170 |     "\n",
171 |     "3. Use agglomerative hierarchical clustering with complete linkage to plot a dendrogram and comment on the likely number of  document clusters with $k = 100$. Use the dendrogram function from [SciPy ](https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.cluster.hierarchy.dendrogram.html).\n",
172 |     "\n",
173 |     "4. Determine how similar each of the original documents is to the new document `data/mystery.txt`. Since $A = U \\Sigma V^T$, we also have $V = A^T U S^{-1}$ using orthogonality and the rule for transposing matrix products. This suggests that in order to map the new document to the same concept space, first find the tf-idf vector $v$ for the new document - this must contain all (and only) the terms present in the existing tf-idx matrix. Then the query vector $q$ is given by $v^T U_k \\Sigma_k^{-1}$. Find the 10 documents most similar to the new document and the 10 most dissimilar. "
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": null,
179 |    "metadata": {
180 |     "collapsed": true
181 |    },
182 |    "outputs": [],
183 |    "source": []
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "metadata": {},
188 |    "source": [
189 |     "**Notes on the Pubmed articles**\n",
190 |     "\n",
191 |     "These were downloaded with the following script.\n",
192 |     "\n",
193 |     "```python\n",
194 |     "from Bio import Entrez, Medline\n",
195 |     "Entrez.email = \"YOUR EMAIL HERE\"\n",
196 |     "import cPickle\n",
197 |     "\n",
198 |     "try:\n",
199 |     "    docs = cPickle.load(open('pubmed.pic'))\n",
200 |     "except Exception, e:\n",
201 |     "    print e\n",
202 |     "\n",
203 |     "    docs = {}\n",
204 |     "    for term in ['plasmodium', 'diabetes', 'asthma', 'cytometry']:\n",
205 |     "        handle = Entrez.esearch(db=\"pubmed\", term=term, retmax=50)\n",
206 |     "        result = Entrez.read(handle)\n",
207 |     "        handle.close()\n",
208 |     "        idlist = result[\"IdList\"]\n",
209 |     "        handle2 = Entrez.efetch(db=\"pubmed\", id=idlist, rettype=\"medline\", retmode=\"text\")\n",
210 |     "        result2 = Medline.parse(handle2)\n",
211 |     "        for record in result2:\n",
212 |     "            title = record.get(\"TI\", None)\n",
213 |     "            abstract = record.get(\"AB\", None)\n",
214 |     "            if title is None or abstract is None:\n",
215 |     "                continue\n",
216 |     "            docs[title] = '\\n'.join([title, abstract])\n",
217 |     "            print title\n",
218 |     "        handle2.close()\n",
219 |     "    cPickle.dump(docs, open('pubmed.pic', 'w'))\n",
220 |     "docs.values()\n",
221 |     "```"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": null,
227 |    "metadata": {},
228 |    "outputs": [],
229 |    "source": []
230 |   }
231 |  ],
232 |  "metadata": {
233 |   "kernelspec": {
234 |    "display_name": "Python 3",
235 |    "language": "python",
236 |    "name": "python3"
237 |   },
238 |   "language_info": {
239 |    "codemirror_mode": {
240 |     "name": "ipython",
241 |     "version": 3
242 |    },
243 |    "file_extension": ".py",
244 |    "mimetype": "text/x-python",
245 |    "name": "python",
246 |    "nbconvert_exporter": "python",
247 |    "pygments_lexer": "ipython3",
248 |    "version": "3.7.4"
249 |   }
250 |  },
251 |  "nbformat": 4,
252 |  "nbformat_minor": 1
253 | }
254 | 


--------------------------------------------------------------------------------
/homework/Homework04.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Homework 04: Optimization"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "**Brief Honor Code**. Do the homework on your own. You may discuss ideas with your classmates, but DO NOT copy the solutions from someone else or the Internet. If stuck, discuss with TA."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "**Note**: The expected figures are provided so you can check your solutions."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "**1**. (20 points)\n",
 29 |     "\n",
 30 |     "Find the gradient and Hessian for the following equation\n",
 31 |     "\n",
 32 |     "$$\n",
 33 |     "f(x, y) = 1 + 2x + 3y + 4x^2 + 2xy + y^2\n",
 34 |     "$$\n",
 35 |     "\n",
 36 |     "- Plot the contours of this function using `matplotlib` in the box $-10 \\le x \\le 10$ and $-10 \\le y \\le 10$ using a $100 \\times 100$ grid. \n",
 37 |     "- Then plot the gradient vectors using the `quiver` function on top of the contour plot using a $10 \\times 10$ grid. Are the gradients orthogonal to the contours?\n",
 38 |     "\n",
 39 |     "Hint: Use `numpy.meshgrid`, `matplotlib.contour` and `matplotllib.quiver`.\n",
 40 |     "\n",
 41 |     "![img](figs/7_1.png)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {
 48 |     "collapsed": true
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "\n",
 53 |     "\n",
 54 |     "\n"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "**2**. (30 points)\n",
 62 |     "\n",
 63 |     "This exercise is about using Newton's method to find the cube roots of unity - find $z$ such that $z^3 = 1$. From the fundamental theorem of algebra, we know there must be exactly 3 complex roots since this is a degree 3 polynomial.\n",
 64 |     "\n",
 65 |     "We start with Euler's equation\n",
 66 |     "$$\n",
 67 |     "e^{ix} = \\cos x + i \\sin x\n",
 68 |     "$$\n",
 69 |     "\n",
 70 |     "Raising $e^{ix}$ to the $n$th power where $n$ is an integer, we get from Euler's formula with $nx$ substituting for $x$\n",
 71 |     "$$\n",
 72 |     "(e^{ix})^n = e^{i(nx)} = \\cos nx + i \\sin nx\n",
 73 |     "$$\n",
 74 |     "\n",
 75 |     "Whenever $nx$ is an integer multiple of $2\\pi$, we have\n",
 76 |     "$$\n",
 77 |     "\\cos nx + i \\sin nx = 1\n",
 78 |     "$$\n",
 79 |     "\n",
 80 |     "So\n",
 81 |     "$$\n",
 82 |     "e^{2\\pi i \\frac{k}{n}}\n",
 83 |     "$$\n",
 84 |     "is a root of 1 whenever $k/n = 0, 1, 2, \\ldots$.\n",
 85 |     "\n",
 86 |     "So the cube roots of unity are $1, e^{2\\pi i/3}, e^{4\\pi i/3}$. \n",
 87 |     "\n",
 88 |     "![img](figs/7_2.png)\n",
 89 |     "\n",
 90 |     "While we can do this analytically, the idea is to use Newton's method to find these roots, and in the process, discover some rather perplexing behavior of Newton's method.\n",
 91 |     "\n",
 92 |     "Newton's method for functions of complex variables - stability and basins of attraction. (30 points)\n",
 93 |     "\n",
 94 |     "1. Write a function with the following function signature `newton(z, f, fprime, max_iter=100, tol=1e-6)` where\n",
 95 |     "    - `z` is a starting value (a complex number e.g.  ` 3 + 4j`)\n",
 96 |     "    - `f` is a function of `z`\n",
 97 |     "    - `fprime` is the derivative of  `f`\n",
 98 |     "The function will run until either max_iter is reached or the absolute value of the Newton step is less than tol. In either case, the function should return the number of iterations taken and the final value of `z` as a tuple (`i`, `z`). \n",
 99 |     "\n",
100 |     "2. Define the function `f` and `fprime` that will result in Newton's method finding the cube roots of 1. Find 3 starting points that will give different roots, and print both the start and end points. \n",
101 |     "\n",
102 |     "Write the following two plotting functions to see some (pretty) aspects of Newton's algorithm in the complex plane.\n",
103 |     "\n",
104 |     "3. The first function `plot_newton_iters(f, fprime, n=200, extent=[-1,1,-1,1], cmap='hsv')` calculates and stores the number of iterations taken for convergence (or max_iter) for each point in a 2D array. The 2D array limits are given by `extent` - for example, when `extent = [-1,1,-1,1]` the corners of the plot are `(-i, -i), (1, -i), (1, i), (-1, i)`. There are `n` grid points in both the real and imaginary axes. The argument `cmap` specifies the color map to use - the suggested defaults are fine. Finally plot the image using `plt.imshow` - make sure the axis ticks are correctly scaled. Make a plot for the cube roots of 1.\n",
105 |     "\n",
106 |     "![img](figs/7_2A.png)\n",
107 |     "\n",
108 |     "4. The second function `plot_newton_basins(f, fprime, n=200, extent=[-1,1,-1,1], cmap='jet')` has the same arguments, but this time the grid stores the identity of the root that the starting point converged to. Make a plot for the cube roots of 1 - since there are 3 roots, there should be only 3 colors in the plot.\n",
109 |     "\n",
110 |     "![img](figs/7_2B.png)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {
117 |     "collapsed": true
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "\n",
122 |     "\n",
123 |     "\n"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "**3**. (20 points)\n",
131 |     "\n",
132 |     "Consider the following function on $\\mathbb{R}^2$:\n",
133 |     "\n",
134 |     "$$\n",
135 |     "f(x_1,x_2) = -x_1x_2e^{-\\frac{(x_1^2+x_2^2)}{2}}\n",
136 |     "$$\n",
137 |     "\n",
138 |     "- Find the minimum under the constraint \n",
139 |     "$$g(x) = x_1^2+x_2^2 \\leq 10$$\n",
140 |     "and \n",
141 |     "$$h(x) = 2x_1 + 3x_2 = 5$$ using `scipy.optimize.minimize`.\n",
142 |     "- Plot the function contours using `matplotlib`, showing the constraints $g$ and $h$ and indicate the constrained minimum with an `X`.\n",
143 |     "\n",
144 |     "![img](figs/7_3.png)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": true
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "\n",
156 |     "\n",
157 |     "\n"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "**4** (30 points)\n",
165 |     "\n",
166 |     "Find solutions to $x^3 + 4x^2 -3 = x$. \n",
167 |     "\n",
168 |     "- Write a function to find brackets, assuming roots are always at least 1 unit apart and that the roots lie between -10 and 10\n",
169 |     "- For each bracket, find the enclosed root using\n",
170 |     "    - a bisection method\n",
171 |     "    - Newton-Raphson (no guarantee to stay within brackets)\n",
172 |     "- Use the end points of the bracket as starting points for the bisection methods and the midpoint for Newton-Raphson.\n",
173 |     "- Use the companion matrix and characteristic polynomial to find the solutions\n",
174 |     "- Plot the function and its roots (marked with a circle) in a window just large enough to contain all roots.\n",
175 |     "\n",
176 |     "Use a tolerance of 1e-6.\n",
177 |     "\n",
178 |     "![img](figs/7_4.png)"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "collapsed": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "\n",
190 |     "\n",
191 |     "\n"
192 |    ]
193 |   }
194 |  ],
195 |  "metadata": {
196 |   "kernelspec": {
197 |    "display_name": "Python 3",
198 |    "language": "python",
199 |    "name": "python3"
200 |   },
201 |   "language_info": {
202 |    "codemirror_mode": {
203 |     "name": "ipython",
204 |     "version": 3
205 |    },
206 |    "file_extension": ".py",
207 |    "mimetype": "text/x-python",
208 |    "name": "python",
209 |    "nbconvert_exporter": "python",
210 |    "pygments_lexer": "ipython3",
211 |    "version": "3.7.4"
212 |   }
213 |  },
214 |  "nbformat": 4,
215 |  "nbformat_minor": 2
216 | }
217 | 


--------------------------------------------------------------------------------
/homework/Homework05.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Homework 05: Conjugate Gradient Descent"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "In this homework, we will implement the conjugate graident descent algorithm. While you should nearly always use an optimization routine from a library for practical data analyiss, this exercise is useful because it will make concepts from multivariatble calculus and linear algebra covered in the lectrures concrete for you. Also, it brings you up the learning curve for the implementaiton of more complex algorithms than the ones you have been exposed to so far.\n",
 15 |     "\n",
 16 |     "Note: The exercise assumes that we can calculate the gradient and Hessian of the fucntion we are trying to minimize. This can be computationally expensive or not even possible for soeme functions. Approximate methods can then be used; we do not go into such complexities here."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**Conjugate gradient descent**\n",
 24 |     "\n",
 25 |     "We want to implement the line search method \n",
 26 |     "\n",
 27 |     "$$ \n",
 28 |     "x_{k+1} = x_k + \\alpha_k p_k\n",
 29 |     "$$\n",
 30 |     "\n",
 31 |     "where $\\alpha_k$ is the step size and $p_k$ is the search direction. \n",
 32 |     "\n",
 33 |     "In particular, we want the search directions $p_k$ to be *conjugate*, as this will allow us to find the minimum in $n$ steps for $x \\in \\mathbb{R}^n$ if $f(x)$ is a quadratic function.\n",
 34 |     "\n",
 35 |     "The following exercises will unpack this:\n",
 36 |     "\n",
 37 |     "- What quadratic functions are\n",
 38 |     "- What conjugate vectors are\n",
 39 |     "- How to find conjugate vectors by Gram-Schmidt process\n",
 40 |     "- How to find the step size $\\alpha_k$\n",
 41 |     "\n",
 42 |     "and finally wrap them all into a conjugate gradient algorithm."
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "metadata": {},
 48 |    "source": [
 49 |     "**Quadratic function surfaces**\n",
 50 |     "\n",
 51 |     "Recall that our objective is to minimize a scalar valued function which maps $\\mathbb{R}^n \\mapsto \\mathbb{R}$, for example, a log likelihoood function (for MLE) or unnormalized posterior distribution (for MAP). Geometrically, we are tring to find the value of the lowest point of some surface. The conjugate gradient algorihtm assumes that the surface can be approximated by the quadratic expression (say, by using a Taylor series expansion about $x$)\n",
 52 |     "\n",
 53 |     "$$\n",
 54 |     "f(x) = \\frac{1}{2}x^TAx - b^Tx + c\n",
 55 |     "$$\n",
 56 |     "\n",
 57 |     "and that \n",
 58 |     "\n",
 59 |     "$$\n",
 60 |     "\\nabla f = Ax - b = 0\n",
 61 |     "$$\n",
 62 |     "\n",
 63 |     "at the minimum (if A is positive definite). Note that $A$ is a matrix, $b$ is a vector, and $c$ is a scalar.  Also, note that the matrix $A$ is the Hessian of the quadratic function.For simplicity, we'll work in $\\mathbb{R}^2$ so we can visualize the surface, so that $x$ is a 2-vector.\n",
 64 |     "\n",
 65 |     "Note: A **form** is a polynomial function where every term has the same degree - for example, $x^2 + 2xy + y^2$ is a quadratic form, whcih can be rewritten as \n",
 66 |     "$$\n",
 67 |     "\\begin{pmatrix}\n",
 68 |     "x & y\n",
 69 |     "\\end{pmatrix}\n",
 70 |     "\\begin{pmatrix}\n",
 71 |     "    1 & 1\\\\\n",
 72 |     "    1 & 1\n",
 73 |     "\\end{pmatrix}\n",
 74 |     "\\begin{pmatrix}\n",
 75 |     "x \\\\\n",
 76 |     "y\n",
 77 |     "\\end{pmatrix}\n",
 78 |     "$$\n",
 79 |     "\n",
 80 |     "That is, $x^TAx$ is a quadratic form."
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 3,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "%matplotlib inline\n",
 90 |     "import matplotlib.pyplot as plt\n",
 91 |     "import numpy as np"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "**Exercise 1 (20 points)** \n",
 99 |     "\n",
100 |     "We will work with function $f_1$\n",
101 |     "\n",
102 |     "$$\n",
103 |     "f1(x) = \\frac{1}{2} x^T \\pmatrix{1 & 0 \\\\ 0 & 1}x\n",
104 |     "$$\n",
105 |     "\n",
106 |     "and function $f_2$\n",
107 |     "\n",
108 |     "$$\n",
109 |     "f2(x) = \\frac{1}{2} x^T \\pmatrix{1 & 0 \\\\ 0 & 3}x\n",
110 |     "$$\n",
111 |     "\n",
112 |     "- Plot the labeled contours of the quadratic functions\n",
113 |     "- Use a `streamplot` to show the gradient vector field of the above quadratic functions."
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": true
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "\n",
125 |     "\n",
126 |     "\n"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "**Gram-Schmidt**\n",
134 |     "\n",
135 |     "The way to numerically find conjugate vectors is to use the Gram-Schmidt process. Here, instead of the usual projection \n",
136 |     "\n",
137 |     "$$\n",
138 |     "\\text{proj}_u(v) = \\frac{u \\cdot v}{u \\cdot u} \\, u\n",
139 |     "$$\n",
140 |     "\n",
141 |     "we use the generalized projection\n",
142 |     "$$\n",
143 |     "\\text{proj}_u(v) = \\frac{uA^Tv}{uA^Tu} \\, u\n",
144 |     "$$"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "**Exercise 2 (30 points)**\n",
152 |     "\n",
153 |     "The vectors $u$ and $v$ are orthogonal i.e. $u^Tv = 0$ and conjugate with respect to $A$ if $u^TAv = 0$. The geometric intuition for conjugate vecrors $u$ and $v$ is that $u$ and $v$ would be orthogonal if we stretched the contour plots so that it became isotropic (same in all directions, just like when A=𝟙). Write a Gram-Schmidt function to find orthogonal and conjuate vectors with the following signature\n",
154 |     "\n",
155 |     "```python\n",
156 |     "def gram_schmidt(U, inner):\n",
157 |     "    \"\"\"Return an orthogonal matrix.\n",
158 |     "    \n",
159 |     "    U is a matrix of (column) vecotrs.\n",
160 |     "    inner is a function that calculates the inner product.\n",
161 |     "    \n",
162 |     "    Returns an orthogonal matrix of the same shape as U.\n",
163 |     "    \"\"\"\n",
164 |     "```\n",
165 |     "\n",
166 |     "Use this function and the appropiate inner product to plot\n",
167 |     "\n",
168 |     "- An orhtogonal set of basis vectors for $f_1$\n",
169 |     "- A conjugate set of basic vectors for $f_2$ \n",
170 |     "\n",
171 |     "where the first basis vector is to parallel to $\\pmatrix{1 \\\\ 1}$."
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {
178 |     "collapsed": true
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "\n",
183 |     "\n",
184 |     "\n"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {
190 |     "collapsed": true
191 |    },
192 |    "source": [
193 |     "**Exercise 3 (20 points)**\n",
194 |     "\n",
195 |     "We now need to find the \"step size\" $\\alpha$ to take in the direction of the search vector $p$. We can get a quadratic approximation to a general nonliner function $f$ by taking the Taylor series in the driection of $p$\n",
196 |     "\n",
197 |     "$$\n",
198 |     "f(x + \\alpha p) = f(x) + \\alpha [f'(x)]^T p + \\frac{\\alpha^2}{2} p^T f''(x) p\n",
199 |     "$$\n",
200 |     "\n",
201 |     "Find the derivative with respect to $\\alpha$ and use this to find the optimal value for $\\alpha$ with respect to the quadratic approcimaiton. \n",
202 |     "\n",
203 |     "* Write the derivation of the optimal step size $\\alpha$ using LaTeX in a Markdown cell.\n",
204 |     "* Write a funciton that returns $\\alpha$ for a quadratic funciton with the following signature\n",
205 |     "\n",
206 |     "```python\n",
207 |     "def step(x, p, A, b):\n",
208 |     "    \"\"\"Returns the optimal step size to take in line search on a quadratic.\n",
209 |     "    \n",
210 |     "    A and b are the coefficients of the quadartic expression \n",
211 |     "    $$\n",
212 |     "    f(x) = \\frac{1}{2}x^TAx - b^Tx + c\n",
213 |     "    $$\n",
214 |     "    p is the search direction\n",
215 |     "    x is the current location\n",
216 |     "    \"\"\"\n",
217 |     "```"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": null,
223 |    "metadata": {
224 |     "collapsed": true
225 |    },
226 |    "outputs": [],
227 |    "source": [
228 |     "\n",
229 |     "\n",
230 |     "\n"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "metadata": {},
236 |    "source": [
237 |     "**Line search**\n",
238 |     "\n",
239 |     "We now know how to find a search direction $p_k$ - this is a vector that is conjugate to the previous search direction. The first search direction is usually set to be the gradient. Next we need to find out how far along $p_k$ we need to travel, i.e., we need to find $\\alpha_k$. First we take a Taylor expansion in the direction of $p$\n",
240 |     "\n",
241 |     "$$\n",
242 |     "f(x + \\alpha p) = f(x) + \\alpha [f'(x)]^T p + \\frac{\\alpha^2}{2} p^T f''(x) p\n",
243 |     "$$\n",
244 |     "\n",
245 |     "followed by finding the derivative with respect to $\\alpha$\n",
246 |     "\n",
247 |     "$$\n",
248 |     "\\frac{d}{d\\alpha} f(x + \\alpha p) = [f'(x)]^T p + \\alpha p^T f''(x) p\n",
249 |     "$$\n",
250 |     "\n",
251 |     "Solvign for $\\frac{d}{d\\alpha} f(x + \\alpha p) = 0$, we get\n",
252 |     "\n",
253 |     "$$\n",
254 |     "\\alpha = - \\frac{[f'(x)]^T p}{p^T f''(x) p} \\\\\n",
255 |     "= - \\frac{\\nabla f^T p}{p^T A p} \\\\\n",
256 |     "= \\frac{(b - Ax)^T p}{p^T A p}\n",
257 |     "$$\n",
258 |     "\n"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {},
264 |    "source": [
265 |     "**Exercise 4 (30 points)**\n",
266 |     "\n",
267 |     "Implement the conjugate grdient descent algorithm with the following signature\n",
268 |     "\n",
269 |     "```python\n",
270 |     "def cg(x, A, b, c, max_iter=100, tol=1e-3):\n",
271 |     "    \"\"\"Conjugate gradient descent on a quadratic function surface.\n",
272 |     "    \n",
273 |     "    x is the starting position\n",
274 |     "    A, b and c are the coefficients of the quadartic expression \n",
275 |     "    $$\n",
276 |     "    f(x) = \\frac{1}{2}x^TAx - b^Tx + c\n",
277 |     "    $$\n",
278 |     "    max_iter is the maximum number of iterations to take\n",
279 |     "    tol is the tolerance (stop if the length of the gradient is smaller than tol)\n",
280 |     "\n",
281 |     "    Returns the number of steps taken and the list of all positions visited.\n",
282 |     "    \"\"\"\n",
283 |     "```\n",
284 |     "\n",
285 |     "- Use cg to find the minimum of the funciton $f_2$ from Exercise 1, starting from $\\pmatrix{6 \\\\ 7}$.\n",
286 |     "\n",
287 |     "- Plot the contour of the funciton f and the trajectory taken from the inital starting poitn $x$ to the final position, inlcuding all the intermediate steps. \n",
288 |     "\n",
289 |     "We are not particularly concerned about efficiency here, so don't worry about JIT/AOT/C++ level optimization. "
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": null,
295 |    "metadata": {
296 |     "collapsed": true
297 |    },
298 |    "outputs": [],
299 |    "source": [
300 |     "\n",
301 |     "\n",
302 |     "\n"
303 |    ]
304 |   }
305 |  ],
306 |  "metadata": {
307 |   "kernelspec": {
308 |    "display_name": "Python 3",
309 |    "language": "python",
310 |    "name": "python3"
311 |   },
312 |   "language_info": {
313 |    "codemirror_mode": {
314 |     "name": "ipython",
315 |     "version": 3
316 |    },
317 |    "file_extension": ".py",
318 |    "mimetype": "text/x-python",
319 |    "name": "python",
320 |    "nbconvert_exporter": "python",
321 |    "pygments_lexer": "ipython3",
322 |    "version": "3.7.6"
323 |   }
324 |  },
325 |  "nbformat": 4,
326 |  "nbformat_minor": 1
327 | }
328 | 


--------------------------------------------------------------------------------
/homework/figs/7_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/figs/7_1.png


--------------------------------------------------------------------------------
/homework/figs/7_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/figs/7_2A.png


--------------------------------------------------------------------------------
/homework/figs/7_2B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/figs/7_2B.png


--------------------------------------------------------------------------------
/homework/figs/7_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/figs/7_3.png


--------------------------------------------------------------------------------
/homework/figs/7_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/figs/7_4.png


--------------------------------------------------------------------------------
/homework/forest_fire.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/forest_fire.mp4


--------------------------------------------------------------------------------
/homework/forest_fire.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/homework/forest_fire.png


--------------------------------------------------------------------------------
/index.rst:
--------------------------------------------------------------------------------
 1 | .. STA663-2020 documentation master file, created by
 2 |    sphinx-quickstart on Thu Apr 16 13:55:55 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to STA663-2020's documentation!
 7 | =======================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    notebooks/S02_Text_Annotated.ipynb
14 |    notebooks/S03_Numpy_Annotated.ipynb
15 |    notebooks/S04_Using_Pandas_Annotated.ipynb
16 |    notebooks/S05_Graphics_Annotated.ipynb
17 |    notebooks/S06_Functional_Annotated.ipynb
18 |    notebooks/S07 Introduction to Statistical Computing.ipynb
19 |    notebooks/S07A_Scalars_Annotated.ipynb
20 |    notebooks/S07B_Vectors_Annotated.ipynb
21 |    notebooks/S07C_Matrices_Annotated.ipynb
22 |    notebooks/S07D_Sparse_Matrices_Annotated.ipynb
23 |    notebooks/S08A_Matrices_Linear_Combinations_Annotated.ipynb
24 |    notebooks/S08B_Sovling_Linear_Equations_Annotated.ipynb
25 |    notebooks/S08C_Least_Squares.ipynb
26 |    notebooks/S08D_PCA.ipynb
27 |    notebooks/S08E_SVD.ipynb
28 |    notebooks/S08F_LinearAlgebraExamples.ipynb
29 |    notebooks/S08G_Linear_Algebra_Application_Exercises.ipynb
30 |    notebooks/S08H_Linear_Algebra_Applications.ipynb
31 |    notebooks/S08_Linear_Algebra_Review.ipynb
32 |    notebooks/S09A_Root_Finding.ipynb
33 |    notebooks/S09B_Optimization.ipynb
34 |    notebooks/S09C_Optimization_Algorithms.ipynb
35 |    notebooks/S09D_Optimization_Examples.ipynb
36 |    notebooks/S09E_Optimization_Line_Search.ipynb
37 |    notebooks/S09F_Least_Squares_Optimization.ipynb
38 |    notebooks/S09G_Gradient_Descent_Optimization.ipynb
39 |    notebooks/S09H_Constrained_Optimization.ipynb
40 |    notebooks/S10A_Parallel_Porgramming.ipynb
41 |    notebooks/S10B_Multicore_Parallelism.ipynb
42 |    notebooks/S10C_IPyParallel.ipynb
43 |    notebooks/S11A_Overview_numba_cython.ipynb
44 | books/S11B_Numba.ipynb
45 |    notebooks/S11C_Cython.ipynb
46 |    notebooks/S12_CPP.ipynb
47 |    notebooks/S13_pybind11.ipynb
48 |    notebooks/S14A_Random_Variables.ipynb
49 |    notebooks/S14B_Probabilisitc_Programming.ipynb
50 |    notebooks/S14C_Monte_Carlo_Methods.ipynb
51 |    notebooks/S14D_Monte_Carlo_Integration.ipynb
52 |    notebooks/S15A_MarkovChains.ipynb
53 |    notebooks/S15B_MCMC-Copy1.ipynb
54 |    notebooks/S15B_MCMC.ipynb
55 |    notebooks/S15C_HMC.ipynb
56 |    notebooks/S16A_PyMC3.ipynb
57 |    notebooks/S16B_PyMC3.ipynb
58 |    notebooks/S16C_PyMC3.ipynb
59 |    notebooks/S16D_PyMC3.ipynb
60 |    notebooks/S16E_PyStan.ipynb
61 |    notebooks/S16F_TF.ipynb
62 | 
63 | 
64 | Indices and tables
65 | ==================
66 | 
67 | * :ref:`genindex`
68 | * :ref:`modindex`
69 | * :ref:`search`
70 | 


--------------------------------------------------------------------------------
/labs/Lab01.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Lab 01**\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The first lab is to gain basic familiarity with Python. \n",
 14 |     "\n",
 15 |     "- Any library needed has been imported for you\n",
 16 |     "- You are not to use any 3rd party library such as `numpy` for the first lab\n",
 17 |     "- Do this lab without using the web to search for solutions"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import random\n",
 27 |     "import math\n",
 28 |     "\n",
 29 |     "random.seed(123)"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "**1**. Write Bayes theorem in $\\LaTeX$ using a Markdown cell."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": []
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "**2**. Create two lists of 100 random integers in the range`[0, 1000)`.  Find all numbers that are shared by the two lists.\n",
 51 |     "\n",
 52 |     "Hint: Use the function `random.randint` to generate a random integer"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": null,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": []
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "**3**. A Pythagorean triplet is a tuple of integers $(a, b, c)$ such that $a^2 + b^2 = c^2$. Find all unique Pythagorean triplets for which $a$ and $b$ are less than 25. For example, $(3,4,5)$ is a Pythagorean triplet.\n",
 67 |     "\n",
 68 |     "Hint: To check if a number $x$ is a whole number, you can use\n",
 69 |     "\n",
 70 |     "```python\n",
 71 |     "math.isclose(x % 1, 0)\n",
 72 |     "```\n",
 73 |     "\n",
 74 |     "or the built-in method\n",
 75 |     "\n",
 76 |     "```python\n",
 77 |     "x.is_integer()\n",
 78 |     "```"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": []
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "**4**. Implement a bubble sort function in Python. Use it to sort the list `[9,1,9,6,6,8,2,4,5,9]`. Make sure your function has a `docstring`.\n",
 93 |     "\n",
 94 |     "Hint:\n",
 95 |     "\n",
 96 |     "Recall that in Python, a swap is as simple as\n",
 97 |     "\n",
 98 |     "```python\n",
 99 |     "x, y = y, x\n",
100 |     "```"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": []
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "**5**. The \"Fizz-Buzz test\" is an interview question designed to help filter out the 99.5% of programming job candidates who can't seem to program their way out of a wet paper bag. The text of the programming assignment is as follows:\n",
115 |     "\n",
116 |     "```Write a program that prints the numbers from 1 to 100. But for multiples of three print “Fizz” instead of the number and for the multiples of five print “Buzz”. For numbers which are multiples of both three and five print “FizzBuzz”.```\n",
117 |     "\n",
118 |     "Source: \"Using FizzBuzz to Find Developers who Grok Coding\" http://tickletux.wordpress.com/2007/01/24/using-fizzbuzz-to-find-developers-who-grok-coding/\n",
119 |     "\n",
120 |     "Write the Fizz-Buzz program in Python. Print the 100 encoded numbers in a single line.\n",
121 |     "\n",
122 |     "Hint: See what the `end` argument to the `print` function does. Alternatively, see how to use the `join` method of any string."
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": []
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "**6**. Bonus for those who found the above exercises too easy.\n",
137 |     "\n",
138 |     "Write a function that returns the specified output for the given input.\n",
139 |     "\n",
140 |     "```\n",
141 |     "Input: ['Tokyo', 'London', 'Rome', 'Donlon', 'Kyoto', 'Paris']\n",
142 |     "// YOUR ALGORITHM\n",
143 |     "Output: [['Tokyo', 'Kyoto'], ['London', 'Donlon'], ['Rome'], ['Paris']]\n",
144 |     "```"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": []
153 |   }
154 |  ],
155 |  "metadata": {
156 |   "kernelspec": {
157 |    "display_name": "Python 3",
158 |    "language": "python",
159 |    "name": "python3"
160 |   },
161 |   "language_info": {
162 |    "codemirror_mode": {
163 |     "name": "ipython",
164 |     "version": 3
165 |    },
166 |    "file_extension": ".py",
167 |    "mimetype": "text/x-python",
168 |    "name": "python",
169 |    "nbconvert_exporter": "python",
170 |    "pygments_lexer": "ipython3",
171 |    "version": "3.6.5"
172 |   }
173 |  },
174 |  "nbformat": 4,
175 |  "nbformat_minor": 2
176 | }
177 | 


--------------------------------------------------------------------------------
/labs/Lab01_Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Lab 01**\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The first lab is to gain basic familiarity with Python. \n",
 14 |     "\n",
 15 |     "- Any library needed has been imported for you\n",
 16 |     "- You are not to use any 3rd party library such as `numpy` for the first lab\n",
 17 |     "- Do this lab without using the web to search for solutions"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import random\n",
 27 |     "import math\n",
 28 |     "\n",
 29 |     "random.seed(123)"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "**1**. Write Bayes theorem in $\\LaTeX$ using a Markdown cell."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "$$\n",
 44 |     "\\Pr(A \\mid B) = \\frac{\\Pr(B \\mid A) \\Pr(A)}{\\Pr(B)}\n",
 45 |     "$$"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "**2**. Create two lists of 100 random integers in the range`[0, 1000]`.  Find all numbers that are shared by the two lists.\n",
 53 |     "\n",
 54 |     "Note: `randint` includes *both* endpoints"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "xs = [random.randint(0, 1000) for i in range(100)]\n",
 64 |     "ys = [random.randint(0, 1000) for i in range(100)]\n",
 65 |     "set(xs).intersection(set(ys))"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "**3**. A Pythagorean triplet is a tuple of integers $(a, b, c)$ such that $a^2 + b^2 = c^2$. Find all unique Pythagorean triplets for which $a$ and $b$ are less than 25. For example, $(3,4,5)$ is a Pythagorean triplet."
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "import math"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "triplets = []\n",
 91 |     "for a in range(1, 25):\n",
 92 |     "    for b in range(a, 25):\n",
 93 |     "        c = math.sqrt(a**2 + b**2)\n",
 94 |     "        if c.is_integer():\n",
 95 |     "            triplets.append((a, b, int(c)))\n",
 96 |     "triplets"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "**4**. Implement a bubble sort function in Python. Use it to sort the list `[9,1,9,6,6,8,2,4,5,9]`."
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "def bubblesort(xs):\n",
113 |     "    \"\"\"Bubble sort.\"\"\"\n",
114 |     "    \n",
115 |     "    n = len(xs)\n",
116 |     "    for i in range(n):\n",
117 |     "        for j in range(i+1, n):\n",
118 |     "            if xs[i] > xs[j]:\n",
119 |     "                xs[i], xs[j] = xs[j], xs[i]\n",
120 |     "    return xs"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {},
127 |    "outputs": [],
128 |    "source": [
129 |     "xs = [9,1,9,6,6,8,2,4,5,9]"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "bubblesort(xs)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "**5**. The \"Fizz-Buzz test\" is an interview question designed to help filter out the 99.5% of programming job candidates who can't seem to program their way out of a wet paper bag. The text of the programming assignment is as follows:\n",
146 |     "\n",
147 |     "```Write a program that prints the numbers from 1 to 100. But for multiples of three print “Fizz” instead of the number and for the multiples of five print “Buzz”. For numbers which are multiples of both three and five print “FizzBuzz”.```\n",
148 |     "\n",
149 |     "Source: \"Using FizzBuzz to Find Developers who Grok Coding\" http://tickletux.wordpress.com/2007/01/24/using-fizzbuzz-to-find-developers-who-grok-coding/\n",
150 |     "\n",
151 |     "Write the Fizz-Buzz program in Python. Print the 100 encoded numbers in a single line."
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "for i in range (1, 101):\n",
161 |     "    if i % 15 == 0:\n",
162 |     "        print('FizzBuzz', end=', ')\n",
163 |     "    elif i % 3 == 0:\n",
164 |     "        print('Fizz', end=', ')\n",
165 |     "    elif i % 5 == 0:\n",
166 |     "        print('Buzz', end=', ')\n",
167 |     "    else:\n",
168 |     "        print(i, end=',')"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {},
174 |    "source": [
175 |     "**6**.\n",
176 |     "\n",
177 |     "Write a function that returns the specified output for the given input.\n",
178 |     "\n",
179 |     "```\n",
180 |     "Input: ['Tokyo', 'London', 'Rome', 'Donlon', 'Kyoto', 'Paris']\n",
181 |     "// YOUR ALGORITHM\n",
182 |     "Output: [['Tokyo', 'Kyoto'], ['London', 'Donlon'], ['Rome'], ['Paris']]\n",
183 |     "```"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "def find_paired(cities):\n",
193 |     "    \"\"\"Organize ciites in some way.\"\"\"\n",
194 |     "\n",
195 |     "    n = len(cities)\n",
196 |     "    paired = set([])\n",
197 |     "    for i in range(n):\n",
198 |     "        for j in range(i+1, n):\n",
199 |     "            c1 = cities[i]\n",
200 |     "            c2 = cities[j]\n",
201 |     "            if sorted(c1.lower()) == sorted(c2.lower()):\n",
202 |     "                paired.add((c1, c2))\n",
203 |     "    paired_ = [x for xs in paired for x in xs]\n",
204 |     "    unpaired = [[city] for city in cities if not city in paired_]\n",
205 |     "    return list(map(list, paired)) + unpaired"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "cities = ['Tokyo', 'London', 'Rome', 'Donlon', 'Kyoto', 'Paris']\n",
215 |     "find_paired(cities)"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": null,
221 |    "metadata": {},
222 |    "outputs": [],
223 |    "source": []
224 |   }
225 |  ],
226 |  "metadata": {
227 |   "kernelspec": {
228 |    "display_name": "Python 3",
229 |    "language": "python",
230 |    "name": "python3"
231 |   },
232 |   "language_info": {
233 |    "codemirror_mode": {
234 |     "name": "ipython",
235 |     "version": 3
236 |    },
237 |    "file_extension": ".py",
238 |    "mimetype": "text/x-python",
239 |    "name": "python",
240 |    "nbconvert_exporter": "python",
241 |    "pygments_lexer": "ipython3",
242 |    "version": "3.6.5"
243 |   }
244 |  },
245 |  "nbformat": 4,
246 |  "nbformat_minor": 2
247 | }
248 | 


--------------------------------------------------------------------------------
/labs/Lab02.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Lab 02**\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with handling strings and text. \n",
 14 |     "\n",
 15 |     "- You can import any Python standard library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**1**. Basic text file handling\n",
 24 |     "\n",
 25 |     "- Write the following into a text file called `snark.txt` using Python code and a context manager\n",
 26 |     "\n",
 27 |     "```text\n",
 28 |     "Just the place for a Snark! I have said it twice:\n",
 29 |     "   That alone should encourage the crew.\n",
 30 |     "Just the place for a Snark! I have said it thrice:\n",
 31 |     "   What I tell you three times is true.\n",
 32 |     "```\n",
 33 |     "\n",
 34 |     "- Now read in the file and change all occurrences of `Snark` to `Snack` and save to a new file call `snack.txt`\n",
 35 |     "- Read and print the contents of `snack.txt`\n",
 36 |     "- Using Python, write code to delete both these files\n",
 37 |     "\n",
 38 |     "Hint: See what the `os` module provides using TAB completion to delete a file."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": []
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "**2**. Download the following file `https://molb7621.github.io/workshop/_downloads/sample.fa` using the `get` mthod of the `requests` module, and save the `text` attribute of the resulting object as a string. \n",
 53 |     "\n",
 54 |     "- Convert to a list of strings by splitting on line breaks\n",
 55 |     "- Delete all comment lines that start with `>`\n",
 56 |     "- Combine the remaining 4 DNA sequences into a single sequence\n",
 57 |     "- Perform reverse complementation by\n",
 58 |     "    - replacing A with T, C with G, G with C and T with A\n",
 59 |     "    - reversing the order of the sequence\n",
 60 |     "- Print the reverse complement sequence"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": []
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "**3**. Starting with the sequence `CGATCGTACGATCGATCGATCTACGTACGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT`, find the top 3 most common subsequences of length 3. We want to consider all possible subsequences that are generated by a shifting window of size 3.\n",
 75 |     "\n",
 76 |     "Hint: \n",
 77 |     "\n",
 78 |     "- Consider using `zip` to generate shifting windows\n",
 79 |     "- Use `collections.Counter` or a dictionary to count"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": []
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "**4**. Use Python regular expressions to find the longest contiguous purine (the letters A and G) subsequences in \n",
 94 |     "\n",
 95 |     "`CGATCGTACGATCGATCGATCTACGTACAAGGAGGAGAGGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT`"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": []
104 |   }
105 |  ],
106 |  "metadata": {
107 |   "kernelspec": {
108 |    "display_name": "Python 3",
109 |    "language": "python",
110 |    "name": "python3"
111 |   },
112 |   "language_info": {
113 |    "codemirror_mode": {
114 |     "name": "ipython",
115 |     "version": 3
116 |    },
117 |    "file_extension": ".py",
118 |    "mimetype": "text/x-python",
119 |    "name": "python",
120 |    "nbconvert_exporter": "python",
121 |    "pygments_lexer": "ipython3",
122 |    "version": "3.6.5"
123 |   }
124 |  },
125 |  "nbformat": 4,
126 |  "nbformat_minor": 2
127 | }
128 | 


--------------------------------------------------------------------------------
/labs/Lab02_Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Lab 02**\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with handling strings and text. \n",
 14 |     "\n",
 15 |     "- You can import any Python standard library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**1**. Basic text file handling\n",
 24 |     "\n",
 25 |     "- Write the following into a text file called `snark.txt` using Python code and a context manager\n",
 26 |     "\n",
 27 |     "```text\n",
 28 |     "Just the place for a Snark! I have said it twice:\n",
 29 |     "   That alone should encourage the crew.\n",
 30 |     "Just the place for a Snark! I have said it thrice:\n",
 31 |     "   What I tell you three times is true.\n",
 32 |     "```\n",
 33 |     "\n",
 34 |     "- Now read in the file and change all occurrences of `Snark` to `Snack` and save to a new file call `snack.txt`\n",
 35 |     "- Read and print the contents of `snack.txt`\n",
 36 |     "- Using Python, write code to delete both these files\n",
 37 |     "\n",
 38 |     "Hint: See what the `os` module provides using TAB completion to delete a file."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 1,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "s = '''Just the place for a Snark! I have said it twice:\n",
 48 |     "   That alone should encourage the crew.\n",
 49 |     "Just the place for a Snark! I have said it thrice:\n",
 50 |     "   What I tell you three times is true'''\n",
 51 |     "\n",
 52 |     "with open('snark.txt', 'w') as f:\n",
 53 |     "    f.write(s)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 2,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "with open('snark.txt') as fin:\n",
 63 |     "    with open('snack.txt', 'w') as fout:\n",
 64 |     "        for line in fin:\n",
 65 |     "            line = line.replace('Snark', 'Snack')\n",
 66 |     "            fout.write(line)"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 3,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "name": "stdout",
 76 |      "output_type": "stream",
 77 |      "text": [
 78 |       "Just the place for a Snack! I have said it twice:\n",
 79 |       "   That alone should encourage the crew.\n",
 80 |       "Just the place for a Snack! I have said it thrice:\n",
 81 |       "   What I tell you three times is true\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "with open('snack.txt') as f:\n",
 87 |     "    print(f.read())"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 4,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "import os\n",
 97 |     "\n",
 98 |     "os.remove('snack.txt')\n",
 99 |     "os.remove('snark.txt')"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "**2**. Download the following file `https://molb7621.github.io/workshop/_downloads/sample.fa` using the `get` mthod of the `requests` module, and save the `text` attribute of the resulting object as a string. \n",
107 |     "\n",
108 |     "- Convert to a list of strings by splitting on line breaks\n",
109 |     "- Delete all comment lines that start with `>`\n",
110 |     "- Combine the remaining 4 DNA sequences into a single sequence\n",
111 |     "- Perform reverse complementation by\n",
112 |     "    - replacing A with T, C with G, G with C and T with A\n",
113 |     "    - reversing the order of the sequence\n",
114 |     "- Print the reverse complement sequence"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 5,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "'CGATCGTACGATCGATCGATCTACGTACGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT'"
126 |       ]
127 |      },
128 |      "execution_count": 5,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "import requests\n",
135 |     "\n",
136 |     "url = 'https://molb7621.github.io/workshop/_downloads/sample.fa'\n",
137 |     "seq = requests.get(url).text\n",
138 |     "lines = seq.split()\n",
139 |     "lines = [line for line in lines if not line.startswith('>')]\n",
140 |     "dna = ''.join(lines)\n",
141 |     "rc = dna.translate(str.maketrans('ACTG', 'TGAC'))[::-1]\n",
142 |     "rc"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "**3**. Starting with the sequence `CGATCGTACGATCGATCGATCTACGTACGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT`, find the top 3 most common subsequences of length 3. We want to consider all possible subsequences that are generated by a shifting window of size 3.\n",
150 |     "\n",
151 |     "Hint: \n",
152 |     "\n",
153 |     "- Consider using `zip` to generate shifting windows\n",
154 |     "- Use `collections.Counter` or a dictionary to count"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 6,
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "from collections import Counter"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 7,
169 |    "metadata": {},
170 |    "outputs": [
171 |     {
172 |      "data": {
173 |       "text/plain": [
174 |        "[('GAT', 8), ('CGA', 7), ('ATC', 6)]"
175 |       ]
176 |      },
177 |      "execution_count": 7,
178 |      "metadata": {},
179 |      "output_type": "execute_result"
180 |     }
181 |    ],
182 |    "source": [
183 |     "s = 'CGATCGTACGATCGATCGATCTACGTACGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT'\n",
184 |     "c = Counter(''.join([i, j, k]) for i, j, k in zip(s, s[1:], s[2:]))\n",
185 |     "c.most_common(3)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "**4**. Use Python regular expressions to find the longest contiguous purine (the letters A and G) subsequences in \n",
193 |     "\n",
194 |     "`CGATCGTACGATCGATCGATCTACGTACAAGGAGGAGAGGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT`"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 8,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "'AAGGAGGAGAGG'"
206 |       ]
207 |      },
208 |      "execution_count": 8,
209 |      "metadata": {},
210 |      "output_type": "execute_result"
211 |     }
212 |    ],
213 |    "source": [
214 |     "import re\n",
215 |     "\n",
216 |     "purine = re.compile(r'[A|G]+')\n",
217 |     "s = 'CGATCGTACGATCGATCGATCTACGTACAAGGAGGAGAGGTACGATCGATGATCGTAGCTAGCTACGATGCCAGTTAGCTAGCTAGTCAGT'\n",
218 |     "max(purine.findall(s), key=len)"
219 |    ]
220 |   }
221 |  ],
222 |  "metadata": {
223 |   "kernelspec": {
224 |    "display_name": "Python 3",
225 |    "language": "python",
226 |    "name": "python3"
227 |   },
228 |   "language_info": {
229 |    "codemirror_mode": {
230 |     "name": "ipython",
231 |     "version": 3
232 |    },
233 |    "file_extension": ".py",
234 |    "mimetype": "text/x-python",
235 |    "name": "python",
236 |    "nbconvert_exporter": "python",
237 |    "pygments_lexer": "ipython3",
238 |    "version": "3.6.5"
239 |   }
240 |  },
241 |  "nbformat": 4,
242 |  "nbformat_minor": 2
243 | }
244 | 


--------------------------------------------------------------------------------
/labs/Lab04.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Lab 04\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with handling vectors, matrices and basic linear algebra. \n",
 14 |     "\n",
 15 |     "- You can import any Python standard library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import numpy as np\n",
 26 |     "import scipy.linalg as la"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "np.random.seed(123)\n",
 36 |     "m = 10\n",
 37 |     "n = 10\n",
 38 |     "A = np.random.normal(0, 1, (m, n))\n",
 39 |     "b = np.random.normal(0, 1,(n, 1))"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "**1**.\n",
 47 |     "\n",
 48 |     "Perform an LU decomposition to solve $Ax = b$\n",
 49 |     "\n",
 50 |     "- Using `lu_factor` and `solve_triangular`\n",
 51 |     "- Using `lu` and `solve_triangular`\n",
 52 |     "- Check that your answer is correct using `np.allclose` in each case"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": null,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": []
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "**2**.\n",
 67 |     "\n",
 68 |     "Calculate the Gram matrix $S = A^TA$. Use the same $A$ and $b$ from the previous question. \n",
 69 |     "\n",
 70 |     "- Solve $Sx = b$ using Cholesky decomposition\n",
 71 |     "- Check that your answer is correct using `np.allclose`"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": []
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "**3**. \n",
 86 |     "\n",
 87 |     "- Diagonalize the matrix $S$ by finding its eigenvalues and eigenvectors\n",
 88 |     "- Check that your answer is correct using `np.allclose`"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": []
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "**4**. \n",
103 |     "\n",
104 |     "- Perform a singular value decomposition (SVD) of the matrix $A$.\n",
105 |     "- Use the singular values to calculate the $L_\\text{Frobenius}$ and $L_2$ norms of $A$\n",
106 |     "- Check your answers using `la.norm` and `np.allclose`\n",
107 |     "- Express the eigenvalues of $S$ in terms of the singular values $\\sigma$\n",
108 |     "- Check your answers using `np.allclose`\n",
109 |     "\n"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": []
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "**5**.\n",
124 |     "\n",
125 |     "Suppose a vector $v$ has coordinates $b$ when expressed as a linear combination of the columns of $A$. What are the new coordinates of $v$ when expressed as a linear combination of the (normalized) eigenvectors of $A$?"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": []
134 |   }
135 |  ],
136 |  "metadata": {
137 |   "kernelspec": {
138 |    "display_name": "Python 3",
139 |    "language": "python",
140 |    "name": "python3"
141 |   },
142 |   "language_info": {
143 |    "codemirror_mode": {
144 |     "name": "ipython",
145 |     "version": 3
146 |    },
147 |    "file_extension": ".py",
148 |    "mimetype": "text/x-python",
149 |    "name": "python",
150 |    "nbconvert_exporter": "python",
151 |    "pygments_lexer": "ipython3",
152 |    "version": "3.7.4"
153 |   }
154 |  },
155 |  "nbformat": 4,
156 |  "nbformat_minor": 2
157 | }
158 | 


--------------------------------------------------------------------------------
/labs/Lab04_Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Lab 04\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with handling vectors, matrices and basic linear algebra. \n",
 14 |     "\n",
 15 |     "- You can import any Python standard library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import numpy as np\n",
 26 |     "import scipy.linalg as la"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "np.random.seed(123)\n",
 36 |     "m = 10\n",
 37 |     "n = 10\n",
 38 |     "A = np.random.normal(0, 1, (m, n))\n",
 39 |     "b = np.random.normal(0, 1,(n, 1))"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "**1**.\n",
 47 |     "\n",
 48 |     "Perform an LU decomposition to solve $Ax = b$\n",
 49 |     "\n",
 50 |     "- Using `lu_factor` and `solve_triangular`\n",
 51 |     "- Using `lu` and `solve_triangular`\n",
 52 |     "- Check that your answer is correct using `np.allclose` in each case"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 52,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "lu, pv = la.lu_factor(A)\n",
 62 |     "x = la.lu_solve((lu, pv), b)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 65,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "data": {
 72 |       "text/plain": [
 73 |        "True"
 74 |       ]
 75 |      },
 76 |      "execution_count": 65,
 77 |      "metadata": {},
 78 |      "output_type": "execute_result"
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "np.allclose(A@x, b)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 38,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "P, L, U = la.lu(A)\n",
 92 |     "y = la.solve_triangular(L, P.T@b, lower=True)\n",
 93 |     "x = la.solve_triangular(U, y)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 64,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "data": {
103 |       "text/plain": [
104 |        "True"
105 |       ]
106 |      },
107 |      "execution_count": 64,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "np.allclose(A@x, b)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "**2**.\n",
121 |     "\n",
122 |     "Calculate the Gram matrix $S = A^TA$. Use the same $A$ and $b$ from the previous question. \n",
123 |     "\n",
124 |     "- Solve $Sx = b$ using Cholesky decomposition\n",
125 |     "- Check that your answer is correct using `np.allclose`"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 66,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "S = A.T @ A"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 69,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": [
143 |     "x = la.cho_solve(la.cho_factor(S), b)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 72,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "data": {
153 |       "text/plain": [
154 |        "True"
155 |       ]
156 |      },
157 |      "execution_count": 72,
158 |      "metadata": {},
159 |      "output_type": "execute_result"
160 |     }
161 |    ],
162 |    "source": [
163 |     "np.allclose(S@x, b)"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "**3**. \n",
171 |     "\n",
172 |     "- Diagonalize the matrix $S$ by finding its eigenvalues and eigenvectors\n",
173 |     "- Check that your answer is correct using `np.allclose`"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 74,
179 |    "metadata": {},
180 |    "outputs": [],
181 |    "source": [
182 |     "lam, V = la.eigh(S)"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 76,
188 |    "metadata": {},
189 |    "outputs": [
190 |     {
191 |      "data": {
192 |       "text/plain": [
193 |        "True"
194 |       ]
195 |      },
196 |      "execution_count": 76,
197 |      "metadata": {},
198 |      "output_type": "execute_result"
199 |     }
200 |    ],
201 |    "source": [
202 |     "np.allclose(V @ np.diag(lam) @ V.T, S)"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "**4**. \n",
210 |     "\n",
211 |     "- Perform a singular value decomposition (SVD) of the matrix $A$.\n",
212 |     "- Use the singular values to calculate the $L_\\text{Frobenius}$ and $L_2$ norms of $A$\n",
213 |     "- Check your answers using `la.norm` and `np.allclose`\n",
214 |     "- Express the eigenvalues of $S$ in terms of the singular values $\\sigma$\n",
215 |     "- Check your answers using `np.allclose`\n",
216 |     "\n"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 77,
222 |    "metadata": {},
223 |    "outputs": [],
224 |    "source": [
225 |     "U, sigma, Vt = la.svd(A)"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 83,
231 |    "metadata": {},
232 |    "outputs": [],
233 |    "source": [
234 |     "l2 = sigma[0]\n",
235 |     "lF = np.sqrt((sigma**2).sum())"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 88,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "True"
247 |       ]
248 |      },
249 |      "execution_count": 88,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "np.allclose(l2, la.norm(A, ord=2))"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 89,
261 |    "metadata": {},
262 |    "outputs": [
263 |     {
264 |      "data": {
265 |       "text/plain": [
266 |        "True"
267 |       ]
268 |      },
269 |      "execution_count": 89,
270 |      "metadata": {},
271 |      "output_type": "execute_result"
272 |     }
273 |    ],
274 |    "source": [
275 |     "np.allclose(lF, la.norm(A, ord='fro'))"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 96,
281 |    "metadata": {},
282 |    "outputs": [
283 |     {
284 |      "data": {
285 |       "text/plain": [
286 |        "True"
287 |       ]
288 |      },
289 |      "execution_count": 96,
290 |      "metadata": {},
291 |      "output_type": "execute_result"
292 |     }
293 |    ],
294 |    "source": [
295 |     "np.allclose(sorted(np.sqrt(lam), reverse=True), sigma)"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "**5**.\n",
303 |     "\n",
304 |     "Suppose a vector $v$ has coordinates $b$ when expressed as a linear combination of the columns of $A$. What are the new coordinates of $v$ when expressed as a linear combination of the (normalized) eigenvectors of $A$?"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 97,
310 |    "metadata": {},
311 |    "outputs": [
312 |     {
313 |      "data": {
314 |       "text/plain": [
315 |        "array([[ 10.93172268],\n",
316 |        "       [  2.90291995],\n",
317 |        "       [  0.52477765],\n",
318 |        "       [ -4.212113  ],\n",
319 |        "       [ -7.5536649 ],\n",
320 |        "       [ -4.24966306],\n",
321 |        "       [-22.28443303],\n",
322 |        "       [ -3.25318145],\n",
323 |        "       [  1.15596081],\n",
324 |        "       [ 38.10898024]])"
325 |       ]
326 |      },
327 |      "execution_count": 97,
328 |      "metadata": {},
329 |      "output_type": "execute_result"
330 |     }
331 |    ],
332 |    "source": [
333 |     "V @ np.diag(lam) @ V @ b"
334 |    ]
335 |   }
336 |  ],
337 |  "metadata": {
338 |   "kernelspec": {
339 |    "display_name": "Python 3",
340 |    "language": "python",
341 |    "name": "python3"
342 |   },
343 |   "language_info": {
344 |    "codemirror_mode": {
345 |     "name": "ipython",
346 |     "version": 3
347 |    },
348 |    "file_extension": ".py",
349 |    "mimetype": "text/x-python",
350 |    "name": "python",
351 |    "nbconvert_exporter": "python",
352 |    "pygments_lexer": "ipython3",
353 |    "version": "3.7.4"
354 |   }
355 |  },
356 |  "nbformat": 4,
357 |  "nbformat_minor": 2
358 | }
359 | 


--------------------------------------------------------------------------------
/labs/Lab05.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Lab 05\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with using linear algebra to solve problems. \n",
 14 |     "\n",
 15 |     "- You can import any Python library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "%matplotlib inline\n",
 26 |     "\n",
 27 |     "import os\n",
 28 |     "import glob\n",
 29 |     "from pathlib import Path\n",
 30 |     "import numpy as np\n",
 31 |     "import pandas as pd\n",
 32 |     "import matplotlib as mpl\n",
 33 |     "import matplotlib.pyplot as plt\n",
 34 |     "import seaborn as sns\n",
 35 |     "from scipy import linalg as la\n",
 36 |     "\n",
 37 |     "sns.set_context('notebook', font_scale=1.5)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "**1**.\n",
 45 |     "\n",
 46 |     "- Check that the 3 points (1,1,0), (2,0,-1), (2,9,2) are not collinear\n",
 47 |     "- Find the equation of the plane that passes through these 3 points"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": []
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "**2**.\n",
 62 |     "\n",
 63 |     "Find the center of the sphere that passes through the points (0,3,2), (1,-1,1), (2,1,0), (5,1,3)."
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": []
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "**3**. \n",
 78 |     "\n",
 79 |     "We are given (x, y) observations and believe that an appropriate model is $y = b_0 + b_1 x + b_2 x^2 + b_3 \\sin(x)$ with some measurement noise. Find the coefficients $b_0, b_1, b_2, b_3$. Plot the data and fitted curve,"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
 89 |     "y = np.array([ 2.80472383, 10.96184197, 11.79499471,  7.72517373,  2.2119748 ,\n",
 90 |     "       -2.74622086, -0.10312465,  2.78538419,  0.70236037, -8.85784432])"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": []
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "**4**. \n",
105 |     "\n",
106 |     "Find the steady state probability vector for the Markov chain shown\n",
107 |     "\n",
108 |     "- By solving a linear equation\n",
109 |     "- By Jacobi iteration starting from a random probability vector\n",
110 |     "- By eigen-decomposition\n",
111 |     "\n",
112 |     "![img](../data/q4.png)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": []
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "**5**. \n",
127 |     "\n",
128 |     "- Find the nodes that belong to a clique in this graph.\n",
129 |     "- Express the following graph as a sparse matrix in `coo` format\n",
130 |     "\n",
131 |     "![img](../data/q5.png)"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": []
140 |   }
141 |  ],
142 |  "metadata": {
143 |   "kernelspec": {
144 |    "display_name": "Python 3",
145 |    "language": "python",
146 |    "name": "python3"
147 |   },
148 |   "language_info": {
149 |    "codemirror_mode": {
150 |     "name": "ipython",
151 |     "version": 3
152 |    },
153 |    "file_extension": ".py",
154 |    "mimetype": "text/x-python",
155 |    "name": "python",
156 |    "nbconvert_exporter": "python",
157 |    "pygments_lexer": "ipython3",
158 |    "version": "3.7.4"
159 |   }
160 |  },
161 |  "nbformat": 4,
162 |  "nbformat_minor": 2
163 | }
164 | 


--------------------------------------------------------------------------------
/labs/Lab06.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Lab 06\n",
  8 |     "\n",
  9 |     "Labs in general are for you to solve short programming challenges in class. In contrast, homework assignments will involve more challenging and lengthy problems.\n",
 10 |     "\n",
 11 |     "Feel free to ask the TAs for help if there is anything you do not understand. The TAs will go through suggested solutions in the last 15 minutes of the lab - typically by solving them in a live demo. **Your midterm exams will be like this, so it is highly beneficial for you to attend these labs**.\n",
 12 |     "\n",
 13 |     "The second lab is to gain basic familiarity with root finding and optimization. \n",
 14 |     "\n",
 15 |     "- You can import any Python library module you need\n",
 16 |     "- Do this lab without using the web to search for solutions"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**1**. Use the secant method to find the solution to $x^2 + 4x - 5 = 0$ starting from the (2,3) and running 5 iterations."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": []
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "**2**. Construct the companion matrix to find all solutions to $x^3 + 4x + 5 = 0$. "
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": []
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "**3**. Use the Newton-Raphson method to find the real cube root of 10 starting with an initial guess of 2.."
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": []
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "**4**. The Lagrange basis functions are given by \n",
 66 |     "\n",
 67 |     "$$\n",
 68 |     "l_j(x_j) = \\prod_{0 \\le m \\le k, m \\ne j} \\frac{x - x_m}{x_j - x_m}\n",
 69 |     "$$\n",
 70 |     "\n",
 71 |     "Here, $x$ represents the points at which you want to interpolate, $x_j$ and $x_m$ are indices of the given points.\n",
 72 |     "\n",
 73 |     "Use this to fit and plot a quadratic to the 3 points (1,1), (3,7) and (4,11)"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": null,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": []
 82 |   }
 83 |  ],
 84 |  "metadata": {
 85 |   "kernelspec": {
 86 |    "display_name": "Python 3",
 87 |    "language": "python",
 88 |    "name": "python3"
 89 |   },
 90 |   "language_info": {
 91 |    "codemirror_mode": {
 92 |     "name": "ipython",
 93 |     "version": 3
 94 |    },
 95 |    "file_extension": ".py",
 96 |    "mimetype": "text/x-python",
 97 |    "name": "python",
 98 |    "nbconvert_exporter": "python",
 99 |    "pygments_lexer": "ipython3",
100 |    "version": "3.7.6"
101 |   }
102 |  },
103 |  "nbformat": 4,
104 |  "nbformat_minor": 2
105 | }
106 | 


--------------------------------------------------------------------------------
/labs/Lab07.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**1**. Write the FizzBuzz program in C++.\n",
  8 |     "\n",
  9 |     "\"Write a program that prints the numbers from 1 to 100. But for multiples of three print “Fizz” instead of the number and for the multiples of five print “Buzz”. For numbers which are multiples of both three and five print “FizzBuzz”.\""
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": null,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": []
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "**2**.\n",
 24 |     "\n",
 25 |     "Write a C++ program that takes a single number `n` as input on the command line and then prints the square of that number."
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": []
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "**3**.\n",
 40 |     "\n",
 41 |     "Use loops to generate the 12 by 12 times table. Compile and run. You don't have to worry much about formatting, but the output should have 12 rows with numbers separated by spaces."
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": []
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "**4**.\n",
 56 |     "\n",
 57 |     "Write a C++ program that uses a function to calculate the 10th Fibonacci number."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": []
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "**5**.\n",
 72 |     "\n",
 73 |     "Generate 100 numbers from $N(100, 15)$ in C++. Write to a plain text file."
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": null,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": []
 82 |   }
 83 |  ],
 84 |  "metadata": {
 85 |   "kernelspec": {
 86 |    "display_name": "Python 3",
 87 |    "language": "python",
 88 |    "name": "python3"
 89 |   },
 90 |   "language_info": {
 91 |    "codemirror_mode": {
 92 |     "name": "ipython",
 93 |     "version": 3
 94 |    },
 95 |    "file_extension": ".py",
 96 |    "mimetype": "text/x-python",
 97 |    "name": "python",
 98 |    "nbconvert_exporter": "python",
 99 |    "pygments_lexer": "ipython3",
100 |    "version": "3.7.7"
101 |   }
102 |  },
103 |  "nbformat": 4,
104 |  "nbformat_minor": 2
105 | }
106 | 


--------------------------------------------------------------------------------
/labs/Lab07_Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**1**. Write the FizzBuzz program in C++."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stdout",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Writing fizzbuzz.cpp\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "%%file fizzbuzz.cpp\n",
 25 |     "\n",
 26 |     "#include <iostream>\n",
 27 |     "using std::cout;\n",
 28 |     "\n",
 29 |     "int main() {\n",
 30 |     "    for (int i=1; i<=100; i++) {\n",
 31 |     "        if ((i % 5 == 0) and (i % 3 == 0)) {\n",
 32 |     "            cout << \"FizzBuzz \";\n",
 33 |     "        }\n",
 34 |     "        else if (i % 3 == 0) {\n",
 35 |     "            cout << \"Fizz \";\n",
 36 |     "        }\n",
 37 |     "        else if (i % 5 == 0) {\n",
 38 |     "            cout << \"Buzz \";\n",
 39 |     "        }\n",
 40 |     "        else {\n",
 41 |     "            cout << i << \" \";\n",
 42 |     "        }        \n",
 43 |     "    }\n",
 44 |     "    cout << \"\\n\";\n",
 45 |     "}"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 3,
 51 |    "metadata": {},
 52 |    "outputs": [
 53 |     {
 54 |      "name": "stdout",
 55 |      "output_type": "stream",
 56 |      "text": [
 57 |       "1 2 Fizz 4 Buzz Fizz 7 8 Fizz Buzz 11 Fizz 13 14 FizzBuzz 16 17 Fizz 19 Buzz Fizz 22 23 Fizz Buzz 26 Fizz 28 29 FizzBuzz 31 32 Fizz 34 Buzz Fizz 37 38 Fizz Buzz 41 Fizz 43 44 FizzBuzz 46 47 Fizz 49 Buzz Fizz 52 53 Fizz Buzz 56 Fizz 58 59 FizzBuzz 61 62 Fizz 64 Buzz Fizz 67 68 Fizz Buzz 71 Fizz 73 74 FizzBuzz 76 77 Fizz 79 Buzz Fizz 82 83 Fizz Buzz 86 Fizz 88 89 FizzBuzz 91 92 Fizz 94 Buzz Fizz 97 98 Fizz Buzz \n"
 58 |      ]
 59 |     }
 60 |    ],
 61 |    "source": [
 62 |     "%%bash\n",
 63 |     "\n",
 64 |     "g++ fizzbuzz.cpp -o fizzbuzz.exe\n",
 65 |     "./fizzbuzz.exe"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "**2**.\n",
 73 |     "\n",
 74 |     "Write a C++ program that takes a single number `n` as input on the command line and then prints the square of that number."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {},
 81 |    "outputs": [
 82 |     {
 83 |      "name": "stdout",
 84 |      "output_type": "stream",
 85 |      "text": [
 86 |       "Writing square.cpp\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "%%file square.cpp\n",
 92 |     "\n",
 93 |     "#include <iostream>\n",
 94 |     "\n",
 95 |     "int main() {\n",
 96 |     "    int n;\n",
 97 |     "    std::cin >> n;\n",
 98 |     "    std::cout << n*n << \"\\n\";\n",
 99 |     "}"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 5,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "%%bash\n",
109 |     "\n",
110 |     "g++ square.cpp -o square.exe"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 6,
116 |    "metadata": {},
117 |    "outputs": [
118 |     {
119 |      "name": "stdout",
120 |      "output_type": "stream",
121 |      "text": [
122 |       "Writing square_1.cpp\n"
123 |      ]
124 |     }
125 |    ],
126 |    "source": [
127 |     "%%file square_1.cpp\n",
128 |     "\n",
129 |     "#include <iostream>\n",
130 |     "\n",
131 |     "int main(int argc, char** argv) {\n",
132 |     "    int n = atoi(argv[1]);\n",
133 |     "    std::cout << n*n << \"\\n\";\n",
134 |     "}"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 7,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": [
143 |     "%%bash\n",
144 |     "\n",
145 |     "g++ square_1.cpp -o square_1.exe"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 8,
151 |    "metadata": {},
152 |    "outputs": [
153 |     {
154 |      "name": "stdout",
155 |      "output_type": "stream",
156 |      "text": [
157 |       "25\n"
158 |      ]
159 |     }
160 |    ],
161 |    "source": [
162 |     "%%bash\n",
163 |     "\n",
164 |     "./square_1.exe 5"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "**3**.\n",
172 |     "\n",
173 |     "Use loops to generate the 12 by 12 times table. Compile and run. You don't have to worry much about formatting, but the output should have 12 rows with numbers separated by spaces."
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 9,
179 |    "metadata": {},
180 |    "outputs": [
181 |     {
182 |      "name": "stdout",
183 |      "output_type": "stream",
184 |      "text": [
185 |       "Writing table.cpp\n"
186 |      ]
187 |     }
188 |    ],
189 |    "source": [
190 |     "%%file table.cpp\n",
191 |     "\n",
192 |     "#include <iostream>\n",
193 |     "using std::cout;\n",
194 |     "\n",
195 |     "int main() {\n",
196 |     "    for (int i=1; i<=12; i++) {\n",
197 |     "        for (int j=1; j<=12; j++) {\n",
198 |     "            cout << i*j << \"\\t\";\n",
199 |     "        }\n",
200 |     "        cout << \"\\n\";\n",
201 |     "    }\n",
202 |     "}"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 10,
208 |    "metadata": {},
209 |    "outputs": [
210 |     {
211 |      "name": "stdout",
212 |      "output_type": "stream",
213 |      "text": [
214 |       "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t\n",
215 |       "2\t4\t6\t8\t10\t12\t14\t16\t18\t20\t22\t24\t\n",
216 |       "3\t6\t9\t12\t15\t18\t21\t24\t27\t30\t33\t36\t\n",
217 |       "4\t8\t12\t16\t20\t24\t28\t32\t36\t40\t44\t48\t\n",
218 |       "5\t10\t15\t20\t25\t30\t35\t40\t45\t50\t55\t60\t\n",
219 |       "6\t12\t18\t24\t30\t36\t42\t48\t54\t60\t66\t72\t\n",
220 |       "7\t14\t21\t28\t35\t42\t49\t56\t63\t70\t77\t84\t\n",
221 |       "8\t16\t24\t32\t40\t48\t56\t64\t72\t80\t88\t96\t\n",
222 |       "9\t18\t27\t36\t45\t54\t63\t72\t81\t90\t99\t108\t\n",
223 |       "10\t20\t30\t40\t50\t60\t70\t80\t90\t100\t110\t120\t\n",
224 |       "11\t22\t33\t44\t55\t66\t77\t88\t99\t110\t121\t132\t\n",
225 |       "12\t24\t36\t48\t60\t72\t84\t96\t108\t120\t132\t144\t\n"
226 |      ]
227 |     }
228 |    ],
229 |    "source": [
230 |     "%%bash\n",
231 |     "\n",
232 |     "g++ table.cpp -o table.exe\n",
233 |     "./table.exe"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "**4**.\n",
241 |     "\n",
242 |     "Write a C++ program that uses a function to calculate the 10th Fibonacci number."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 12,
248 |    "metadata": {},
249 |    "outputs": [
250 |     {
251 |      "name": "stdout",
252 |      "output_type": "stream",
253 |      "text": [
254 |       "Writing fib.cpp\n"
255 |      ]
256 |     }
257 |    ],
258 |    "source": [
259 |     "%%file fib.cpp\n",
260 |     "\n",
261 |     "#include <iostream>\n",
262 |     "using std::cout;\n",
263 |     "        \n",
264 |     "long fib(int n) {\n",
265 |     "    int a = 0, b = 1;\n",
266 |     "    int tmp;\n",
267 |     "    for (int i=0; i<n; i++) {\n",
268 |     "        tmp = a;\n",
269 |     "        a = b;\n",
270 |     "        b = tmp + b;\n",
271 |     "    }\n",
272 |     "    return a;\n",
273 |     "}\n",
274 |     "\n",
275 |     "int main() {\n",
276 |     "    cout << fib(10) << \"\\n\";\n",
277 |     "}"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 13,
283 |    "metadata": {},
284 |    "outputs": [
285 |     {
286 |      "name": "stdout",
287 |      "output_type": "stream",
288 |      "text": [
289 |       "55\n"
290 |      ]
291 |     }
292 |    ],
293 |    "source": [
294 |     "%%bash\n",
295 |     "\n",
296 |     "g++ fib.cpp -o fib.exe\n",
297 |     "./fib.exe"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "markdown",
302 |    "metadata": {},
303 |    "source": [
304 |     "**5**.\n",
305 |     "\n",
306 |     "Generate 100 numbers from $N(100, 15)$ in C++. Write to a plain text file."
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": 14,
312 |    "metadata": {},
313 |    "outputs": [
314 |     {
315 |      "name": "stdout",
316 |      "output_type": "stream",
317 |      "text": [
318 |       "Writing random.cpp\n"
319 |      ]
320 |     }
321 |    ],
322 |    "source": [
323 |     "%%file random.cpp\n",
324 |     "\n",
325 |     "#include <iostream>\n",
326 |     "#include <fstream>\n",
327 |     "#include <random>\n",
328 |     "#include <functional>\n",
329 |     "\n",
330 |     "using std::cout;\n",
331 |     "using std::endl;\n",
332 |     "using std::ofstream;\n",
333 |     "        \n",
334 |     "using std::default_random_engine;\n",
335 |     "using std::normal_distribution;\n",
336 |     "using std::bind;\n",
337 |     "        \n",
338 |     "// start random number engine with fixed seed\n",
339 |     "default_random_engine re{12345};\n",
340 |     "\n",
341 |     "normal_distribution<double> norm(100, 15); // mean and standard deviation\n",
342 |     "auto rnorm = bind(norm, re);  \n",
343 |     "\n",
344 |     "int main() {\n",
345 |     "    ofstream fout(\"norm_data.txt\");\n",
346 |     "    \n",
347 |     "    for (int i=0; i<100; i++) {\n",
348 |     "        fout << rnorm() << \"\\n\";\n",
349 |     "    }\n",
350 |     "}"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": 15,
356 |    "metadata": {},
357 |    "outputs": [],
358 |    "source": [
359 |     "%%bash\n",
360 |     "\n",
361 |     "g++ random.cpp -o random.exe -std=c++14\n",
362 |     "./random.exe"
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": 16,
368 |    "metadata": {},
369 |    "outputs": [
370 |     {
371 |      "name": "stdout",
372 |      "output_type": "stream",
373 |      "text": [
374 |       "101.293\r\n",
375 |       "102.529\r\n",
376 |       "124.156\r\n"
377 |      ]
378 |     }
379 |    ],
380 |    "source": [
381 |     "! head -n3 norm_data.txt"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "code",
386 |    "execution_count": null,
387 |    "metadata": {},
388 |    "outputs": [],
389 |    "source": []
390 |   }
391 |  ],
392 |  "metadata": {
393 |   "kernelspec": {
394 |    "display_name": "Python 3",
395 |    "language": "python",
396 |    "name": "python3"
397 |   },
398 |   "language_info": {
399 |    "codemirror_mode": {
400 |     "name": "ipython",
401 |     "version": 3
402 |    },
403 |    "file_extension": ".py",
404 |    "mimetype": "text/x-python",
405 |    "name": "python",
406 |    "nbconvert_exporter": "python",
407 |    "pygments_lexer": "ipython3",
408 |    "version": "3.7.7"
409 |   }
410 |  },
411 |  "nbformat": 4,
412 |  "nbformat_minor": 2
413 | }
414 | 


--------------------------------------------------------------------------------
/labs/Lab08.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Exercise 1**\n",
  8 |     "\n",
  9 |     "Implement a function that returns $n$ samples from a multivariate Gaussian distribution in C++ and wrap it for use in Python using `pybind11`. Use only standard C++ and the `Eigen` library. The function signature in Python  is\n",
 10 |     "\n",
 11 |     "```python\n",
 12 |     "def mvnorm(mu, Sigma, n):\n",
 13 |     "    \"\"\"Returns n random samples from a multivariate Gaussian distribution.\n",
 14 |     "    \n",
 15 |     "    mu is a mean vector\n",
 16 |     "    Sigma is a covariance matrix\n",
 17 |     "    \n",
 18 |     "    Returns an n by p matrix, where p is the dimension of the distribution.\n",
 19 |     "    \"\"\"\n",
 20 |     "```"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": []
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "**Exercise 2**\n",
 35 |     "\n",
 36 |     "- Consider a sequence of $n$ Bernoulli trials with success probability $p$ per trial. A string of consecutive successes is known as a success *run*. Write a function that returns the counts for runs of length $k$ for each $k$ observed in a dictionary.\n",
 37 |     "\n",
 38 |     "For example: if the trials were [0, 1, 0, 1, 1, 0, 0, 0, 0, 1], the function should return \n",
 39 |     "```\n",
 40 |     "{1: 2, 2: 1})\n",
 41 |     "```\n",
 42 |     "\n",
 43 |     "- What is the probability of observing at least one run of length 5 or more when $n=100$ and $p=0.5$?. Estimate this from 100,000 simulated experiments. Is this more, less or equally likely than finding runs of length 7 or more when $p=0.7$?"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "\n",
 53 |     "\n",
 54 |     "\n"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "**Exercise 3**.\n",
 62 |     "\n",
 63 |     "- Consider an unbiased random walk of length $n$ as simulated with a sequence of -1 or +1 values. If we start from 0, plot the distribution of *last* return times for 100,000 simulations with $n = 100$. The last return time is the last time the cumulative sum of the random walk is zero - this may be the starting point if the walk never returns to zero in 100 steps. \n",
 64 |     "\n",
 65 |     "- Do a maximum likeliood fit of a beta distribution to the set of last return times using the `beta.fit()` function from `scipy.stats`. Set the lower bound (loc) = 0 and the upper bound (scale) = 100 for plotting. Superimpose the fitted beta PDF on the normalized histogram of last reutrn times."
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {},
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "\n",
 75 |     "\n",
 76 |     "\n"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "**Exercise 4**\n",
 84 |     "\n",
 85 |     "The Cauchy distribution is given by \n",
 86 |     "$$\n",
 87 |     "f(x) = \\frac{1}{\\pi (1 + x^2)}, \\ \\ -\\infty \\lt x \\lt \\infty \n",
 88 |     "$$\n",
 89 |     "\n",
 90 |     "- Integrate the tail probability $P(X > 2)$ using Monte Carlo\n",
 91 |     "    1. Sampling from the Cauchy distribution directly\n",
 92 |     "    2. Sampling from the uniform distribution using an appropriate change of variables\n",
 93 |     "- Plot the 95% CI for the Monte Carlo estimates for n = 1 to 1000\n",
 94 |     "    1. For sampling from the Cauchy distribution using mulitple Monte Carlo sequences\n",
 95 |     "    2. For sampling from the uniform distribution using bootstrap samples of a single Monte Carlo sequence\n",
 96 |     "\n",
 97 |     "\n"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "\n",
107 |     "\n",
108 |     "\n"
109 |    ]
110 |   }
111 |  ],
112 |  "metadata": {
113 |   "kernelspec": {
114 |    "display_name": "Python 3",
115 |    "language": "python",
116 |    "name": "python3"
117 |   },
118 |   "language_info": {
119 |    "codemirror_mode": {
120 |     "name": "ipython",
121 |     "version": 3
122 |    },
123 |    "file_extension": ".py",
124 |    "mimetype": "text/x-python",
125 |    "name": "python",
126 |    "nbconvert_exporter": "python",
127 |    "pygments_lexer": "ipython3",
128 |    "version": "3.7.7"
129 |   },
130 |   "latex_envs": {
131 |    "bibliofile": "biblio.bib",
132 |    "cite_by": "apalike",
133 |    "current_citInitial": 1,
134 |    "eqLabelWithNumbers": true,
135 |    "eqNumInitial": 0
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 2
140 | }
141 | 


--------------------------------------------------------------------------------
/labs/figs/elephant.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/labs/figs/elephant.jpg


--------------------------------------------------------------------------------
/notebooks/S08C_Least_Squares.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Linear least squares "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import warnings\n",
 19 |     "warnings.filterwarnings(action=\"ignore\", module=\"scipy\", message=\"^internal gelsd\")\n",
 20 |     "import numpy as np\n",
 21 |     "import scipy.linalg as la"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "## Normal equations\n",
 29 |     "\n",
 30 |     "- Suppose $Ax = b$ is inconsistent. In this case, we can look instead for $\\widehat{x}$ which minimizes the distance between $Ax$ and $b$. In other words, we need to minimize $\\Vert Ax - b \\Vert^2$.\n",
 31 |     "- The minimum will occur when $\\langle Ax-b, Ax \\rangle = 0$ \n",
 32 |     "- Solving $(Ax)^T(Ax - b) = 0$ gives the normal equations $\\widehat{x} = (A^TA)^{-1}A^T b$\n",
 33 |     "- The corresponding vector in $C(A)$ is $A\\widehat{x} = A(A^TA)^{-1}A^T b$\n",
 34 |     "- Note that $P_A = A(A^TA)^{-1}A^T$ is the projection matrix for $C(A)$\n",
 35 |     "- This makes sense, since any solution to $Ax = b$ must be in $C(A)$, and the nearest such vector to $b$ must be the projection of $b$ onto $C(A)$."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "m = 100\n",
 45 |     "n = 5\n",
 46 |     "A = np.random.rand(m, n)\n",
 47 |     "b = np.random.rand(m, 1)"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Using least squares function (SVD)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "x, res, rank, s, = la.lstsq(A, b)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 4,
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "data": {
 73 |       "text/plain": [
 74 |        "array([[0.31192056],\n",
 75 |        "       [0.15414167],\n",
 76 |        "       [0.18783713],\n",
 77 |        "       [0.07997706],\n",
 78 |        "       [0.17878726]])"
 79 |       ]
 80 |      },
 81 |      "execution_count": 4,
 82 |      "metadata": {},
 83 |      "output_type": "execute_result"
 84 |     }
 85 |    ],
 86 |    "source": [
 87 |     "x"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 5,
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "data": {
 97 |       "text/plain": [
 98 |        "(100, 1)"
 99 |       ]
100 |      },
101 |      "execution_count": 5,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "(A @ x).shape"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "Using normal equations (projection) - for illustration only."
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 6,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "array([[0.31192056],\n",
126 |        "       [0.15414167],\n",
127 |        "       [0.18783713],\n",
128 |        "       [0.07997706],\n",
129 |        "       [0.17878726]])"
130 |       ]
131 |      },
132 |      "execution_count": 6,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "la.inv(A.T @ A) @ A.T @ b"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "## Projection matrices\n",
146 |     "\n",
147 |     "- Let $P$ be the projection matrix onto $C(A)$\n",
148 |     "- Since it is a projection operator, $P^2$ = $P$ (check)\n",
149 |     "- Check that $I-P$ is also idempotent (it turns out to also be a projection matrix)\n",
150 |     "- Where does $I-P$ project to?\n",
151 |     "- Show $C(I-P) = N(P)$\n",
152 |     "- Show $\\rho(P) + \\nu(P) = n$\n",
153 |     "- This implies $\\mathbb{R}^n = C(P) \\oplus C(I-P)$\n",
154 |     "- Trivial example\n",
155 |     "$$\n",
156 |     "P = \\begin{bmatrix}\n",
157 |     "1 & 0 & 0 \\\\\n",
158 |     "0 & 1 & 0 \\\\\n",
159 |     "0 & 0 & 0\n",
160 |     "\\end{bmatrix}, (I-P) = \\begin{bmatrix}\n",
161 |     "0 & 0 & 0 \\\\\n",
162 |     "0 & 0 & 0 \\\\\n",
163 |     "0 & 0 & 1\n",
164 |     "\\end{bmatrix}\n",
165 |     "$$\n",
166 |     "- Geometry of $P$ and $I-P$ in linear least squares"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 7,
172 |    "metadata": {
173 |     "collapsed": true
174 |    },
175 |    "outputs": [],
176 |    "source": [
177 |     "P = A @ la.inv(A.T @ A) @ A.T "
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 8,
183 |    "metadata": {},
184 |    "outputs": [
185 |     {
186 |      "data": {
187 |       "text/plain": [
188 |        "(100, 100)"
189 |       ]
190 |      },
191 |      "execution_count": 8,
192 |      "metadata": {},
193 |      "output_type": "execute_result"
194 |     }
195 |    ],
196 |    "source": [
197 |     "P.shape"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 9,
203 |    "metadata": {
204 |     "collapsed": true
205 |    },
206 |    "outputs": [],
207 |    "source": [
208 |     "x, res, rank, s, = la.lstsq(A, b)"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 10,
214 |    "metadata": {
215 |     "collapsed": true
216 |    },
217 |    "outputs": [],
218 |    "source": [
219 |     "Q = np.eye(m) - P"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": 11,
225 |    "metadata": {},
226 |    "outputs": [
227 |     {
228 |      "data": {
229 |       "text/plain": [
230 |        "array([[0.2763989 ],\n",
231 |        "       [0.33523772],\n",
232 |        "       [0.4619637 ],\n",
233 |        "       [0.50292904],\n",
234 |        "       [0.49148152]])"
235 |       ]
236 |      },
237 |      "execution_count": 11,
238 |      "metadata": {},
239 |      "output_type": "execute_result"
240 |     }
241 |    ],
242 |    "source": [
243 |     "(P @ b)[:5]"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 12,
249 |    "metadata": {},
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "array([[0.2763989 ],\n",
255 |        "       [0.33523772],\n",
256 |        "       [0.4619637 ],\n",
257 |        "       [0.50292904],\n",
258 |        "       [0.49148152]])"
259 |       ]
260 |      },
261 |      "execution_count": 12,
262 |      "metadata": {},
263 |      "output_type": "execute_result"
264 |     }
265 |    ],
266 |    "source": [
267 |     "(A @ x)[:5]"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 13,
273 |    "metadata": {},
274 |    "outputs": [
275 |     {
276 |      "data": {
277 |       "text/plain": [
278 |        "9.834201864805834"
279 |       ]
280 |      },
281 |      "execution_count": 13,
282 |      "metadata": {},
283 |      "output_type": "execute_result"
284 |     }
285 |    ],
286 |    "source": [
287 |     "la.norm(Q @ b)**2"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 14,
293 |    "metadata": {},
294 |    "outputs": [
295 |     {
296 |      "data": {
297 |       "text/plain": [
298 |        "array([9.83420186])"
299 |       ]
300 |      },
301 |      "execution_count": 14,
302 |      "metadata": {},
303 |      "output_type": "execute_result"
304 |     }
305 |    ],
306 |    "source": [
307 |     "res"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "markdown",
312 |    "metadata": {},
313 |    "source": [
314 |     "## Optimization\n",
315 |     "\n",
316 |     "Note that $\\Vert Ax - b \\Vert^2$ can also be considered as a *cost* function of $x$ that we want to minimize. Hence, we can also use optimization methods such as gradient descent to solve this problem iteratively. Importantly, optimization techniques are generalizable to nonlinear cost functions as well, and some can be made to scale to massive problems.\n",
317 |     "\n",
318 |     "This will be the topic of the next set of lectures."
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": null,
324 |    "metadata": {},
325 |    "outputs": [],
326 |    "source": []
327 |   }
328 |  ],
329 |  "metadata": {
330 |   "kernelspec": {
331 |    "display_name": "Python 3",
332 |    "language": "python",
333 |    "name": "python3"
334 |   },
335 |   "language_info": {
336 |    "codemirror_mode": {
337 |     "name": "ipython",
338 |     "version": 3
339 |    },
340 |    "file_extension": ".py",
341 |    "mimetype": "text/x-python",
342 |    "name": "python",
343 |    "nbconvert_exporter": "python",
344 |    "pygments_lexer": "ipython3",
345 |    "version": "3.7.4"
346 |   }
347 |  },
348 |  "nbformat": 4,
349 |  "nbformat_minor": 2
350 | }
351 | 


--------------------------------------------------------------------------------
/notebooks/S09B_Optimization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Numerical Optimization"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Reference: [Numerical Optimization by Nocedal and Wright](http://www.springer.com/us/book/9780387303031)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Categorize your optimization problem\n",
 22 |     "\n",
 23 |     "Different optimization problems require different classes of optimization algorithms for efficient solution. Some fundamental decision points: The tree below can serve as a guide for which class of optimization algorithm is appropriate."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "![Optimization Tree](https://neos-guide.org/sites/default/files/graphviz/dcd8800d05552f6159a3dc0a0b2bdb38.out.svg)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {
 36 |     "collapsed": true
 37 |    },
 38 |    "source": [
 39 |     "We will mainly discuss the optimization of smooth functions, with and without constraints."
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "## Sketch of lecture\n",
 47 |     "\n",
 48 |     "- Problem classification\n",
 49 |     "    - Tree\n",
 50 |     "- Concepts\n",
 51 |     "    - Global and local solutions\n",
 52 |     "    - Convex sets and functions\n",
 53 |     "    - Conditions for optima from calculus\n",
 54 |     "    - Iteration algorithms \n",
 55 |     "$$\n",
 56 |     "  x_o \\rightarrow x_1 \\rightarrow \\ldots \\rightarrow x_k \\rightarrow x_{k+1} \\rightarrow x_\\text{opt}\n",
 57 |     "$$\n",
 58 |     "- Gradient free methods\n",
 59 |     "    - Nelder-Mead simplex\n",
 60 |     "- Line search\n",
 61 |     "$$\n",
 62 |     "\\min_{\\alpha > 0} f(x_k + \\alpha p_k)\n",
 63 |     "$$\n",
 64 |     "    - Descent condition $p_k^T \\nabla f_k < 0$\n",
 65 |     "    - Steepest descent $\\nabla f$ (Sensitivity to scaling)\n",
 66 |     "    - Newton direction $(\\nabla^2 f)^{-1} \\nabla f$ (by minimizing $m_k$)\n",
 67 |     "    - Conjugate gradient method\n",
 68 |     "    - Quasi-Newton methods (SR1, BFGS)\n",
 69 |     "- Trust regions\n",
 70 |     "  $$\\min_{p} m_k(x_k + p)$$\n",
 71 |     "  where $x_k + p$ lie inside the trust region and\n",
 72 |     "  $$\n",
 73 |     "  m_k(x_k + p) = f_k + p^T\\nabla f_k + p^T B_k p\n",
 74 |     "  $$\n",
 75 |     "  where $B_k \\sim \\nabla^2 f_k$\n",
 76 |     "  Secant condition\n",
 77 |     "    $$B_{k+1}s_k = y_k$$\n",
 78 |     "    $$s_k = x_{k+1}-x_k = \\alpha p_k$$\n",
 79 |     "    $$y_k = \\nabla f_{k+1} - \\nabla f_k$$   \n",
 80 |     "    - Levenberg-Marquadt\n",
 81 |     "- Model formulation\n",
 82 |     "- Objective function $f$\n",
 83 |     "- Variables $x$\n",
 84 |     "- Constraint functions\n",
 85 |     "    - Equality\n",
 86 |     "    - Inequality"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {
 93 |     "collapsed": true
 94 |    },
 95 |    "outputs": [],
 96 |    "source": []
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": []
106 |   }
107 |  ],
108 |  "metadata": {
109 |   "kernelspec": {
110 |    "display_name": "Python 3",
111 |    "language": "python",
112 |    "name": "python3"
113 |   },
114 |   "language_info": {
115 |    "codemirror_mode": {
116 |     "name": "ipython",
117 |     "version": 3
118 |    },
119 |    "file_extension": ".py",
120 |    "mimetype": "text/x-python",
121 |    "name": "python",
122 |    "nbconvert_exporter": "python",
123 |    "pygments_lexer": "ipython3",
124 |    "version": "3.7.4"
125 |   }
126 |  },
127 |  "nbformat": 4,
128 |  "nbformat_minor": 2
129 | }
130 | 


--------------------------------------------------------------------------------
/notebooks/data/HtWt.csv:
--------------------------------------------------------------------------------
 1 | male,height,weight
 2 | 0,63.2,168.7
 3 | 0,68.7,169.8
 4 | 0,64.8,176.6
 5 | 0,67.9,246.8
 6 | 1,68.9,151.6
 7 | 1,67.8,158.0
 8 | 1,68.2,168.6
 9 | 0,64.8,137.2
10 | 1,64.3,177.0
11 | 0,64.7,128.0
12 | 1,66.9,168.4
13 | 1,66.9,136.2
14 | 1,67.1,160.3
15 | 1,70.2,233.9
16 | 1,67.4,171.7
17 | 1,71.1,185.5
18 | 0,63.4,177.6
19 | 1,66.9,132.9
20 | 0,71.0,140.1
21 | 1,70.4,151.9
22 | 0,59.5,147.2
23 | 1,70.4,159.0
24 | 0,61.5,113.0
25 | 1,74.5,194.5
26 | 0,65.3,145.1
27 | 1,68.8,196.5
28 | 0,67.2,148.9
29 | 1,68.7,132.9
30 | 0,60.0,168.4
31 | 0,62.5,146.2
32 | 1,72.0,236.4
33 | 1,67.9,140.0
34 | 1,65.1,156.2
35 | 1,63.5,178.7
36 | 1,68.2,147.5
37 | 0,64.6,97.7
38 | 1,68.1,189.6
39 | 0,66.2,221.9
40 | 0,62.8,168.1
41 | 0,65.3,143.1
42 | 0,65.8,217.7
43 | 0,68.7,133.2
44 | 0,63.8,96.5
45 | 1,70.6,270.6
46 | 0,61.5,137.2
47 | 0,61.9,124.2
48 | 0,65.1,128.3
49 | 1,68.7,203.6
50 | 0,57.6,132.4
51 | 1,66.3,189.4
52 | 1,69.0,174.0
53 | 0,63.4,163.3
54 | 1,69.5,183.5
55 | 1,67.8,193.8
56 | 0,61.6,119.7
57 | 1,71.2,157.4
58 | 1,67.4,146.1
59 | 0,66.1,128.3
60 | 1,70.7,179.1
61 | 0,67.0,140.0
62 | 1,66.8,202.2
63 | 1,69.9,169.4
64 | 0,57.7,122.8
65 | 0,62.5,248.5
66 | 1,66.6,154.4
67 | 0,60.6,140.2
68 | 1,70.4,141.6
69 | 0,66.4,144.4
70 | 0,62.3,116.2
71 | 1,73.3,175.0
72 | 


--------------------------------------------------------------------------------
/notebooks/data/cal_house.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/data/cal_house.npy


--------------------------------------------------------------------------------
/notebooks/data/targt.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/data/targt.npy


--------------------------------------------------------------------------------
/notebooks/figs/Lagrange_multiplier.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/Lagrange_multiplier.png


--------------------------------------------------------------------------------
/notebooks/figs/agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/agents.png


--------------------------------------------------------------------------------
/notebooks/figs/alap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/alap.png


--------------------------------------------------------------------------------
/notebooks/figs/asap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/asap.png


--------------------------------------------------------------------------------
/notebooks/figs/auto_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/auto_diff.png


--------------------------------------------------------------------------------
/notebooks/figs/bb1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/bb1.png


--------------------------------------------------------------------------------
/notebooks/figs/bb2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/bb2.png


--------------------------------------------------------------------------------
/notebooks/figs/bb3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/bb3.png


--------------------------------------------------------------------------------
/notebooks/figs/box_loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/box_loop.png


--------------------------------------------------------------------------------
/notebooks/figs/data_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/data_flow.png


--------------------------------------------------------------------------------
/notebooks/figs/elim1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/elim1.png


--------------------------------------------------------------------------------
/notebooks/figs/elim2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/elim2.png


--------------------------------------------------------------------------------
/notebooks/figs/inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/inference.png


--------------------------------------------------------------------------------
/notebooks/figs/mcmc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/mcmc.png


--------------------------------------------------------------------------------
/notebooks/figs/operations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/operations.png


--------------------------------------------------------------------------------
/notebooks/figs/pagerank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/pagerank.png


--------------------------------------------------------------------------------
/notebooks/figs/score_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/score_distribution.png


--------------------------------------------------------------------------------
/notebooks/figs/slice.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/slice.jpg


--------------------------------------------------------------------------------
/notebooks/figs/spectral.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cliburn/sta-663-2020/05a965e2f33c8925d25e000c6c76b084b5bf0ed6/notebooks/figs/spectral.png


--------------------------------------------------------------------------------
/project/template.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Final Project \n",
  8 |     "\n",
  9 |     "For the final project, you will need to implement a \"new\" statistical algorithm in Python from the research literature and write a \"paper\" describing the algorithm. \n",
 10 |     "\n",
 11 |     "Suggested papers can be found in Sakai:Resources:Final_Project_Papers\n",
 12 |     "\n",
 13 |     "## Paper\n",
 14 |     "\n",
 15 |     "The paper should have the following:\n",
 16 |     "\n",
 17 |     "### Title\n",
 18 |     "\n",
 19 |     "Should be concise and informative.\n",
 20 |     "\n",
 21 |     "### Abstract\n",
 22 |     "\n",
 23 |     "250 words or less. Identify 4-6 key phrases.\n",
 24 |     "\n",
 25 |     "### Background\n",
 26 |     "\n",
 27 |     "State the research paper you are using. Describe the concept of the algorithm and why it is interesting and/or useful. If appropriate, describe the mathematical basis of the algorithm. Some potential topics for the background include:\n",
 28 |     "\n",
 29 |     "- What problem does it address? \n",
 30 |     "- What are known and possible applications of the algorithm? \n",
 31 |     "- What are its advantages and disadvantages relative to other algorithms?\n",
 32 |     "- How will you use it in your research?\n",
 33 |     "\n",
 34 |     "### Description of algorithm\n",
 35 |     "\n",
 36 |     "First, explain in plain English what the algorithm does. Then describes the details of the algorithm, using mathematical equations or pseudocode as appropriate. \n",
 37 |     "\n",
 38 |     "### Describe optimization for performance\n",
 39 |     "\n",
 40 |     "First implement the algorithm using plain Python in a straightforward way from the description of the algorithm. Then profile and optimize it using one or more appropriate methods, such as:\n",
 41 |     "\n",
 42 |     "1. Use of better algorithms or data structures\n",
 43 |     "2. Use of vectorization\n",
 44 |     "3. JIT or AOT compilation of critical functions\n",
 45 |     "4. Re-writing critical functions in C++ and using pybind11 to wrap them\n",
 46 |     "5. Making use of parallelism or concurrency\n",
 47 |     "6. Making use of distributed compuitng\n",
 48 |     "\n",
 49 |     "Document the improvement in performance with the optimizations performed.\n",
 50 |     "\n",
 51 |     "### Applications to simulated data sets\n",
 52 |     "\n",
 53 |     "Are there specific inputs that give known outputs (e.g. there might be closed form solutions for special input cases)? How does the algorithm perform on these? \n",
 54 |     "\n",
 55 |     "If no such input cases are available (or in addition to such input cases), how does the algorithm perform on simulated data sets for which you know the \"truth\"? \n",
 56 |     "\n",
 57 |     "### Applications to real data sets\n",
 58 |     "\n",
 59 |     "Test the algorithm on the real-world examples in the original paper if possible. Try to find at least one other real-world data set not in the original paper and test it on that. Describe and interpret the results.\n",
 60 |     "\n",
 61 |     "### Comparative analysis with competing algorithms\n",
 62 |     "\n",
 63 |     "Find two other algorithm that address a similar problem. Perform a comparison - for example, of accuracy or speed. You can use native libraries of the other algorithms - you do not need to code them yourself. Comment on your observations. \n",
 64 |     "\n",
 65 |     "### Discussion/conclusion\n",
 66 |     "\n",
 67 |     "Your thoughts on the algorithm. Does it fulfill a particular need? How could it be generalized to other problem domains? What are its limitations and how could it be improved further?\n",
 68 |     "\n",
 69 |     "### References/bibliography\n",
 70 |     "\n",
 71 |     "Make sure you cite your sources.\n",
 72 |     "\n",
 73 |     "## Code\n",
 74 |     "\n",
 75 |     "The code should be in a public GitHub repository with:\n",
 76 |     "\n",
 77 |     "1. A README file\n",
 78 |     "2. An open source license\n",
 79 |     "3. Source code\n",
 80 |     "4. Test code\n",
 81 |     "5. Examples\n",
 82 |     "6. A reproducible report\n",
 83 |     "\n",
 84 |     "    The package should be downloadable and installable with `python setup.py install`, or even posted to PyPI adn installable with `pip install package`. See https://packaging.python.org/tutorials/packaging-projects/ for how to upload to a Python repository. Use the repository at https://test.pypi.org - this is for testing and will be wiped clean after a period.\n",
 85 |     "\n",
 86 |     "## Rubric\n",
 87 |     "\n",
 88 |     "Here are some considerations I use when grading. Note that the \"difficulty factor\" of the chosen algorithm will be factored into the grading. \n",
 89 |     "\n",
 90 |     "1. Is the abstract, background and discussion readable and clear? \n",
 91 |     "2. Is the algorithm description clear and accurate? \n",
 92 |     "3. Has the algorithm been optimized? \n",
 93 |     "4. Are the applications to simulated/real data clear and useful? \n",
 94 |     "5. Was the comparative analysis done well? \n",
 95 |     "6. Is there a well-maintained GitHub repository for the code? \n",
 96 |     "7. Is the document show evidence of literate programming? \n",
 97 |     "8. Is the analysis reproducible? \n",
 98 |     "9. Is the code tested? Are examples provided? \n",
 99 |     "10. Is the package easily installable? \n",
100 |     "\n"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": []
109 |   }
110 |  ],
111 |  "metadata": {
112 |   "kernelspec": {
113 |    "display_name": "Python 3",
114 |    "language": "python",
115 |    "name": "python3"
116 |   },
117 |   "language_info": {
118 |    "codemirror_mode": {
119 |     "name": "ipython",
120 |     "version": 3
121 |    },
122 |    "file_extension": ".py",
123 |    "mimetype": "text/x-python",
124 |    "name": "python",
125 |    "nbconvert_exporter": "python",
126 |    "pygments_lexer": "ipython3",
127 |    "version": "3.7.7"
128 |   }
129 |  },
130 |  "nbformat": 4,
131 |  "nbformat_minor": 1
132 | }
133 | 


--------------------------------------------------------------------------------