├── .DS_Store ├── .github └── workflows │ └── ExportPluto.yaml ├── README.md ├── hw1 ├── hints.md ├── hw1.html ├── hw1.jl └── hw1.jmd ├── hw2 ├── hints2.md ├── hw2_2023.jl ├── hw2_hint.pdf └── hw2_hint.tex ├── hw3 ├── 18_337_2023_pset3.pdf ├── parallelhistogram.jl └── patiencesort1.jl ├── hw4.pdf ├── hw4 └── hints.md ├── lecture 11 └── adjoint handwritten notes.pdf ├── lecture 12 ├── ..textClipping ├── .DS_Store ├── notebook.jl ├── reverse mode 2 (simeon).jl ├── reverse mode 3.jl ├── reverse mode 4.jl ├── reverse mode 4.jl.zip └── reverse mode 6.jl ├── lecture 14 └── adjoint equations.pdf ├── lecture 17 └── handwritten notes adjoint.pdf ├── lecture 22 ├── .DS_Store ├── HPEC-Handbook-Kepner.pdf ├── MathOfBigData-Chapter1.pdf └── Optimizing_Xeon_Phi_for_Interactive_Data_Analysis.pdf ├── lecture 24 ├── .DS_Store ├── .ipynb_checkpoints │ ├── Alan trying to understand MCMC-checkpoint.ipynb │ ├── Designing+Markov+chains-checkpoint.ipynb │ ├── Metropolis with linear algebra-checkpoint.ipynb │ └── MetropolisHastings-checkpoint.ipynb ├── Alan trying to understand MCMC.ipynb ├── Designing+Markov+chains.ipynb ├── Metropolis with linear algebra.ipynb └── MetropolisHastings.ipynb ├── lecture1 ├── AutoDiff.ipynb ├── Julia is fast.ipynb ├── fernbach 2019 power_of_language.pptx └── the_dream.ipynb ├── lecture10 ├── .DS_Store ├── prefix.pptx ├── star_and_more.pdf ├── trid.pdf └── ~$prefix.pptx ├── lecture11 └── .DS_Store ├── lecture13 └── handwritten_notes_vectors_adjoints.pdf ├── lecture2 ├── .DS_Store ├── The Julia HPC dream - Jupyter Notebook.pdf ├── allocations.jl ├── lecture2.jl ├── matrix_calculus_handwritten_notes_02_08_2023.pdf ├── optimizing.html └── optimizing.jmd ├── lecture20 └── adjointpde.pdf ├── lecture3 ├── allocation.jl └── lecture_3_handwritten_2023.pdf ├── lecture4 ├── lecture_4_handwritten_2023.pdf └── serial performance.jl ├── lecture5 ├── .DS_Store ├── 1071_230222012837_001.pdf ├── de_solver_software_comparsion.pdf ├── ode.jl ├── ode_simple.jl └── pinn.jl ├── lecture6 ├── .DS_Store ├── BACKpropagation.pdf ├── Backprop with Backslash.ipynb ├── backprop_poster.pdf ├── handwritten reverse mode.pdf ├── parallel_models.jl ├── pinn2.jl └── threads_demo.jl ├── lecture7 ├── .DS_Store ├── LorenzManyWays.jl ├── dynamics.jl ├── lecture7 handwritten notes.pdf ├── pinn.jl └── pinn2.jl ├── lecture8 ├── pi.jl └── threads.jl ├── lecture9 └── reduce_prefix.jl ├── old lecture 13 ├── .DS_Store ├── Reduce and Parallel Prefix.jl ├── firstcuda.jl └── prefix.ppt ├── old lecture10 ├── .DS_Store ├── helloworld ├── mpihelloworld.jl └── mpijl_demo │ ├── data │ ├── 1013.txt │ ├── 1059-0.txt │ ├── 159.txt │ ├── 23218-0.txt │ ├── 35.txt │ ├── 36.txt │ ├── 5230.txt │ ├── 524-0.txt │ ├── 775-0.txt │ ├── 780-0.txt │ ├── pg11696.txt │ ├── pg31547.txt │ └── pg7308.txt │ ├── helloworld.jl │ ├── mpihelloworld.jl │ ├── submit.sh │ ├── top5norm.jl │ ├── top5norm_collective.jl │ ├── top5norm_sendrecv.jl │ └── word_count_helpers.jl ├── old lecture11 ├── .DS_Store ├── .ipynb_checkpoints │ ├── intro-checkpoint.ipynb │ └── tracing-checkpoint.ipynb ├── Manifest.toml ├── Project.toml ├── intro.ipynb ├── lecture11.jl ├── tracing.ipynb └── utils.jl ├── old lecture9 ├── eigenvalue derivative.ipynb ├── gsvd derivative.ipynb ├── jacobian_example.jl ├── lecture9-1.jl ├── lecture9.jl └── svd derivative.ipynb ├── oldhw3 └── hints3.md └── threads.jl /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/.DS_Store 
-------------------------------------------------------------------------------- /.github/workflows/ExportPluto.yaml: -------------------------------------------------------------------------------- 1 | name: Export Pluto notebooks 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - master 7 | workflow_dispatch: 8 | permissions: 9 | contents: write 10 | 11 | # When two jobs run in parallel, cancel the older ones, to make sure that the website is generated from the most recent commit. 12 | concurrency: 13 | group: pluto-export 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | build-and-deploy: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout this repository 21 | uses: actions/checkout@v3 22 | 23 | - name: Install Julia 24 | uses: julia-actions/setup-julia@v1 25 | with: 26 | version: "1" # This will automatically pick the latest Julia version 27 | 28 | - name: Cache Julia artifacts & such 29 | uses: julia-actions/cache@v1 30 | with: 31 | cache-registries: "true" 32 | 33 | # We set up a folder that Pluto can use to cache exported notebooks. If the notebook file did not change, then Pluto can take the exported file from cache instead of running the notebook. 34 | - name: Set up notebook state cache 35 | uses: actions/cache@v3 36 | with: 37 | path: pluto_state_cache 38 | key: ${{ runner.os }}-pluto_state_cache-v2-${{ hashFiles('**/Project.toml', '**/Manifest.toml', '.github/workflows/*' ) }}-${{ hashFiles('**/*jl') }} 39 | restore-keys: | 40 | ${{ runner.os }}-pluto_state_cache-v2-${{ hashFiles('**/Project.toml', '**/Manifest.toml', '.github/workflows/*' ) }} 41 | 42 | 43 | - name: Run & export Pluto notebooks 44 | run: | 45 | julia -e 'using Pkg 46 | Pkg.activate(mktempdir()) 47 | Pkg.add([ 48 | Pkg.PackageSpec(name="PlutoSliderServer", version="0.3.2-0.3"), 49 | ]) 50 | 51 | import PlutoSliderServer 52 | 53 | PlutoSliderServer.github_action("."; 54 | Export_cache_dir="pluto_state_cache", 55 | Export_baked_notebookfile=false, 56 | Export_baked_state=false, 57 | # more parameters can go here 58 | )' 59 | 60 | 61 | - name: Deploy to gh-pages 62 | uses: JamesIves/github-pages-deploy-action@releases/v4 63 | with: 64 | token: ${{ secrets.GITHUB_TOKEN }} 65 | branch: gh-pages 66 | folder: . 67 | single-commit: true 68 | 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 18.337J/6.338J: Parallel Computing and Scientific Machine Learning (Spring 2023) 2 | ## Professor Alan Edelman (and Philip the Corgi) 3 | ## MW 3:00 to 4:30 @ Room 2-190 4 | ## TA and Office hours: (To be confirmed) 5 | ## [Piazza Link](https://piazza.com/mit/spring2023/18337) 6 | ## [Canvas](https://canvas.mit.edu/courses/18760) will only be used for homework and project (+proposal) submission + lecture videos 7 | 8 | ## Classes are recorded and will be uploaded on canvas. Another great resource is Chris Rackauckas' videos of 2021 spring class. See [SciMLBook](https://book.sciml.ai/). 
9 | 10 | 11 | ## Julia: 12 | 13 | * Really nice Julia tutorial for the fall 2022 class [Tutorial](https://mit-c25.netlify.app/notebooks/0_julia_tutorial) 14 | 15 | * [Julia cheatsheets](https://computationalthinking.mit.edu/Spring21/cheatsheets/) 16 | 17 | * Julia tutorial by Steven Johnson Wed Feb 8 18 | *Optional* Julia Tutorial: Wed Feb 8 @ 5pm [via Zoom](https://mit.zoom.us/j/96829722642?pwd=TDhhME0wbmx0SG5RcnFOS3VScTA5Zz09) 19 | 20 | * Virtually [via Zoom](https://mit.zoom.us/j/96829722642?pwd=TDhhME0wbmx0SG5RcnFOS3VScTA5Zz09). Recording will be posted. 21 | 22 | A basic overview of the Julia programming environment for numerical computations that we will use in 18.06 for simple computational exploration. This (Zoom-based) tutorial will cover what Julia is and the basics of interaction, scalar/vector/matrix arithmetic, and plotting — we'll be using it as just a "fancy calculator" and no "real programming" will be required. 23 | 24 | * [Tutorial materials](https://github.com/mitmath/julia-mit) (and links to other resources) 25 | 26 | If possible, try to install Julia on your laptop beforehand using the instructions at the above link. Failing that, you can run Julia in the cloud (see instructions above). 27 | 28 | 29 | ## Announcement: 30 | 31 | There will be homeworks, followed by the final project. 32 | Everyone needs to present their work and submit a project report. 33 | 34 | 1-page Final Project proposal due : March 24 35 | 36 | Final Project presentations : April 26 to May 15 37 | 38 | Final Project reports due: May 15 39 | 40 | # Grading: 41 | 50% problem sets, 10% for the final project proposal, and 40% for the final project. Problem sets and final projects will be submitted electronically. 42 | 43 | # HW 44 | |#| Notebook| 45 | |-|-| 46 | |1| [HW1](https://mitmath.github.io/18337/hw1/hw1.html) | 47 | (For matrix calculus problems, do not use indices) 48 | |2| [HW2](https://mitmath.github.io/18337/hw2/hw2_2023.html) Due Wednesday March 1, 2023 | 49 | |3| [HW3](https://github.com/mitmath/18337/blob/master/hw3/18_337_2023_pset3.pdf ) Due Wednesday March 15, 2023| 50 | |4| [HW4](https://github.com/mitmath/18337/blob/master/hw4.pdf) Due Wednesday April 19, 2023 | 51 | 52 | # Lecture Schedule (tentative) 53 | |#|Day| Date | Topic | [SciML](https://book.sciml.ai/) lecture | Materials | 54 | |-|-|------|------|-----|--| 55 | |1|M| 2/6 | Intro to Julia. My Two Favorite Notebooks. 
| | [[Julia is fast]](https://github.com/mitmath/18337/blob/master/lecture1/Julia%20is%20fast.ipynb), [[AutoDiff]](https://github.com/mitmath/18337/blob/master/lecture1/AutoDiff.ipynb), [[autodiff video]](https://www.youtube.com/watch?v=vAp6nUMrKYg), 56 | |2|W|2/8| Matrix Calculus I and The Parallel Dream| | See [[IAP 2023 Class on Matrix Calculus]](https://github.com/mitmath/matrixcalc),[[handwritten notes]](https://github.com/mitmath/18337/blob/master/lecture2/matrix_calculus_handwritten_notes_02_08_2023.pdf),[[The Parallel Dream]](https://github.com/mitmath/18337/blob/master/lecture1/the_dream.ipynb) 57 | |3|M|2/13| Matrix Calculus II || [[handwritten notes]](https://github.com/mitmath/18337/blob/master/lecture3/lecture_3_handwritten_2023.pdf),[[Corgi in the Washing Machine]](https://mit-c25.netlify.app/notebooks/1_hyperbolic_corgi),[[2x2 Matrix Jacobians]](https://rawcdn.githack.com/mitmath/matrixcalc/3f6758996e40c5c1070279f89f7f65e76e08003d/notes/2x2Jacobians.jl.html) 58 | |4|W|2/15| Serial Performance | [2][2] |[[handwritten notes]](https://github.com/mitmath/18337/blob/master/lecture4/lecture_4_handwritten_2023.pdf), [[Serial Performance .jl file]](https://github.com/mitmath/18337/blob/master/lecture4/serial%20performance.jl), [[Loop Fusion Blog ]](https://julialang.org/blog/2017/01/moredots/) 59 | |5|T|2/21| Intro to PINNs and Automatic differentiation I : Forward mode AD | [3][3] and [8][8] | [ode and Pinns](https://mit-18337-spring2023.netlify.app/lecture5/ode_simple.html),[intro to pinn handwritten notes](https://github.com/mitmath/18337/blob/master/lecture5/1071_230222012837_001.pdf),[autodiff handwritten notes](https://github.com/mitmath/JuliaComputation/blob/ec6861bc9396d2b577f1bbc8136683d4298d7dc8/slides/ad_handwritten.pdf) 60 | |6|W|2/22| Automatic differentiation II : Reverse mode AD |[10][10]| [pinn.jl](https://github.com/mitmath/18337/blob/master/lecture5/pinn.jl), [reverse mode ad demo](https://simeonschaub.github.io/ReverseModePluto/notebook.html),[handwritten notes](https://github.com/mitmath/18337/blob/master/lecture6/handwritten%20reverse%20mode.pdf)| 61 | |7|M|2/27 | Dynamical Systems & Serial Performance on Iterations | [4][4] | [Lorenz many ways](https://github.com/mitmath/18337/blob/master/lecture7/LorenzManyWays.jl), [Dynamical Systems](https://mitmath.github.io/18337/lecture7/dynamics.html), [handwriten notes](https://github.com/mitmath/18337/blob/master/lecture7/lecture7%20handwritten%20notes.pdf) | 62 | |8|W|3/1| HPC & Threading | [5][5] and [6][6] | [pi.jl](https://github.com/mitmath/18337/blob/master/lecture8/pi.jl), [threads.jl](https://github.com/mitmath/18337/blob/master/lecture8/threads.jl),[HPC Slides](https://docs.google.com/presentation/d/1i6w4p26r_9lu_reHYZDIVnzh-4SdERVAoSI5i42lBU8/edit#slide=id.p) | 63 | |9|M|3/6| Parallelism| | [Parallelism in Julia Slides](https://docs.google.com/presentation/d/1kBYvDedm_VGZEdjhSLXSCPLec6N7fLZswcYENqwiw3k/edit#slide=id.p),[reduce/prefix notebook](https://mitmath.github.io/18337/lecture9/reduce_prefix.html)| 64 | |10|W| 3/8| Prefix (and more) ||[ppt slides](https://github.com/mitmath/18337/blob/master/lecture10/prefix.pptx), [reduce/prefix notebook](https://mitmath.github.io/18337/lecture9/reduce_prefix.html),[ThreadedScans.jl](https://github.com/JuliaFolds/ThreadedScans.jl),[cuda blog](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda)| 65 | |11|M|3/13| Adjoint Method Example | [10][10] | [Handwritten 
Notes](https://github.com/mitmath/18337/blob/master/lecture%2011/adjoint%20handwritten%20notes.pdf)| 66 | |12|W|3/15| Guest Lecture - Chris Rackauckas | 67 | |13|M|3/21 | Vectors, Operators and Adjoints | | [Handwritten Notes](https://github.com/mitmath/18337/blob/master/lecture14/handwritten_notes_vectors_adjoints.pdf) | 68 | |14|W|3/23 | Adjoints of Linear, Nonlinear, Ode | [11][11] | [Handwritten Notes](https://github.com/mitmath/18337/blob/master/lecture%2014/adjoint%20equations.pdf), [18.335 adjoint notes (Johnson)](https://math.mit.edu/~stevenj/18.336/adjoint.pdf)| 69 | |Spring Break| 70 | |15|M|4/3| Guest Lecture, Billy Moses | | [Enzyme AD](https://enzyme.mit.edu/) | 71 | |16|W|4/5| Guest Lecture, Keaton Burns | | [Dedalus PDE Solver](https://dedalus-project.org/) | 72 | |17|M|4/10| Adjoints of ODE | | [Handwritten Notes](https://github.com/mitmath/18337/blob/master/lecture%2017/handwritten%20notes%20adjoint.pdf) | 73 | |18|W|4/12| Partitioning | | | 74 | | |M|4/17| Patriots' Day 75 | |19|W|4/19| Fast Multipole and Parallel Prefix | |[Unfinished Draft](https://math.mit.edu/~edelman/publications/fast_multipole.pdf) | 76 | |20|M|4/24| 77 | |21|W|4/26| Project Presentation I | 78 | |22|M|5/1| Project Presentation II | 79 | |23|W|5/3| Project Presentation III | 80 | |24|M|5/8| Project Presentation IV | 81 | |25|W|5/10| Project Presentation V | 82 | | |M|5/15| Class Cancelled | 83 | 84 | 85 | 86 | |8|W|3/1| GPU Parallelism I |[7][7]| [[video 1]](https://www.youtube.com/watch?v=riAbPZy9gFc),[[video2]](https://www.youtube.com/watch?v=HMmOk9GIhsw) 87 | |9|M|3/6| GPU Paralellism II | | [[video]](https://www.youtube.com/watch?v=zHPXGBiTM5A), [[Eig&SVD derivatives notebooks]](https://github.com/mitmath/18337/tree/master/lecture9), [[2022 IAP Class Matrix Calculus]](https://github.com/mitmath/matrixcalc) 88 | |10|W|3/8| MPI | | [Slides](https://github.com/SciML/SciMLBook/blob/spring21/lecture12/MPI.jl.pdf), [[video, Lauren Milichen]](https://www.youtube.com/watch?v=LCIJj0czofo),[[Performance Metrics]](https://github.com/mitmath/18337/blob/spring21/lecture12/PerformanceMetricsSoftwareArchitecture.pdf) see p317,15.6 89 | |11|M|3/13| Differential Equations I | [9][9]| 90 | |12|W|3/15| Differential Equations II |[10][10] | 91 | |13|M|3/20| Neural ODE |[11][11] | 92 | |14|W|3/22| |[13][13] | 93 | | | | | Spring Break | 94 | |15|M|4/3| | | [GPU Slides](https://docs.google.com/presentation/d/1npryMMe7JyLLCLdeAM3xSjLe5Q54eq0QQrZg5cxw-ds/edit?usp=sharing) [Prefix Materials](https://github.com/mitmath/18337/tree/master/lecture%2013) 95 | |16|W|4/5| Convolutions and PDEs | [14][14] | 96 | |17|M|4/10| Chris R on ode adjoints, PRAM Model |[11][11] | [[video]](https://www.youtube.com/watch?v=KCTfPyVIxpc)| 97 | |18|W|4/12| Linear and Nonlinear System Adjoints | [11][11] | [[video]](https://www.youtube.com/watch?v=KCTfPyVIxpc)| 98 | | |M|4/17| Patriots' Day 99 | |19|W|4/19| Lagrange Multipliers, Spectral Partitioning || [Partitioning Slides](https://github.com/alanedelman/18.337_2018/blob/master/Lectures/Lecture13_1022_SpectralPartitioning/Partitioning.ppt)| | 100 | |20|M|4/24| |[15][15]| [[video]](https://www.youtube.com/watch?v=YuaVXt--gAA),[notes on adjoint](https://github.com/mitmath/18337/blob/master/lecture20/adjointpde.pdf)| 101 | |21|W|4/26| Project Presentation I | 102 | |22|M|5/1| Project Presentation II | [Materials](https://github.com/mitmath/18337/tree/master/lecture%2022) 103 | |23|W|5/3| Project Presentation III | [16][16] | [[video](https://www.youtube.com/watch?v=32rAwtTAGdU)] 104 | 
|24|M|5/8| Project Presentation IV | 105 | |25|W|5/10| Project Presentation V | 106 | |26|M|5/15| Project Presentation VI| 107 | 108 | 109 | [1]:https://book.sciml.ai/notes/01/ 110 | [2]:https://book.sciml.ai/notes/02-Optimizing_Serial_Code/ 111 | [3]:https://book.sciml.ai/notes/03-Introduction_to_Scientific_Machine_Learning_through_Physics-Informed_Neural_Networks/ 112 | [4]:https://book.sciml.ai/notes/04-How_Loops_Work-An_Introduction_to_Discrete_Dynamics/ 113 | [5]:https://book.sciml.ai/notes/05-The_Basics_of_Single_Node_Parallel_Computing/ 114 | [6]:https://book.sciml.ai/notes/06-The_Different_Flavors_of_Parallelism/ 115 | [7]:https://book.sciml.ai/notes/07/ 116 | [8]:https://book.sciml.ai/notes/08-Forward-Mode_Automatic_Differentiation_(AD)_via_High_Dimensional_Algebras/ 117 | [9]:https://book.sciml.ai/notes/09/ 118 | [10]:https://book.sciml.ai/notes/10-Basic_Parameter_Estimation-Reverse-Mode_AD-and_Inverse_Problems/ 119 | [11]:https://book.sciml.ai/notes/11-Differentiable_Programming_and_Neural_Differential_Equations/ 120 | [13]:https://book.sciml.ai/notes/13/ 121 | [14]:https://book.sciml.ai/notes/14/ 122 | [15]:https://book.sciml.ai/notes/15/ 123 | [16]:https://book.sciml.ai/notes/16/ 124 | 125 | # Lecture Summaries and Handouts 126 | 127 | [Class Videos](https://mit.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?folderID=9e659f61-1fd4-4b98-96a0-af940143c9c7) 128 | 129 | ## Lecture 1: Syllabus, Introduction to Performance, Introduction to Automatic Differentiation 130 | 131 | Setting the stage for this course which will involve high performance computing, mathematics, and scientific machine learning, we looked 132 | at two introductory notebooks. The first [Julia is fast]](https://github.com/mitmath/18337/blob/master/lecture1/Julia%20is%20fast.ipynb) 133 | primarily reveals just how much performance languages like Python can leave on the table. Many people don't compare languages, so they 134 | are unlikely to be aware. The second [AutoDiff]](https://github.com/mitmath/18337/blob/master/lecture1/AutoDiff.ipynb) reveals the "magic" 135 | of forward mode autodifferentiation showing how a compiler can "rewrite" a program through the use of software overloading and still 136 | maintain performance. This is a whole new way to see calculus, not the way you learned it in a first year class, and not finite differences either. 137 | 138 | ## Lecture 2: The Parallel Dream and Intro to Matrix Calculus 139 | We gave an example 140 | [The Parallel Dream]](https://github.com/mitmath/18337/blob/master/lecture1/the_dream.ipynb) 141 | 142 | 143 | ### Lecture and Notes 144 | 145 | 146 | # Homeworks 147 | 148 | HW1 will be due Thursday Feb 16. This is really just a getting started homework. 149 | 150 | [Hw1](https://mitmath.github.io/18337/hw1/hw1.html) 151 | 152 | # Final Project 153 | 154 | For the second half of the class students will work on the final project. A one-page final project 155 | proposal must be sumbitted by March 24 Friday, through canvas. 156 | 157 | Last three weeks (tentative) will be student presentations. 158 | 159 | ## Possible Project Topics 160 | 161 | Here's a list of [current projects](https://github.com/JuliaLabs/julialabs.github.io/blob/master/projects.md) of interest to the julialab 162 | 163 | One possibility is to review an interesting algorithm not covered in the course 164 | and develop a high performance implementation. 
Some examples include: 165 | 166 | - High performance PDE solvers for specific PDEs like Navier-Stokes 167 | - Common high performance algorithms (Ex: Jacobian-Free Newton Krylov for PDEs) 168 | - Recreation of a parameter sensitivity study in a field like biology, 169 | pharmacology, or climate science 170 | - [Augmented Neural Ordinary Differential Equations](https://arxiv.org/abs/1904.01681) 171 | - [Neural Jump Stochastic Differential Equations](https://arxiv.org/pdf/1905.10403.pdf) 172 | - Parallelized stencil calculations 173 | - Distributed linear algebra kernels 174 | - Parallel implementations of statistical libraries, such as survival statistics 175 | or linear models for big data. Here's [one example parallel library)](https://github.com/harrelfe/rms) 176 | and a [second example](https://bioconductor.org/packages/release/data/experiment/html/RegParallel.html). 177 | - Parallelization of data analysis methods 178 | - Type-generic implementations of sparse linear algebra methods 179 | - A fast regex library 180 | - Math library primitives (exp, log, etc.) 181 | 182 | Another possibility is to work on state-of-the-art performance engineering. 183 | This would be implementing a new auto-parallelization or performance enhancement. 184 | For these types of projects, implementing an application for benchmarking is not 185 | required, and one can instead benchmark the effects on already existing code to 186 | find cases where it is beneficial (or leads to performance regressions). 187 | Possible examples are: 188 | 189 | - [Create a system for automatic multithreaded parallelism of array operations](https://github.com/JuliaLang/julia/issues/19777) and see what kinds of packages end up more efficient 190 | - [Setup BLAS with a PARTR backend](https://github.com/JuliaLang/julia/issues/32786) 191 | and investigate the downstream effects on multithreaded code like an existing 192 | PDE solver 193 | - [Investigate the effects of work-stealing in multithreaded loops](https://github.com/JuliaLang/julia/issues/21017) 194 | - Fast parallelized type-generic FFT. Starter code by Steven Johnson (creator of FFTW) 195 | and Yingbo Ma [can be found here](https://github.com/YingboMa/DFT.jl) 196 | - Type-generic BLAS. [Starter code can be found here](https://github.com/JuliaBLAS/JuliaBLAS.jl) 197 | - Implementation of parallelized map-reduce methods. For example, `pmapreduce` 198 | [extension to `pmap`](https://docs.julialang.org/en/v1/manual/parallel-computing/index.html) 199 | that adds a paralellized reduction, or a fast GPU-based map-reduce. 200 | - Investigating auto-compilation of full package codes to GPUs using tools like 201 | [CUDAnative](https://github.com/JuliaGPU/CUDAnative.jl) and/or 202 | [GPUifyLoops](https://github.com/vchuravy/GPUifyLoops.jl). 203 | - Investigating alternative implementations of databases and dataframes. 204 | [NamedTuple backends of DataFrames](https://github.com/JuliaData/DataFrames.jl/issues/1335), alternative [type-stable DataFrames](https://github.com/FugroRoames/TypedTables.jl), defaults for CSV reading and other large-table formats 205 | like [JuliaDB](https://github.com/JuliaComputing/JuliaDB.jl). 206 | 207 | Additionally, Scientific Machine Learning is a wide open field with lots of 208 | low hanging fruit. Instead of a review, a suitable research project can be 209 | used for chosen for the final project. 
Possibilities include: 210 | 211 | - Acceleration methods for adjoints of differential equations 212 | - Improved methods for Physics-Informed Neural Networks 213 | - New applications of neural differential equations 214 | - Parallelized implicit ODE solvers for large ODE systems 215 | - GPU-parallelized ODE/SDE solvers for small systems 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /hw1/hints.md: -------------------------------------------------------------------------------- 1 | # Hints and Tricks for HW1. More will be added. 2 | # Note: any format submission (e.g. pdf, notebooks, zip) is fine 3 | 4 | ## Problem 1: 5 | 6 | **Note: g is a function from Rⁿ to Rⁿ** 7 | **Note: Understand that the goal of the problem is to understand the stability of these basic iterations as they will become critical 8 | for understanding the use of neural networks and other methods** 9 | 10 | **Reminder: Stability of x(n) = g(x(n-1)) is proved by taking the jacobian of g and showing its eigenvalues have absolute value < 1.** 11 | 12 | * Part 1: Should be straightforward. Think about x converging to the fixed point. 13 | * Part 2: Write your answer in terms of J_n the Jacobian of g at x_n and J_0 the Jacobian of g at x_0 and the Identity. 14 | 15 | **Hint:** What is the Jacobian of the function `x->x-J_0⁻¹g(x)` at x=x_n? That is the matrix you need to write down. 16 | 17 | **Hint:** You may use the fact that if `x_0 - x*` is small, then `J_0 ≈ J_n ≈ J_*`. More precisely, assume that `J_x⁻¹ J_y = I + O(|x - y|)` 18 | 19 | **Hint:** If a matrix is small then the eigenvalues of the matrix are small. 20 | 21 | * Part 4: Remember that the eigenvalues of `α * M` are α times the eigenvalues of M, and the problem says the eigenvalues are positive. 22 | 23 | * Part 4 24 | Better wording: create a new dynamical system that converges to a value x_e such that g(x_e) = 0. 25 | 26 | ## Problem 2: 27 | * Part 1: If you do part 1 as a Julia program (rather than as text), then Part 1 and Part 2 are the same. There really is no part 1. 28 | 29 | * Part 2: The prompt to make use of multiple dispatch might be a little bit misleading. You don't need to define multiple methods for `my_quantile` itself, but ideally you should take advantage of how Distributions.jl uses multiple dispatch. Distributions.jl defines methods for the functions `mean`, `pdf` and `cdf` for all `Distribution` objects, so if you implement `my_quantile` right, it should just work for any distribution. 30 | 31 | For those of you that are new to Julia, you can find a quick explanation of what multiple dispatch is [here](https://stackoverflow.com/questions/58700879/what-is-multiple-dispatch-and-how-does-one-use-it-in-julia). If you are more curious, you can also check out [this blog post explaining it in more detail](https://opensourc.es/blog/basics-multiple-dispatch/#what_is_dispatch) or [this video](https://www.youtube.com/watch?v=kc9HwsxE1OY) by Stefan explaining why this is actually so useful. 32 | 33 | **Hint:** You can get the CDF and PDF of a `Distribution` object `d` at point `x` with `cdf(d, x)` or `pdf(d, x)` respectively. You don't have to derive the PDF yourself. 
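**Hint:** If you want to see these functions in action before writing `my_quantile`, here is a minimal illustrative snippet (the particular distribution and evaluation points are arbitrary, chosen just for this example):

```julia
using Distributions

d = Normal(0, 1)       # any UnivariateDistribution works the same way
mean(d)                # 0.0  — a reasonable starting guess x₀
cdf(d, 0.0)            # 0.5  — probability that X < 0
pdf(d, 0.0)            # ≈ 0.3989, the derivative of the CDF at 0
quantile(d, 0.5)       # 0.0  — the built-in to compare your my_quantile against
```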
**Hint:** Julia allows you to compute default values for (keyword) arguments in the function signature itself, so your function definition could look like:
```julia
function my_quantile(d, y; x₀=mean(d))
    # the actual implementation
end
```

## Problem 3:

* Part 1: Some were a bit confused by the signature given for `calc_attractor!`. It's probably easiest if you write your function something like this:
```julia
function calc_attractor!(out, r, x₀; warmup=400)
    num_attract = length(out)
    # first do warmup then write each step to `out`
end
```
If you want you can generalize this to arbitrary systems given by some recurrence relation `f`, but this is not required.

**Optional Optimization** In Julia, you can get uninitialized arrays with the constructor `Array{Float64}(undef, dim1, dim2, ...)`, which will be slightly more efficient than `zeros` if you are overwriting each entry anyway.

**Optional Julia Syntax** For Parts 2-5, the functions `eachindex` and `eachcol` might be useful: `eachindex` iterates over each index of an array, and `eachcol` iterates over the columns of a matrix as views.

If a vector is 1-based there is no difference between `for i = 1:length(vector)` and `for i = eachindex(vector)`.

* Part 3: Use `@threads` to parallelize an embarrassingly parallel for loop.

Note that you cannot change the number of threads inside a Julia session, so you must start Julia with something like `julia -t 4` or use the VS Code setting like you saw in class (Code --> Preferences --> Settings --> threads; on a Mac it's Command-Comma).

* Part 4: We didn't get a chance to talk about `@distributed` in class, but here is an example. (This works on distributed memory computers, but you can also run it on your shared memory laptop. By contrast, `@threads` assumes shared memory.)

One can use `@distributed` in the same way as `@threads` (to parallelize a loop), but it also has the nice property of allowing reductions. In the following example, we use `(+)` and `hcat` as the reductions: summation, and horizontal concatenation (which packages all the results up in an array). (Note that `sum` would be wrong here; the reduction must be a binary operation.)

```julia
using Distributed
println(workers())

if nworkers()==1
    addprocs(5) # Unlike threads you can addprocs in the middle of a julia session
    println(workers())
end

@everywhere function f(i)
    return rand(10)*i
end

r = 1:10000

@distributed (+) for i in r
    f(i)
end

@distributed hcat for i in r
    f(i)
end
```

A note on `pmap` versus `@distributed`: `pmap` has a head node which sends the work to the other processors, with dynamic load balancing, while `@distributed` statically splits the loop iterations across the workers. For computations where there is a ton of data to send around, `pmap` can be very inefficient, but for this computation we expect hardly any difference, perhaps slightly different overheads. A sketch of a `pmap` version of the example above is given below.
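Not part of the required solution, but as a rough sketch mirroring the `@distributed` example: with `pmap`, the per-iteration results come back to the calling process, so the reductions happen afterwards.

```julia
using Distributed
if nworkers() == 1
    addprocs(5)
end

@everywhere f(i) = rand(10) * i

r = 1:10000

results = pmap(f, r)              # the head process farms out each i, with load balancing
total   = reduce(+, results)      # same as the @distributed (+) reduction
M       = reduce(hcat, results)   # same as the @distributed hcat reduction (10 × 10000 matrix)
```

For this problem the timings should be in the same ballpark as `@distributed`; the interesting comparison is when each iteration has a lot of data to ship around.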
108 | 109 | -------------------------------------------------------------------------------- /hw1/hw1.jl: -------------------------------------------------------------------------------- 1 | ### A Pluto.jl notebook ### 2 | # v0.19.14 3 | 4 | using Markdown 5 | using InteractiveUtils 6 | 7 | # ╔═╡ 9c384715-5bf5-4308-94ef-db4f26be45a4 8 | md"_Homework 1, version 1 -- 18.337 -- Spring 2023_" 9 | 10 | # ╔═╡ 7679b2c5-a644-4341-a7cc-d1335727aacd 11 | # edit the code below to set your name and kerberos ID (i.e. email without @mit.edu) 12 | 13 | student = (name = "Philip the Corgi", kerberos_id = "ptcorgi") 14 | 15 | # press the ▶ button in the bottom right of this cell to run your edits 16 | # or use Shift+Enter 17 | 18 | # you might need to wait until all other cells in this notebook have completed running. 19 | # scroll down the page to see what's up 20 | 21 | # ╔═╡ f8750fa4-8d49-4880-a53e-f40a653c84ea 22 | md"HW is to be submitted on Canvas in the form of a .jl file and .pdf file (use the browser print)" 23 | 24 | # ╔═╡ bec48cfd-ac3b-4dae-973f-cf529b3cdc05 25 | md""" 26 | # Homework 1: Getting up and running and Matrix Calculus 27 | 28 | HW1 release date: Thursday, Feb 9, 2023. 29 | 30 | **HW1 due date: Thursday, Feb 16, 2023, 11:59pm EST**, _but best completed before Wednesday's lecture if possible_. 31 | 32 | First of all, **_welcome to the course!_** We are excited to teach you about parallel computing and scientific machine lerning, using the same tools that we work with ourselves. 33 | 34 | 35 | Without submitting anything we'd also like you to login and try out Juliahub, which we will use later especially when we use GPUs. You might also try vscode on your own computer. 36 | """ 37 | 38 | # ╔═╡ 0da73ecd-5bda-4098-8f13-354af436d231 39 | md"## (Required) Exercise 0 - _Making a basic function_ 40 | 41 | Computing $x^2+1$ is easy -- you just multiply $x$ with itself and add 1. 42 | 43 | ##### Algorithm: 44 | 45 | Given: $x$ 46 | 47 | Output: $x^2+1$ 48 | 49 | 1. Multiply $x$ by $x$ and add 1" 50 | 51 | # ╔═╡ 963f24f5-a442-4590-b355-300703b0cf86 52 | function basic_function(x) 53 | return x*x # this is wrong, write your code here! 54 | end 55 | 56 | # ╔═╡ b6f5abbb-1c32-46d0-b92a-2d0c6c806348 57 | let 58 | result = basic_function(5) 59 | if !(result isa Number) 60 | md""" 61 | !!! warning "Not a number" 62 | `basic_square` did not return a number. Did you forget to write `return`? 63 | """ 64 | elseif abs(result - (5*5 + 1)) < 0.01 65 | md""" 66 | !!! correct 67 | Well done! 68 | """ 69 | else 70 | md""" 71 | !!! warning "Incorrect" 72 | Keep working on it! 73 | """ 74 | end 75 | end 76 | 77 | # ╔═╡ 172bd4bd-5ea9-475f-843d-abb86ffaed34 78 | 79 | 80 | # ╔═╡ 20ed1521-fb1d-43cd-8c6f-15041fc512ec 81 | if student.kerberos_id === "ptcorgi" 82 | md""" 83 | !!! danger "Oops!" 84 | **Before you submit**, remember to fill in your name and kerberos ID at the top of this notebook! 85 | """ 86 | end 87 | 88 | # ╔═╡ ceaf29f7-df04-481e-9836-68298a9f64c7 89 | md"""# Installation 90 | Before being able to run this notebook succesfully locally, you will need to [set up Julia and Pluto.](https://computationalthinking.mit.edu/Spring21/installation/) 91 | 92 | One you have Julia and Pluto installed, you can click the button at the top right of this page and follow the instructions to edit this notebook locally and submit. 
93 | """ 94 | 95 | # ╔═╡ 4ba96121-453d-400e-877a-61db02928ffb 96 | md""" 97 | # Matrix calculus 98 | """ 99 | 100 | # ╔═╡ 6996372a-0150-4522-8aa4-3fec36a0dcbb 101 | md""" 102 | For each function $f(x)$, work out the linear transformation $f'(x)$ such that $df = f'(x) dx$. 103 | Check your answers numerically using Julia by computing $f(x+e)-f(x)$ for some random $x$ and (small) $e$, and comparing with $f'(x)e$. 104 | We use lowercase $x$ for vectors and uppercase $X$ for matrices. 105 | 106 | For the written part write the answer in the form f'(x)[dx]. 107 | 108 | For the numerical part write a function that works for all $x$ and $e$ and run 109 | on a few random inputs. 110 | """ 111 | 112 | # ╔═╡ 6067b7d5-a8d4-4922-a761-210418032da5 113 | md""" 114 | ## Question 1 115 | 116 | $f \colon x \in \mathbb{R}^n \longmapsto (x^\top x)^2$. 117 | 118 | $f'(x)[dx]=?$ 119 | Note: dx is a column vector. Be sure your answer makes sense in terms 120 | of row and column vectors. 121 | """ 122 | 123 | # ╔═╡ 7b2550d6-422d-4b8b-a86c-7e49314ac6c9 124 | 125 | 126 | # ╔═╡ f95d162c-0522-4cb1-9251-7659fee4711e 127 | md""" 128 | ## Question 2 129 | 130 | $f \colon x \in \mathbb{R}^n \longmapsto \sin.(x)$, meaning the elementwise application of the $\sin$ function to each entry of the vector $x$, whose result is another vector in $\mathbb{R}^n$. 131 | """ 132 | 133 | # ╔═╡ a02e8536-0360-4043-90e7-4fb28966393d 134 | 135 | 136 | # ╔═╡ e5738862-51f5-4dde-81a8-6db7d3638270 137 | 138 | 139 | # ╔═╡ bc655179-19a3-42c7-ab8b-776d3158a8c6 140 | md""" 141 | ## Question 3 142 | 143 | $f \colon X \in \mathbb{R}^{n \times m} \longmapsto \theta^\top X$, where $\theta \in R^n$ is a vector 144 | """ 145 | 146 | # ╔═╡ 2721e816-327b-468e-8121-2dec969d2021 147 | md""" 148 | ## Question 4 149 | 150 | $f \colon X \in \mathbb{R}^{n \times n} \longmapsto X^{-2}$, where $X$ is non-singular. 
151 | """ 152 | 153 | # ╔═╡ 675fd3c3-063e-4b34-a43d-e2486ca514ae 154 | 155 | 156 | # ╔═╡ 29d955a0-0410-4d8e-89a8-81a63229126c 157 | # Your code goes here 158 | 159 | # ╔═╡ 00000000-0000-0000-0000-000000000001 160 | PLUTO_PROJECT_TOML_CONTENTS = """ 161 | [deps] 162 | """ 163 | 164 | # ╔═╡ 00000000-0000-0000-0000-000000000002 165 | PLUTO_MANIFEST_TOML_CONTENTS = """ 166 | # This file is machine-generated - editing it directly is not advised 167 | 168 | julia_version = "1.8.0-rc4" 169 | manifest_format = "2.0" 170 | project_hash = "da39a3ee5e6b4b0d3255bfef95601890afd80709" 171 | 172 | [deps] 173 | """ 174 | 175 | # ╔═╡ Cell order: 176 | # ╠═9c384715-5bf5-4308-94ef-db4f26be45a4 177 | # ╠═7679b2c5-a644-4341-a7cc-d1335727aacd 178 | # ╟─f8750fa4-8d49-4880-a53e-f40a653c84ea 179 | # ╟─bec48cfd-ac3b-4dae-973f-cf529b3cdc05 180 | # ╠═0da73ecd-5bda-4098-8f13-354af436d231 181 | # ╠═963f24f5-a442-4590-b355-300703b0cf86 182 | # ╟─b6f5abbb-1c32-46d0-b92a-2d0c6c806348 183 | # ╠═172bd4bd-5ea9-475f-843d-abb86ffaed34 184 | # ╟─20ed1521-fb1d-43cd-8c6f-15041fc512ec 185 | # ╟─ceaf29f7-df04-481e-9836-68298a9f64c7 186 | # ╟─4ba96121-453d-400e-877a-61db02928ffb 187 | # ╟─6996372a-0150-4522-8aa4-3fec36a0dcbb 188 | # ╟─6067b7d5-a8d4-4922-a761-210418032da5 189 | # ╠═7b2550d6-422d-4b8b-a86c-7e49314ac6c9 190 | # ╟─f95d162c-0522-4cb1-9251-7659fee4711e 191 | # ╠═a02e8536-0360-4043-90e7-4fb28966393d 192 | # ╠═e5738862-51f5-4dde-81a8-6db7d3638270 193 | # ╟─bc655179-19a3-42c7-ab8b-776d3158a8c6 194 | # ╟─2721e816-327b-468e-8121-2dec969d2021 195 | # ╠═675fd3c3-063e-4b34-a43d-e2486ca514ae 196 | # ╠═29d955a0-0410-4d8e-89a8-81a63229126c 197 | # ╟─00000000-0000-0000-0000-000000000001 198 | # ╟─00000000-0000-0000-0000-000000000002 199 | -------------------------------------------------------------------------------- /hw1/hw1.jmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Homework 1, Parallelized Dynamics 3 | date: February 2nd, 2022 4 | --- 5 | The problems up to Problem 3 Part 2 are 6 | due Wednesday February 16, 2022 at 11:59pm EST. 7 | We'll have the parallel parts (Problem 3, Part 3 and 4) due Tuesday February 22, 2022. 8 | 9 | At the time of assignment, we have not covered all the material yet, 10 | but I wanted to give you a headstart. 11 | 12 | Homework 1 is a chance to get some experience implementing discrete dynamical 13 | systems techniques in a way that is parallelized, and a time to understand the 14 | fundamental behavior of the bottleneck algorithms in scientific computing. 15 | 16 | Please submit the hw to canvas. Canvas will only be used for hw submission. 17 | (Original pset authored by Chris Rackauckas.) 18 | 19 | ## Problem 1: A Ton of New Facts on Newton 20 | 21 | In this problem we will look into Newton's method. Newton's method is the 22 | dynamical system defined by the update process: 23 | 24 | $$x_{n+1} = x_n - \left(\frac{dg}{dx}(x_n)\right)^{-1} g(x_n)$$ 25 | 26 | For these problems, assume that $\frac{dg}{dx}$ is non-singular. 27 | 28 | ### Part 1 29 | 30 | Show that if $x^\ast$ is a steady state of the equation, then $g(x^\ast) = 0$. 31 | 32 | ### Part 2 33 | 34 | Take a look at the Quasi-Newton approximation: 35 | 36 | $$x_{n+1} = x_n - \left(\frac{dg}{dx}(x_0)\right)^{-1} g(x_n)$$ 37 | 38 | for some fixed $x_0$. Derive the stability of the Quasi-Newton approximation 39 | in the form of a matrix whose eigenvalues need to be constrained. 
Use this 40 | to argue that if $x_0$ is sufficiently close to $x^\ast$ then the steady 41 | state is a stable (attracting) steady state. 42 | 43 | ### Part 3 44 | 45 | Relaxed Quasi-Newton is the method: 46 | 47 | $$x_{n+1} = x_n - \alpha \left(\frac{dg}{dx}(x_0)\right)^{-1} g(x_n)$$ 48 | 49 | Argue that for some sufficiently small $\alpha$ that the Quasi-Newton iterations 50 | will be stable if the eigenvalues of 51 | $(\left(\frac{dg}{dx}(x_0)\right)^{-1} g(x_n))^\prime$ are all positive for 52 | every $x$. 53 | 54 | (Technically, these assumptions can be greatly relaxed, but weird cases arise. 55 | When $x \in \mathbb{C}$, this holds except on some set of Lebesgue measure zero. 56 | Feel free to explore this.) 57 | 58 | ### Part 4 59 | 60 | Fixed point iteration is the dynamical system 61 | 62 | $$x_{n+1} = g(x_n)$$ 63 | 64 | which converges to $g(x)=x$. 65 | 66 | 1. What is a small change to the dynamical system that could be done such that 67 | $g(x)=0$ is the steady state? 68 | 2. How can you change the $\left(\frac{dg}{dx}(x_0)\right)^{-1}$ term from the 69 | Quasi-Newton iteration to get a method equivalent to fixed point iteration? 70 | What does this imply about the difference in stability between Quasi-Newton 71 | and fixed point iteration if $\frac{dg}{dx}$ has large eigenvalues? 72 | 73 | ## Problem 2: The Root of all Problems 74 | 75 | In this problem we will practice writing fast and type-generic Julia code by 76 | producing an algorithm that will compute the quantile of any probability 77 | distribution. 78 | 79 | ### Part 1 80 | 81 | Many problems can be interpreted as a rootfinding problem. For example, let's 82 | take a look at a problem in statistics. Let $X$ be a random variable with a 83 | cumulative distribution function (CDF) of $cdf(x)$. Recall that the CDF is a 84 | monotonically increasing function in $[0,1]$ which is the total probability of 85 | $X < x$. The $y$th quantile of $X$ is the value $x$ at with $X$ has a y% chance 86 | of being less than $x$. Interpret the problem of computing an arbitrary quantile 87 | $y$ as a rootfinding problem, and use Newton's method to write an algorithm 88 | for computing $x$. 89 | 90 | (Hint: Recall that $cdf^{\prime}(x) = pdf(x)$, the probability distribution 91 | function.) 92 | 93 | ### Part 2 94 | 95 | Use the types from Distributions.jl to write a function 96 | `my_quantile(y,d)` which uses multiple dispatch to compute the 97 | $y$th quantile for any `UnivariateDistribution` `d` from Distributions.jl. 98 | Test your function on `Gamma(5, 1)`, `Normal(0, 1)`, and `Beta(2, 4)` against 99 | the `Distributions.quantile` function built into the library. 100 | 101 | (Hint: Have a keyword argument for $x_0$, and let its default be the mean or 102 | median of the distribution.) 103 | 104 | ## Problem 3: Bifurcating Data for Parallelism 105 | 106 | In this problem we will write code for efficient generation of the bifurcation 107 | diagram of the logistic equation. 108 | 109 | ### Part 1 110 | 111 | The logistic equation is the dynamical system given by the update relation: 112 | 113 | $$x_{n+1} = rx_n (1-x_n)$$ 114 | 115 | where $r$ is some parameter. Write a function which iterates the equation from 116 | $x_0 = 0.25$ enough times to be sufficiently close to its long-term behavior 117 | (400 iterations) and samples 150 points from the steady state attractor 118 | (i.e. output iterations 401:550) as a function of $r$, and mutates some vector 119 | as a solution, i.e. `calc_attractor!(out,f,p,num_attract=150;warmup=400)`. 
120 | 121 | Test your function with $r = 2.9$. Double check that your function computes 122 | the correct result by calculating the analytical steady state value. 123 | 124 | ### Part 2 125 | 126 | The bifurcation plot shows how a steady state changes as a parameter changes. 127 | Compute the long-term result of the logistic equation at the values of 128 | `r = 2.9:0.001:4`, and plot the steady state values for each $r$ as an 129 | r x steady_attractor scatter plot. You should get a very bizarrely awesome 130 | picture, the bifurcation graph of the logistic equation. 131 | 132 |  133 | 134 | (Hint: Generate a single matrix for the attractor values, and use `calc_attractor!` 135 | on views of columns for calculating the output, or inline the `calc_attractor!` 136 | computation directly onto the matrix, or even give `calc_attractor!` an input 137 | for what column to modify.) 138 | 139 | ### Part 3 140 | 141 | Multithread your bifurcation graph generator by performing different steady 142 | state calcuations on different threads. Does your timing improve? Why? Be 143 | careful and check to make sure you have more than 1 thread! 144 | 145 | ### Part 4 146 | 147 | Multiprocess your bifurcation graph generator first by using `pmap`, and then 148 | by using `@distributed`. Does your timing improve? Why? Be careful to add 149 | processes before doing the distributed call. 150 | 151 | (Note: You may need to change your implementation around to be allocating 152 | differently in order for it to be compatible with multiprocessing!) 153 | 154 | ### Part 5 155 | 156 | Which method is the fastest? Why? 157 | -------------------------------------------------------------------------------- /hw2/hints2.md: -------------------------------------------------------------------------------- 1 | # Hints and Tricks for HW2. 2 | # Note: any format submission (e.g. pdf, notebooks, zip) is fine 3 | 4 | 5 | 6 | ** Motivation: In this problem you will learn to do scientific machine learning. Yay! 7 | We will generate some artificial data, and find parameters to fit a differential equation. 8 | 9 | ## Problem 1: 10 | 11 | Many people are impressed by differential equation solvers such as the ones that appear 12 | in commonly used packages. The feeling is they must be so much more 13 | complicated than the Euler methods one sees in the basic classes. 14 | Here we will demonstrate that it is remarkably simple 15 | to build these solvers yourself. Everybody should do this once in their lifetimes. 16 | 17 | All you really need is the data in [the Dormand Prince Wikipedia article ](https://en.wikipedia.org/wiki/Dormand%E2%80%93Prince_method) and the algorithm in the [the Butcher Wikipedia article](https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta_methods#Explicit_Runge.E2.80.93Kutta_methods) . There is also discussion 18 | about this method in the class notes [Lecture 7](https://book.sciml.ai/notes/07/) search for Higher Order Methods 19 | towards the bottom of the page. 
20 | 21 | For the data, you can just copy and paste (and maybe use Static Vectors): 22 | ```julia 23 | const s = 7 24 | const a = @SVector[ 25 | 1/5, 26 | 3/40, 9/40, 27 | 44/45, −56/15, 32/9, 28 | 19372/6561, −25360/2187, 64448/6561, −212/729, 29 | 9017/3168, −355/33, 46732/5247, 49/176, −5103/18656, 30 | 35/384, 0, 500/1113, 125/192, −2187/6784, 11/84, 31 | ] 32 | const b = @SVector[35/384, 0, 500/1113, 125/192, −2187/6784, 11/84, 0] 33 | const c = @SVector[0, 1/5, 3/10, 4/5, 8/9, 1, 1] 34 | ``` 35 | For Part 2 of Problem 1 see [[pdf file]](https://github.com/mitmath/18337/blob/master/hw2/hw2_hint.pdf) 36 | I believe setting the initial conditions to 0 for the 8 new parameters should work just fine. 37 | 38 | Part 3: use the data from all the timesteps 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /hw2/hw2_hint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/hw2/hw2_hint.pdf -------------------------------------------------------------------------------- /hw2/hw2_hint.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage[utf8]{inputenc} 3 | \usepackage{amsmath} 4 | \usepackage{physics} 5 | 6 | \begin{document} 7 | \section{HW2 Hint} 8 | \subsection{Problem 1} 9 | \subsubsection{Part 2} 10 | 11 | It is helpful to realize that $x$ and $y$ depend not only on $t$ but on the four parameters $p=(\alpha,\beta,\gamma,\delta).$ 12 | 13 | Thus it is reasonable to evolve not only $u$=[$x$, $y$] with time but also the 14 | eight variables in the 2x4 matrix: 15 | $$ 16 | \frac{\partial{u}}{\partial p}= 17 | \begin{pmatrix} 18 | \frac{\partial x}{\partial \alpha} & 19 | \frac{\partial x}{\partial \beta} & 20 | \frac{\partial x}{\partial \gamma} & 21 | \frac{\partial x}{\partial \delta} \\ 22 | \frac{\partial y}{\partial \alpha} & 23 | \frac{\partial y}{\partial \beta} & 24 | \frac{\partial y}{\partial \gamma} & 25 | \frac{\partial y}{\partial \delta} 26 | \end{pmatrix}.$$ 27 | Thus we are evolving 10 variables in 28 | total. 29 | I'm wondering if it matters 30 | if we start these eight variables 31 | at 0 at t=0 or not? 32 | 33 | Here 34 | $$f(u,p,t) = 35 | \begin{pmatrix} 36 | \alpha x - \beta x y \\ 37 | -\gamma y + \delta x y 38 | \end{pmatrix}. 39 | $$ 40 | 41 | You will need the Jacobian 42 | of $f$ with respect to $x$ and $y$: 43 | $$\frac{\partial f}{\partial u} 44 | = 45 | \begin{pmatrix} 46 | \alpha-\beta y & - \beta x \\ 47 | \delta y & -\gamma + \delta x 48 | \end{pmatrix}, 49 | $$ 50 | and also the Jacobian of $f$ with 51 | respect to $\alpha,\beta,\gamma,\delta$: 52 | 53 | $$ 54 | \frac{\partial f}{\partial p}= 55 | \begin{pmatrix} 56 | x & -xy & 0 & 0 \\ 57 | 0 & 0 & -y & xy 58 | \end{pmatrix}. 59 | $$ 60 | 61 | Note that the resulting system does not have a nice analytical solution since $x$ and $y$ are functions of 62 | $t$. Instead, use your integrator from part 1 for solving the new combined system. 63 | 64 | \subsubsection{Part 3} 65 | 66 | First you will need to write down the loss function you want to minimize. You are asked to use the L2-norm 67 | of the difference between your computed solution $u(t_i)$ and the original solution from part 1 $\hat u(t_i)$ you are trying to 68 | recreate (the training data if you will). 
The loss function $L(u)$ then looks as follows: 69 | 70 | $$ 71 | L(u) = \sum_i (u(t_i) - \hat u(t_i))^2 72 | $$ 73 | 74 | You want to minimize this function via gradient descent, so you need to find the gradient w.r.t. the parameters 75 | $p$ ($\alpha$, $\beta$, $\gamma$, and $\delta$). Use the chain rule: 76 | 77 | $$ 78 | \pdv{L}{p} = \sum_i \pdv{L}{u(t_i)} \cdot \pdv{u(t_i)}{p} 79 | $$ 80 | 81 | $\pdv{L}{u(t_i)}$ is straightforward to derive from the previous equation and $\pdv{u(t_i)}{p}$ is exactly what you were 82 | supposed to find a way to calculate numerically in part 2. 83 | 84 | 85 | \end{document} 86 | -------------------------------------------------------------------------------- /hw3/18_337_2023_pset3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/hw3/18_337_2023_pset3.pdf -------------------------------------------------------------------------------- /hw3/parallelhistogram.jl: -------------------------------------------------------------------------------- 1 | using Plots, Random, SpecialFunctions, FastGaussQuadrature, LinearAlgebra, ForwardDiff 2 | 3 | airy_kernel(x, y) = x==y ? (airyaiprime(x))^2 - x * (airyai(x))^2 : 4 | (airyai(x) * airyaiprime(y) - airyai(y) * airyaiprime(x)) / (x - y) 5 | ϕ(ξ, s) = s + 10*tan(π*(ξ+1)/4) # Transformation from [-1,1] to (s,∞) 6 | ϕ′(ξ) = (5π/2)*(sec(π*(ξ+1)/4))^2 7 | K(ξ,η,s) = sqrt(ϕ′(ξ) * ϕ′(η)) * airy_kernel(ϕ(ξ,s), ϕ(η,s)) 8 | 9 | function K(s , n=100) 10 | nodes,weights = gausslegendre(n) 11 | Symmetric( K.(nodes',nodes,s) .* (√).(weights) .* (√).(weights')) 12 | end 13 | 14 | TracyWidomPDF_via_Fredholm_Det(s) = ForwardDiff.derivative( t->det(I-K(t)),s) 15 | 16 | t = 300 # change to 10_000 slowly when ready 17 | 18 | n = 6^6 19 | dx = 1/6 20 | v = zeros(t) 21 | 22 | 23 | ## Experiment 24 | v = zeros(t) 25 | Threads.@threads for i ∈ 1:t 26 | v[i] = patiencesort1(randperm(n)) # use your fastest function here 27 | end 28 | w = (v .- 2sqrt(n+.5)) ./ (n^(1/6)) 29 | histogram(w, normalized=true, bins=-4.5:dx:2) 30 | 31 | plot!(TracyWidomPDF_via_Fredholm_Det, -5.0, 2, label="Theory", lw=3) -------------------------------------------------------------------------------- /hw3/patiencesort1.jl: -------------------------------------------------------------------------------- 1 | function patiencesort1(p) 2 | # p : Permutation 3 | # Returns length of longest increasing subsequence 4 | pile_tops = Int[] 5 | for α ∈ p 6 | whichpile = 1+sum(α.>pile_tops) # first pile where α is smaller 7 | if whichpile ≤ length(pile_tops) 8 | pile_tops[whichpile] = α # put α on top of a pile or .. 9 | else 10 | push!(pile_tops, α) # create a new pile 11 | end 12 | end 13 | return length(pile_tops) 14 | end -------------------------------------------------------------------------------- /hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/hw4.pdf -------------------------------------------------------------------------------- /hw4/hints.md: -------------------------------------------------------------------------------- 1 | # Hints for https://book.sciml.ai/homework/03/ 2 | 3 | This problem has many moving parts, but should be very satisfying once you get this to work properly. 
When you reach ``the finish line" (Boston marathon is Monday after all) please 4 | stop and think how this idea can be used and adapted for many other problems. 5 | 6 | 7 | ## Problem 1 8 | 9 | * Part 1: The definition of pullback first appears in [CR Lecture 10][10]. In particular the input of the pullback B has the size of the output of f. The output value of B has the shape of the inputs to f. A scalar function f of many variables has a B with input a scalar and an output the shape of the variables. 10 | 11 | A gradient (∇) of a scalar function of a column vector is traditionally a column vector. 12 | The Jacobian of the same scalar function is the corresponding row vector. More generally 13 | the gradient of a scalar function of any combination of shapes has the same shapes as the input. 14 | 15 | 16 | For problem 1, I would have said that B(1) is the gradient not the transpose, i.e. it is a column vector. 17 | I will be completely consistent. 18 | 19 | vjp refers to vector jacobian product. (Not a great name. 20 | In part because it's not clear, and in part because we are going 21 | to more consistently compute "Jacobian transpose"*vector. 22 | ) Computationally one does not often form Jacobians these days as they are too expensive, but rather vjp's. 23 | A function from R^n to R^m has a Jacobian that is mxn. 24 | The resulting vjp then is a vector of size m. (Note Julia's vectors are not rows or columns, they are just one dimensional.) In one place in Chris' notes he treats it as a row vector, but more consistent and simpler is to think column vector. 25 | 26 | * For part 2 see https://book.sciml.ai/notes/10/, specifically equations 36-41 will be relevant to part 2 27 | 28 | Perhaps define a function with firstline `function pullback(y,u, W₁, W, b₁, b₂)` which can be called 29 | `ū, W̄₁, W̄₂, b̄₁, b̄₂ = pullback(y, u, W₁, W, b₁, b₂)`. 30 | 31 | Note the input `y` of the pullback here is a 2-vector and the output has the same shape of the five objects, u,W1,W2,b1,b2. 32 | 33 | For the ODE, in Part 3 you'll then need to flatten those into a vector. Perhaps write a function 34 | `p = flatten(u, W₁, W, b₁, b₂)` and `u, W₁, W, b₁, b₂ = unflatten(p)`. 35 | 36 | You can do `[vec.(B_NN(y))...]` to flatten and for unflattening, use slicing and perhaps reshaping (e.g. `reshape(µ[1:10], 2, 5)`) for the final µ to use in the gradient descent step to optimize the weights. 37 | 38 | Notice that equations (36) to (41) give expressions for `W̄₁, W̄₂, b̄₁, b̄₂` but while `x` serves as the `u` , in those equations it is not considered a parameter and so you will have to figure out the right expression for `ū`. 39 | Hint: `u` just appears as matrix times vector, so perhaps looking at equation (38) might help you see the right answer. 40 | 41 | ## Part 3: you can do Part 2 yourself if you like, or use ForwardDiff.jl or Zygote. jl if you like. 42 | 43 | * Part 3: Use https://diffeq.sciml.ai/stable/features/callback_library/#PresetTimeCallback for adding the jumps for $\lambda$. A nice example of difeqs with jumps and how to run the software 44 | may be found here: https://diffeq.sciml.ai/stable/features/callback_functions/#PresetTimeCallback . 45 | 46 | Part 3 consists first of a forward pass to obtain u(t). We might recommend just saving the solution, but you can also just save u(T) and then (re)compute u in reverse with the λ and μ. 
The second requirement of Part 3 is 47 | a backward pass `tspan=(1.0,0.0)` with the primary goal 48 | to obtain the final value of μ(0) which is the flattened version of the gradient that we seek. 49 | Notice that μ would be expressed on a blackboard as a simple integral, but as a memory saving trick 50 | (we don't need to store the λ's, we can use them on the fly) we express this as a differential equation. 51 | 52 | Following our convention, we might recommend (you can do it either way) thinking of λ as a column vector. 53 | So you are solving λ' = -fᵤᵀλ + (jumps when appropriate) and μ' = -f_pᵀλ . 54 | (We won't take jumps for μ because our loss function will not depend explicitly on the parameters, i.e., g_p=0 55 | .) 56 | 57 | Notice that you will not compute fᵤᵀλ but rather you will use the ū result of the pullback function that you wrote in Part 2 calling for example, `pullback(λ, u, W₁, W, b₁, b₂)`. Don't worry the 58 | `W̄₁, W̄₂, b̄₁, b̄₂` parts will not go to waste as you need them for the f_pᵀλ. 59 | 60 | 61 | To solve for λ you will need Cᵤ for the initial condition at T=1 and Cᵤ at 0:.1:.9 for the jumps. 62 | The only time you will use Cᵤ for λ is T=1, for all other λ(t) you will be solving the differential 63 | equation λ' = fᵤᵀλ + (jumps when appropriate), where the jumps which will also be Cᵤ. 64 | Anticipating part 4, we can use the explicit values Cᵤ = 2(u(t)-û(t)), where u(t) is at the forward pass and û(t) is the known theoretical solution. (Those of you who are following will note that Cᵤ plays 65 | the role of gᵤᵀ hence it is a column vector so λ' = fᵤᵀλ +gᵤᵀ at the jumps.) 66 | 67 | When going forward just use t going from 0 to 1. No need to think about the .1's just yet. 68 | u(0) is an arbitrary 2-vector for now, but in part 4 it will be [2,0]. When going backward t 69 | run from 1 to 0 (not just at the discrete time steps of multiples of .1) 70 | 71 | * Part 4: you now have the ability to maneuver around p space and train. You will need to do gradient 72 | descent with the old problem of figuring out what multiple of the gradient to take, i.e. the stepsize. 73 | If you know some fancy methods you may give it a try, but you can also take stepsizes around .1 or .01 and 74 | then if necessary reduce this until convergence seems reasonable. Plotting the loss function is recommended 75 | for this purpose. 76 | 77 | As a finale, compare the theoretical value of the known solution with the trained solution. 
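As a rough sketch of that training loop (assuming you have written a `loss(p)` that does the forward pass and evaluates the cost, and a `gradient(p)` that returns μ(0) flattened the same way as `p` — both names here are placeholders, not code we have given you):

```julia
function train(p; η=0.01, iters=500)
    losses = Float64[]
    for _ in 1:iters
        g = gradient(p)      # μ(0) from the backward pass, flattened like p
        p = p .- η .* g      # plain gradient-descent step
        push!(losses, loss(p))
    end
    return p, losses
end

# p_trained, losses = train(p₀)
# plot(losses, yscale=:log10)   # if the curve blows up, shrink η and try again
```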
78 | 79 | [10]:https://book.sciml.ai/notes/10/ 80 | -------------------------------------------------------------------------------- /lecture 11/adjoint handwritten notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/lecture 11/adjoint handwritten notes.pdf -------------------------------------------------------------------------------- /lecture 12/..textClipping: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/lecture 12/..textClipping -------------------------------------------------------------------------------- /lecture 12/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/18337/bd60c58e052232f6ae7ada430904de534d1c5843/lecture 12/.DS_Store -------------------------------------------------------------------------------- /lecture 12/notebook.jl: -------------------------------------------------------------------------------- 1 | ### A Pluto.jl notebook ### 2 | # v0.18.0 3 | 4 | using Markdown 5 | using InteractiveUtils 6 | 7 | # ╔═╡ d5559dba-9fe4-11ec-3744-ebd1408e7dc4 8 | using LegibleLambdas, AbstractTrees, PlutoUI, HypertextLiteral, PlutoTest 9 | 10 | # ╔═╡ 99b6ab91-a022-449c-988c-0e5c5719c910 11 | begin 12 | struct Tracked{T} <: Number 13 | # The numerical result when doing the forward pass 14 | val::T 15 | name::Symbol 16 | # The pullback map for the reverse pass 17 | df 18 | # All the other variables this variable directly depends on 19 | deps::Vector{Tracked} 20 | end 21 | Tracked{T}(x, name=gensym()) where {T} = Tracked{T}(x, name, nothing, Tracked[]) 22 | # This tells Julia to convert any number added to a `Tracked` to a `Tracked` first 23 | Base.promote_rule(::Type{Tracked{S}}, ::Type{T}) where {S<:Number, T<:Number} = Tracked{promote_type(S, T)} 24 | end 25 | 26 | # ╔═╡ 13487e65-5e48-4a37-9bea-f262dd7b6d56 27 | # calculate the sum, but also remember the pullback map and input variables for the reverse pass which we'll need to calculate the gradient 28 | # `@λ` is just for the nicer printing, we could have replaced `@λ(Δ -> (Δ, Δ))` with `Δ -> (Δ, Δ)` if we didn't care about that 29 | function Base.:+(x::Tracked, y::Tracked) 30 | Tracked(x.val + y.val, :+, @λ(Δ -> (Δ, Δ)), Tracked[x, y]) 31 | end 32 | 33 | # ╔═╡ b0cc4665-eb45-48ea-9a33-5acf56d2a283 34 | function Base.:-(x::Tracked, y::Tracked) 35 | Tracked(x.val - y.val, :-, @λ(Δ -> (Δ, -Δ)), Tracked[x, y]) 36 | end 37 | 38 | # ╔═╡ 73d638bf-30c1-4694-b3a8-4b29c5e3fa65 39 | function Base.:*(x::Tracked, y::Tracked) 40 | Tracked(x.val * y.val, :*, @λ(Δ -> (Δ * y.val, Δ * x.val)), Tracked[x, y]) 41 | end 42 | 43 | # ╔═╡ ac097299-0a31-474c-ab26-a4fb24bb9046 44 | function Base.:^(x::Tracked, n::Int) 45 | Tracked(x.val^n, Symbol("^$n"), @λ(Δ -> (Δ * n * x.val^(n-1),)), Tracked[x,]) 46 | end 47 | 48 | # ╔═╡ 2141849b-675e-406c-8df4-34b2706507af 49 | function Base.:/(x::Tracked, y::Tracked) 50 | Tracked(x.val / y.val, :/, @λ(Δ -> (Δ / y.val, -Δ * x.val / y.val^2)), Tracked[x, y]) 51 | end 52 | 53 | # ╔═╡ 7429ffcb-dcee-4090-972e-ffde8393a37a 54 | begin 55 | # `Tracked` is a tree, we just need to tell AbstractTrees.jl how to get the children for each node 56 | AbstractTrees.children(x::Tracked) = x.deps 57 | # All this is just for nicer printing 58 | function Base.show(io::IO, x::Tracked) 59 | if x.df === 
nothing 60 | print(io, Base.isgensym(x.name) ? x.val : "$(x.name)=$(x.val)") 61 | else 62 | print(io, "Tracked(") 63 | show(io, x.val) 64 | print(io, ", ") 65 | print(io, x.name) 66 | #print(io, ", ") 67 | #show(io, x.df) 68 | print(io, ")") 69 | end 70 | end 71 | Base.show(io::IO, ::MIME"text/plain", x::Tracked) = print_tree(io, x) 72 | end 73 | 74 | # ╔═╡ 0b5e6560-81fd-4182-bba5-aca702fb3048 75 | begin 76 | x = Tracked{Int}(3, :x) 77 | y = Tracked{Int}(5, :y) 78 | end 79 | 80 | # ╔═╡ 81eb8a2d-a3a9-45af-a5a5-b96aefd48712 81 | (2x + (x-1)^2).val # The regular result of `2x + (x-1)^2` 82 | 83 | # ╔═╡ e52aa672-69a9-419b-a992-e7a3d1364fb6 84 | # PreOrderDFS traverses this tree from the top down 85 | Text.(collect(PreOrderDFS(y*x+x^2))) 86 | 87 | # ╔═╡ f0814e23-6f75-4db8-b277-d21d4926f876 88 | y*x+x^2 89 | 90 | # ╔═╡ 99a3507b-ca03-429f-acde-e2d1ebb32054 91 | # produces a dict with all the intermediate gradient 92 | function grad(f::Tracked) 93 | d = Dict{Any, Any}(f => 1) 94 | for x in PreOrderDFS(f) # recursively traverse all dependents 95 | x.df === nothing && continue # ignore untracked variables like constants 96 | dy = x.df(d[x]) # evaluate pullback 97 | for (yᵢ, dyᵢ) in zip(x.deps, dy) 98 | # store the gradient in d 99 | # if we have already stored a gradient for this variable, we need to add them 100 | d[yᵢ] = get(d, yᵢ, 0) + dyᵢ 101 | end 102 | end 103 | return d 104 | end 105 | 106 | # ╔═╡ d4e9b202-242e-4420-986b-12d2ab57af93 107 | grad(f::Tracked, x::Tracked) = grad(f)[x] 108 | 109 | # ╔═╡ dc62ff81-dbb8-4416-8fc7-8878e16bdf85 110 | grad(y) 111 | 112 | # ╔═╡ fc8aeed7-2806-438a-85f7-c155b0b222e6 113 | #grad(y, x) 114 | 115 | # ╔═╡ a34a0941-6e7e-4a40-affa-7941c54a10b9 116 | y 117 | 118 | # ╔═╡ 18b1c55d-a6b5-44f6-b0b3-50bdb0aa9d96 119 | w = x*y + x 120 | 121 | # ╔═╡ 506d408e-dc2b-4e12-b917-286e3f4079a2 122 | grad(w) 123 | 124 | # ╔═╡ 1a154bb7-93a3-4973-8908-788db77ac294 125 | @htl """ 126 | 127 | 139 | 140 | 141 | 142 | 143 | 144 | """ 145 | 146 | # ╔═╡ 6b1fb808-e993-4c2b-b81b-6710f8206de7 147 | function to_json(x) 148 | d = Dict{Symbol, Any}( 149 | :text => Dict{Symbol, Any}(:name => sprint(AbstractTrees.printnode, x)), 150 | :children => Any[to_json(c) for c in children(x)], 151 | :collapsed => !isempty(children(x)), 152 | ) 153 | end 154 | 155 | # ╔═╡ 437285d4-ec53-4bb7-9966-fcfb5352e205 156 | function show_tree(x; height=400) 157 | id = gensym() 158 | @htl """ 159 |