├── Gemfile ├── LICENSE ├── README.md ├── _config.yml ├── contribution_guide.md ├── how_to_preview_locally.md ├── lessons ├── AMG │ ├── lesson.md │ ├── rhs.jpg │ └── solution.jpg ├── AMReX │ ├── VisIt_2D.pdf │ ├── lesson.md │ ├── phi.gif │ ├── phi_adv_noref.0.jpg │ ├── phi_adv_noref.60.jpg │ ├── phi_adv_ref.0.jpg │ ├── phi_adv_ref.60.jpg │ ├── phi_diff_0.jpg │ └── phi_diff_10000.jpg ├── adjoint │ ├── chkpt.png │ ├── ex1.png │ ├── ex1adj.c │ ├── ex1adj.png │ ├── ex3opt.c │ ├── ex5adj.c │ └── lesson.md ├── atpesc-instructions.md ├── hand_coded_heat │ ├── 1d_heat_equation.xlsx │ ├── animated_basic_heat.gif │ ├── basic0000.png │ ├── basic0001.png │ ├── basic0002.png │ ├── basic0003.png │ ├── heat.c │ ├── heat.c.numbered.txt │ ├── highres0000.png │ ├── highres0001.png │ ├── hr_crankn0000.png │ ├── hr_crankn0001.png │ ├── hr_crankn0002.png │ ├── hr_smalldt_long0000.png │ ├── hr_smalldt_long0001.png │ ├── hr_smalldt_long0002.png │ ├── hr_smalldt_long0003.png │ ├── hr_smalldt_long0004.png │ ├── hr_spikes0000.png │ ├── hr_spikes0001.png │ ├── hr_spikes0002.png │ ├── hr_spikes_crankn0000.png │ ├── hr_spikes_crankn0001.png │ ├── hr_spikes_smalldt0000.png │ ├── hr_spikes_smalldt0001.png │ ├── hr_spikes_smalldt0002.png │ ├── hr_spikes_smalldt0003.png │ ├── lesson.md │ ├── makefile │ ├── makefile.txt │ ├── plot_heat.py │ ├── problem_setup.png │ ├── simple_1d_heat.png │ ├── spikes0000.png │ ├── spikes0001.png │ ├── spikes0002.png │ ├── spikes0003.png │ ├── spikes0004.png │ ├── spikes0005.png │ └── spikes_animated.gif ├── iterativesolvers │ └── lesson.md ├── lesson_template │ ├── animated_basic_heat.gif │ ├── basic0000.png │ └── lesson.md ├── lessons.md ├── mfem_convergence │ ├── diffusion.png │ ├── ex8.png │ ├── lesson.md │ └── mesh.png ├── superlu-mfem │ ├── gmres.mpg │ ├── gmres_residual.png │ ├── gmres_time.png │ ├── lesson.md │ ├── mfem-superlu0000.png │ ├── mfem-superlu0001.png │ ├── mfem-superlu0002.png │ ├── mfem-superlu0003.png │ ├── mfem-superlu0004.png │ ├── 
mfem-superlu0005.png │ ├── slu_metis.mpg │ ├── slu_metis_residual.png │ └── slu_metis_time.png └── time_integrators │ ├── lesson.md │ ├── mfem_sundials_dtt0000.png │ ├── mfem_sundials_dtt0001.png │ ├── mfem_sundials_dtt0002.png │ ├── mfem_sundials_dtt0003.png │ ├── mfem_sundials_explicit0000.png │ ├── mfem_sundials_explicit0001.png │ ├── mfem_sundials_explicit0002.png │ ├── mfem_sundials_explicit20000.png │ ├── mfem_sundials_explicit20001.png │ ├── mfem_sundials_explicit20002.png │ ├── nonlinear_heat.png │ ├── pyramid_animated.gif │ └── transient-heat.cpp.numbered.txt └── tools └── atpesc2017_cooley_vnc_setup.sh /Gemfile: -------------------------------------------------------------------------------- 1 | gem 'github-pages', group: :jekyll_plugins 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Extreme-scale Scientific Software Development Kit (xSDK) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Welcome to HandsOnLessons 2 | 3 | 4 | Hosted here are a series of increasingly sophisticated _hands-on_ lessons aimed at helping 5 | users of all experience levels learn to use a variety of scientific software packages for 6 | solving complex numerical problems. We begin with custom, hand-coded solutions to the 7 | homogeneous, one-dimensional heat equation to demonstrate basic numerical and performance 8 | issues such as accuracy, stability, time to solution, memory, and flops required, along 9 | with motivation for the use of numerical software packages to help achieve more robust, 10 | efficient, scalable, extensible, and portable software. 11 | 12 | [Go to the Lessons](lessons/lessons.md) 13 | 14 | We slowly build upon 15 | these simple, early examples introducing additional complexities such as inhomogeneities, 16 | higher-order solutions, time-variabilities, nonlinearities, and complex geometries in higher 17 | dimensions. We demonstrate the use of a variety of numerical software packages to address these 18 | issues and the advantages they offer over hand-coded software. 19 | 20 | Throughout the currently designed lesson plans, we use the [MFEM](http://mfem.org) (unstructured) 21 | and [AMReX](https://github.com/AMReX-Codes/AMReX-Codes.github.io) (structured, adaptive) 22 | packages as _demonstration vehicles_. 
Both of these packages include the basic functional pieces 23 | necessary to start from a continuous description of a physical problem to solve, through 24 | PDE specification, numerical analysis, discretization, algorithm development and then implementation. 25 | In addition, they include essential abstractions to support scalable, parallel expression 26 | of the algorithms and to orchestrate the application of various numerical packages in the 27 | solution. 28 | 29 | In addition, we use [PAPI](http://icl.utk.edu/papi) (performance counters) to enable users to 30 | observe variations in performance (time and space) as algorithmic choices are varied and 31 | [VisIt](http://visit.llnl.gov) to visualize results. 32 | 33 | These initial lessons are a starting point for a growing set of hands-on examples to demonstrate 34 | a broad range of numerical software packages. 35 | 36 | The packages demonstrated here benefit from many person-years of software development 37 | aimed at addressing such issues as extreme scalability and multi-modal parallelism such as message-passing, 38 | many-threads and/or GPUs. Numerical results computed with the packages demonstrated here have been vetted over 39 | many years of use in a variety of application settings. Nonetheless, because the main focus of these 40 | lessons is in **introducing** how to use these packages, there are likely few specific hands-on lessons 41 | here in which there are opportunities to observe these important capabilities of numerical software packages. 42 | 43 | See the [Contributing Guide](contribution_guide.md) for instructions on contributing lessons. 
44 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | exclude: tools 3 | -------------------------------------------------------------------------------- /contribution_guide.md: -------------------------------------------------------------------------------- 1 | # Contributing to HandsOnLessons 2 | 3 | ## About GitHub Pages and Jekyll 4 | 5 | [Jekyll](https://jekyllrb.com) allows site content developers to build beautiful 6 | mostly static sites by composing ASCII files involving a combination of three 7 | technologies... 8 | 9 | * [Markdown](https://guides.github.com/features/mastering-markdown/) 10 | (or [Textile](https://www.promptworks.com/textile)), 11 | * YAML [Front Matter](http://jekyllrb.com/docs/frontmatter/) page configuration code 12 | * [Liquid](https://shopify.github.io/liquid/) content filtering and page construction code 13 | 14 | The Jekyll engine reads source `.md` files and, optionally, a number of 15 | other CSS, HTML (and other Web technology code snippets and accouterments) stored in 16 | supporting files and directories in the repository (when necessary) and builds the 17 | site HTML files. 18 | 19 | On a GitHub Pages site, this process happens automatically upon each new commit of 20 | files to the site's repository. GitHub uses Jekyll to re-generate the site and the 21 | changes go live shortly thereafter. See below about how to preview changes to the 22 | site before committing them. 
23 | 24 | Within the repository, a single source `.md` file will contain not only Markdown 25 | content, but may also contain, optionally, a YAML code block at the beginning of 26 | the file (called its _front matter_) which holds information on how the page is 27 | to be configured when the site is built and, optionally, Liquid instructions 28 | which can appear anywhere in the file and which program the Jekyll engine on how 29 | to filter, merge and combine content snippets into a generated page. 30 | 31 | So far, we are not using either YAML Front Matter or Liquid here to build our 32 | site. We may eventually decide we might need to do that and it would be fine 33 | but there isn't anything yet that has indicated we have a need for that. I 34 | mention this because we can easily get much more sophisticated than we are 35 | currently in managing and hosting this content. 36 | 37 | ## Previewing your changes locally 38 | 39 | If you are new to GitHub and Jekyll themed GitHub pages, read this section to 40 | learn how to preview your work locally before committing it to GitHub. On the 41 | other hand, if it's easier for you, you can just commit changes to GitHub, see 42 | how they turn out there and modify if you are not satisfied. That workflow 43 | just takes a bit more time because GitHub/Jekyll backend may be delayed a 44 | minute or two in re-generating the site. And, it also means your changes 45 | are always going live. 46 | 47 | This site is a GitHub pages site using a GitHub [supported](https://pages.github.com/themes/) 48 | Jekyll theme backend. This means you compose content in GitHub Markdown and when you commit 49 | changes to GitHub, Jekyll generates the HTML pages for the site automatically. But, it also 50 | means it's a bit harder for you to preview, locally, your changes before committing. 
51 | 52 | But, if we don't like the current theme we're using, 53 | [Cayman](https://pages-themes.github.io/cayman/), we can easily change 54 | it by going to the repository's settings page on github, scrolling down to the 55 | GitHub Pages section and hitting the `Change Theme` button and then selecting 56 | another theme. Note, there are only a handful of themes supported by GitHub this 57 | way (e.g. easily switchable via the `Change Theme` button). However, that does not 58 | mean we cannot choose from another UNsupported theme. There are hundreds of 59 | [Jekyll themes](http://jekyllthemes.org) available and we should probably spend 60 | some more time to find the best one. Using an UNsupported theme simply means that we'd have 61 | to buy into committing all the Jekyll accouterments to our repo and, as a result, 62 | it is not as easy to switch the theme later if we don't like it. 63 | 64 | To permit the site to be easily switched to another GitHub supported Jekyll theme, 65 | I have chosen **not** to commit to the repository all the Jekyll accouterments. 66 | In any event, even if they were present, you would still wind up having to run 67 | Jekyll to re-build and serve the site, each time you want to preview it. 68 | To preview changes locally, you will need to run these commands 69 | 70 | ``` 71 | $ gem install github-pages 72 | $ bundle exec jekyll serve 73 | ``` 74 | 75 | To get all the tools on my Mac (gem, bundle, jekyll), I used Homebrew. 76 | 77 | Detailed instructions on setting up GitHub pages locally can be found [here](how_to_preview_locally.md). 78 | 79 | ## Including Math Equations 80 | 81 | I am not sure I have found the easiest solution here. I have read that MathJax may be 82 | better but it seemed to require a bit more configuration than I had time to play with and 83 | what I tried here seemed to work. The code snippet below demonstrates an example... 
84 | 85 | ``` 86 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20Q%7D%7B%5Cpartial%20t%7D%20%3D%20%5Cfrac%7B%5Cpartial%20s%7D%7B%5Cpartial%20t%7D) 87 | ``` 88 | 89 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20Q%7D%7B%5Cpartial%20t%7D%20%3D%20%5Cfrac%7B%5Cpartial%20s%7D%7B%5Cpartial%20t%7D) 90 | 91 | Everything after the question-mark character at the end of `latex?` and before the closing right-parenthesis 92 | is the URL-encoded form of the latex commands to generate the equation. Here are the steps... 93 | 94 | 1. Create the latex for the equation 95 | 2. URL-encode it using something like, [url-encode-decode](http://www.url-encode-decode.com) 96 | 3. Replace all instances of `+` that step 2 inserted to represent spaces with `%20` 97 | 4. Paste the resulting, possibly very long, string between `?` and enclosing `)` in the above. 98 | 99 | ## Adding a Hands On Example 100 | 101 | 1. Be sure to start from the [lesson template](lessons/lesson_template/lesson.md) 102 | 1. Add a new directory to `./lessons/` 103 | 1. Add a new line to `./lessons/lessons.md` for the new example 104 | 1. Copy the [lesson template](lessons/lesson_template/lesson.md), `./lessons/lesson_template/lesson.md` to `./lessons/`. 105 | 1. Edit/revise the copied `lesson.md` file to create the new lesson 106 | 1. You can link to images, source code files that you place in `./lessons/` 107 | using standard Markdown links. 108 | 109 | I think it would be best to put all content related to each hands-on lesson we develop here 110 | into its own separate directory. That means all images, example codes, markdown pages, etc. 111 | Then, we can have a separate page (or page hierarchy) that indexes the examples. 112 | 113 | I know Jekyll has a built-in concept of a `posts` object. That is because Jekyll is designed 114 | around the notion of supporting blogging. It may make sense to handle each hands-on kinda 115 | sorta like a `post` in Jekyll. 
But, I think that also means that content related to each 116 | lesson gets scattered across multiple directories (at least given the **default**) way that 117 | Jekyll seems to handle `posts`. We need to investigate proper use of Jekyll further **after** 118 | we've completed ATPESC. 119 | 120 | ## GitHub Style Primer 121 | 122 | This section is just a copy of boilerplate content from GitHub Pages template 123 | about how to use Markdown, etc. I have kept it here for convenience. 124 | 125 | You can use the [editor on GitHub](https://github.com/xsdk-project/HandsOnLessons/edit/master/README.md) to maintain and preview the content for your website in Markdown files. 126 | 127 | Whenever you commit to this repository, GitHub Pages will run [Jekyll](https://jekyllrb.com/) to rebuild the pages in your site, from the content in your Markdown files. 128 | 129 | ### Markdown 130 | 131 | Markdown is a lightweight and easy-to-use syntax for styling your writing. It includes conventions for 132 | 133 | ```markdown 134 | Syntax highlighted code block 135 | 136 | # Header 1 137 | ## Header 2 138 | ### Header 3 139 | 140 | - Bulleted 141 | - List 142 | 143 | 1. Numbered 144 | 2. List 145 | 146 | **Bold** and _Italic_ and `Code` text 147 | 148 | [Link](url) and ![Image](src) 149 | ``` 150 | 151 | For more details see [GitHub Flavored Markdown](https://guides.github.com/features/mastering-markdown/). 152 | 153 | ### Jekyll Themes 154 | 155 | Your Pages site will use the layout and styles from the Jekyll theme you have selected in your [repository settings](https://github.com/xsdk-project/HandsOnLessons/settings). The name of this theme is saved in the Jekyll `_config.yml` configuration file. 156 | 157 | ### Support or Contact 158 | 159 | Having trouble with Pages? Check out our [documentation](https://help.github.com/categories/github-pages-basics/) or [contact support](https://github.com/contact) and we’ll help you sort it out. 
160 | -------------------------------------------------------------------------------- /how_to_preview_locally.md: -------------------------------------------------------------------------------- 1 | # Preview the site locally 2 | 3 | ## Requirements 4 | 5 | * github-pages 6 | * bundler 7 | * jekyll 8 | 9 | The recommended way of installing these is to use **gem**, the [Ruby](https://www.ruby-lang.org/en/) package manager. 10 | 11 | ``` 12 | $ gem install github-pages 13 | $ gem install bundler 14 | $ gem install jekyll 15 | ``` 16 | 17 | But Ruby version 2.1.0 or higher is required. The version can be checked with 18 | ``` 19 | $ ruby -v 20 | ``` 21 | 22 | The native Ruby on my Mac (Sierra 10.12.6) is 2.0. So I took the following steps to upgrade it. 23 | 24 | 1. Open your terminal and run 25 | ``` 26 | $ \curl -sSL https://get.rvm.io | bash -s stable 27 | ``` 28 | When this is complete, you need to restart your terminal for the rvm to work. 29 | 1. Run 30 | ``` 31 | $ rvm install ruby-2.4 32 | ``` 33 | Type ruby -v in the terminal, if it is 2.4, you are done. 34 | 1. If it still shows ruby 2.0, run 35 | ``` 36 | $ rvm use ruby-2.4 37 | ``` 38 | To set this as the default version, run 39 | ``` 40 | $ rvm use ruby-2.4 --default 41 | ``` 42 | 43 | ## Preview your work using a browser 44 | 45 | 1. Navigate into the **root** directory of the project 46 | 1. Run Jekyll locally 47 | ``` 48 | $ bundle exec jekyll serve 49 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml 50 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml 51 | Source: /Users/hongzhang/Projects/HandsOnLessons 52 | Destination: /Users/hongzhang/Projects/HandsOnLessons/_site 53 | Incremental build: disabled. Enable with --incremental 54 | Generating... 55 | GitHub Metadata: No GitHub API authentication could be found. Some fields may be missing or have incorrect data. 56 | done in 0.582 seconds. 
57 | Auto-regeneration: enabled for '/Users/hongzhang/Projects/HandsOnLessons' 58 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml 59 | Server address: http://127.0.0.1:4000 60 | Server running... press ctrl-c to stop. 61 | ``` 62 | 1. Open your browser, preview the local site at **http://127.0.0.1:4000** 63 | -------------------------------------------------------------------------------- /lessons/AMG/lesson.md: -------------------------------------------------------------------------------- 1 | # Algebraic Multigrid 2 | 3 | ## At a Glance 4 | 5 | 6 | 7 | ``` 8 | Questions |Objectives |Key Points 9 | --------------------------|----------- -------------------------|-------------------------- 10 | Why multigrid over CG for |Understand multigrid concept |Faster convergence, 11 | large problems? | |better scalability 12 | Why use more aggressive |Understand need for low complexities |Lower memory use, faster 13 | coarsening for AMG? | |times, but more iterations 14 | Why a structured solver |Understand importance of suitable |Higher efficiency, 15 | for a structured problem? |data structures |faster solve times 16 | ``` 17 | 18 | **Note:** To begin this lesson... 19 | ``` 20 | cd handson/amrex/amg 21 | ``` 22 | 23 | ## The Problem 24 | 25 | The linear system to be solved is generated by AMReX from the following differential equation: 26 | 27 | ![equation](http://latex.codecogs.com/gif.latex?%5Cvarphi-%5CDelta%5Ccdot%5Cbeta%5Cnabla%5Cvarphi%3DRHS) 28 | 29 | with Dirichlet boundary conditions. 30 | 31 | The grid is a cube consisting of 128 x 128 x 128 cells, consisting of (at least) 8 subgrids. 32 | We also consider a larger grid with 256 x 256 x 256 cells. 
33 | 34 | The right hand side (left image) and solution (right image) are plotted below: 35 | 36 | 37 | 38 | 39 | ## The Example Input File 40 | 41 | To run AMReX with hypre, an input file is required to specify the desired hypre solvers to be used and also allows to define problem options, e.g. grid size, as well as solver options for some of the solvers. The content of the file 'inputs' is given below, although some specific input files are also provided for the handson exercises. 42 | 43 | ``` 44 | n_cell = 128 45 | max_grid_size = 64 46 | tol_rel = 1.e-6 47 | 48 | bc_type = Dirichlet # Dirichlet, Neumann, or periodic 49 | 50 | hypre.solver_flag = PFMG-PCG # SMG, or PFMG, SMG-PCG, PFMG-PCG, PCG, BoomerAMG, AMG-PCG, or DS-PCG 51 | hypre.print_level = 1 52 | #hypre.agg_num_levels = 1 # uses aggressive coarsening in BoomerAMG and AMG-PCG 53 | 54 | ## Below are some more BoomerAMG options which change 55 | #hypre.relax_type = 6 #uses symmetric Gauss-Seidel smoothing 56 | #hypre.coarsen_type = 8 #uses PMIS coarsening instead of HMIS 57 | #hypre.Pmx_elmts = 6 # changes max nnz per row from 4 to 6 in interpolation 58 | #hypre.interp_type = 0 #uses classified interpolation instead of distance-two interpolation 59 | #hypre.strong_threshold = 0.5 # changes strength threshold 60 | #hypre.max_row_sum = 1.0 # changes treatment of diagonal dominant portions 61 | 62 | ##Below are some more PFMG options which affect convergence and times 63 | #hypre.pfmg_rap_type = 1 # uses nonGalerkin version for PFMG and PFMG-CG 64 | #hypre.skip_relax = 0 # skips some relaxations in PFMG and PFMG-CG 65 | 66 | ``` 67 | 68 | ## Running the Example 69 | 70 | ### Exercise 1: Compare a generic iterative solver (CG) with multigrid 71 | 72 | Use the following command to solve our problem using conjugate gradient (CG): 73 | ``` 74 | /usr/bin/time -p mpiexec -n 8 ./amrex pcg 75 | ``` 76 | 77 | You should get some output that looks like this 78 | ``` 79 | MPI initialized with 8 MPI processes 80 | 81 | 
213 Hypre Solver Iterations, Relative Residual 9.6515871445080283e-07 82 | 83 | Max-norm of the error is 0.0002812723371 84 | Maximum absolute value of the solution is 0.9991262625 85 | Maximum absolute value of the rhs is 1661.007274 86 | real 3.46 87 | user 21.75 88 | sys 1.07 89 | ``` 90 | 91 | Now we solve the same problem using PFMG, the structured multigrid solver from hypre: 92 | ``` 93 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmg 94 | ``` 95 | 96 | You should get some output that looks like this 97 | ``` 98 | MPI initialized with 8 MPI processes 99 | 100 | 22 Hypre Solver Iterations, Relative Residual 8.2557612429588765e-07 101 | 102 | Max-norm of the error is 0.0002812747961 103 | Maximum absolute value of the solution is 0.9991262625 104 | Maximum absolute value of the rhs is 1661.007274 105 | real 1.47 106 | user 8.72 107 | sys 0.99 108 | ``` 109 | #### Examining Results 110 | 111 | Examine the number of iterations and the time listed in the line starting with 'real' for both runs. 
112 | 113 | #### Questions 114 | 115 | > **How do the numbers of iterations compare?** 116 | 117 | |PFMG converges much faster, almost 10 times as fast| 118 | 119 | > **How do the times compare?** 120 | 121 | |PFMG is more than twice as fast| 122 | 123 | > **What does this say about the cost of an iteration for CG compared to PFMG?** 124 | 125 | |One iteration of PFMG is more costly than one CG iteration.| 126 | 127 | 128 | ### Example 2 (Use PFMG as a preconditioner for CG) 129 | 130 | Now use the following command: 131 | ``` 132 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmgpcg 133 | ``` 134 | 135 | You should get some output that looks like this 136 | ``` 137 | MPI initialized with 8 MPI processes 138 | 139 | 10 Hypre Solver Iterations, Relative Residual 4.7155525002784425e-07 140 | 141 | Max-norm of the error is 0.0002813010027 142 | Maximum absolute value of the solution is 0.9991262625 143 | Maximum absolute value of the rhs is 1661.007274 144 | real 1.23 145 | user 6.73 146 | sys 0.98 147 | ``` 148 | 149 | #### Questions 150 | 151 | > **How does the number of iterations compare to that of PFMG without CG?** 152 | 153 | |PFMG with PCG converges about twice as fast as PFMG, 22 times as fast as CG.| 154 | 155 | > **How do the times compare?** 156 | 157 | |PFMG-PCG is faster than PFMG alone. It is almost 3 times as fast as CG.| 158 | 159 | > **What does this say about the cost of an iteration for PFMG-PCG compared to PFMG?** 160 | 161 | |One iteration of PFMG-PCG is more costly than one PFMG iteration.| 162 | 163 | Since using multigrid in combination with CG is faster than multigrid alone for the considered problem, we now only consider multigrid solvers in combination with CG for the sake of time in the hands-on exercises. 164 | 165 | ### Example 3 (Examine scalability of CG compared with PFMG-CG) 166 | 167 | We now solve the larger problem using first CG and then PFMG-PCG. 
168 | 169 | Now use the following command: 170 | ``` 171 | /usr/bin/time -p mpiexec -n 8 ./amrex pcg.large 172 | ``` 173 | 174 | You should get some output that looks like this 175 | ``` 176 | MPI initialized with 8 MPI processes 177 | 178 | 440 Hypre Solver Iterations, Relative Residual 9.9740013439759751e-07 179 | 180 | Max-norm of the error is 7.039397221e-05 181 | Maximum absolute value of the solution is 0.9997778176 182 | Maximum absolute value of the rhs is 1663.462965 183 | real 42.25 184 | user 333.29 185 | sys 2.47 186 | ``` 187 | 188 | Now use the following command: 189 | ``` 190 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmgpcg.large 191 | ``` 192 | 193 | You should get some output that looks like this 194 | ``` 195 | MPI initialized with 8 MPI processes 196 | 197 | 11 Hypre Solver Iterations, Relative Residual 2.5598385447572329e-07 198 | 199 | Max-norm of the error is 7.041558516e-05 200 | Maximum absolute value of the solution is 0.9997778176 201 | Maximum absolute value of the rhs is 1663.462965 202 | real 7.15 203 | user 52.28 204 | sys 2.93 205 | ``` 206 | 207 | #### Examining Results 208 | 209 | > **How do the numbers of iterations now compare?** 210 | 211 | |Iterations for PCG doubled, whereas PFMG-PCG only increased by 1. PFMG-PCG converges 40 times as fast as PCG.| 212 | 213 | > **How do the times compare?** 214 | 215 | |PFMG-PCG is almost 6 times as fast as PCG.| 216 | 217 | > **If you compare these numbers to the numbers for the smaller system, what do you observe?** 218 | 219 | |Times and iterations for PCG grow much faster than for PFMG-PCG with increasing problem size. PFMG-PCG is more scalable than PCG.| 220 | 221 | 222 | ### Example 4 (Examine complexities in AMG-PCG) 223 | 224 | We now go back to the smaller problem using AMG-PCG. 
225 | 226 | Now use the following command: 227 | ``` 228 | /usr/bin/time -p mpiexec -n 8 ./amrex amgpcg 229 | ``` 230 | 231 | You should get some output that looks like this 232 | ``` 233 | MPI initialized with 8 MPI processes 234 | 235 | Num MPI tasks = 8 236 | 237 | Num OpenMP threads = 1 238 | 239 | 240 | BoomerAMG SETUP PARAMETERS: 241 | 242 | Max levels = 25 243 | Num levels = 8 244 | 245 | Strength Threshold = 0.250000 246 | Interpolation Truncation Factor = 0.000000 247 | Maximum Row Sum Threshold for Dependency Weakening = 0.900000 248 | 249 | Coarsening Type = HMIS 250 | measures are determined locally 251 | 252 | 253 | No global partition option chosen. 254 | 255 | Interpolation = extended+i interpolation 256 | 257 | Operator Matrix Information: 258 | 259 | nonzero entries per row row sums 260 | lev rows entries sparse min max avg min max 261 | =================================================================== 262 | 0 2097152 14581760 0.000 4 7 7.0 1.000e-03 9.830e+05 263 | 1 1048122 19632610 0.000 7 42 18.7 1.998e-03 1.229e+06 264 | 2 199271 9681535 0.000 15 89 48.6 4.627e-03 1.397e+06 265 | 3 27167 2149919 0.003 17 140 79.1 2.503e-02 1.491e+06 266 | 4 3504 306430 0.025 13 185 87.5 3.300e-01 2.597e+06 267 | 5 458 32358 0.154 11 175 70.7 1.164e+00 7.021e+06 268 | 6 61 2375 0.638 10 60 38.9 -1.998e+09 7.281e+09 269 | 7 6 36 1.000 6 6 6.0 9.485e+06 7.651e+07 270 | 271 | Interpolation Matrix Information: 272 | entries/row min max row sums 273 | lev rows cols min max weight weight min max 274 | ================================================================= 275 | 0 2097152 x 1048122 1 4 1.111e-01 4.631e-01 3.333e-01 1.000e+00 276 | 1 1048122 x 199271 1 4 3.236e-03 5.927e-01 1.070e-01 1.000e+00 277 | 2 199271 x 27167 0 4 -1.101e-01 7.178e-01 0.000e+00 1.000e+00 278 | 3 27167 x 3504 0 4 -5.812e-01 6.900e-01 0.000e+00 1.000e+00 279 | 4 3504 x 458 0 4 -3.235e+01 6.382e+01 0.000e+00 1.000e+00 280 | 5 458 x 61 0 4 -3.563e+01 1.590e+01 -3.338e+01 1.000e+00 281 | 
6 61 x 6 0 3 2.779e-03 5.764e-01 0.000e+00 1.012e+00 282 | 283 | 284 | Complexity: grid = 1.609679 285 | operator = 3.181168 286 | memory = 3.843933 287 | 288 | 289 | 290 | 291 | BoomerAMG SOLVER PARAMETERS: 292 | 293 | Maximum number of cycles: 1 294 | Stopping Tolerance: 0.000000e+00 295 | Cycle type (1 = V, 2 = W, etc.): 1 296 | 297 | Relaxation Parameters: 298 | Visiting Grid: down up coarse 299 | Number of sweeps: 1 1 1 300 | Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: 13 14 9 301 | Point types, partial sweeps (1=C, -1=F): 302 | Pre-CG relaxation (down): 0 303 | Post-CG relaxation (up): 0 304 | Coarsest grid: 0 305 | 306 | 307 | 10 Hypre Solver Iterations, Relative Residual 6.9077383873163803e-07 308 | 309 | Max-norm of the error is 0.0002813125533 310 | Maximum absolute value of the solution is 0.9991262625 311 | Maximum absolute value of the rhs is 1661.007274 312 | real 4.63 313 | user 33.54 314 | sys 1.47 315 | ``` 316 | 317 | This output gives the stats for the developed AMG preconditioner. It shows the number of levels, the average number of nonzeros in total and per row for each matrix 318 | ![](http://latex.codecogs.com/gif.latex?A_i) 319 | as well as each interpolation operator. 320 | It also shows the operator complexity, which is defined as the sum of the number of nonzeroes of all operators 321 | ![](http://latex.codecogs.com/gif.latex?A_i) 322 | divided by the number of nonzeroes of the original matrix _A_ = 323 | ![](http://latex.codecogs.com/gif.latex?A_0) : 324 | 325 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Csum_%7Bi%3D0%7D%5EL%20nnz%28A_i%29%7D%7Bnnz%28A%29%7D). 
326 | 327 | The memory complexity also includes the number of nonzeroes of all interpolation operators in the sum: 328 | 329 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Csum_%7Bi%3D0%7D%5E%7BL-1%7D%20%28nnz%28A_i%29%2Bnnz%28P_i%29%29%2Bnnz%28A_L%29%7D%7Bnnz%28A%29%7D) 330 | 331 | #### Questions 332 | 333 | > **Is the operator complexity acceptable?** 334 | 335 | |No, it is too large, above 2!| 336 | 337 | > **How does the complexity affect performance?** 338 | 339 | |The method is slower than PFMG-PCG and even PCG, in spite of a low number of iterations.| 340 | 341 | Now, let us use AMG-PCG with aggressive coarsening turned on for the first level. 342 | 343 | ``` 344 | /usr/bin/time -p mpiexec -n 8 ./amrex amgpcg2 345 | ``` 346 | 347 | You should get some output that looks like this 348 | ``` 349 | MPI initialized with 8 MPI processes 350 | Num OpenMP threads = 1 351 | 352 | 353 | BoomerAMG SETUP PARAMETERS: 354 | 355 | Max levels = 25 356 | Num levels = 7 357 | 358 | Strength Threshold = 0.250000 359 | Interpolation Truncation Factor = 0.000000 360 | Maximum Row Sum Threshold for Dependency Weakening = 0.900000 361 | 362 | Coarsening Type = HMIS 363 | 364 | No. of levels of aggressive coarsening: 1 365 | 366 | Interpolation on agg. levels= multipass interpolation 367 | measures are determined locally 368 | 369 | 370 | No global partition option chosen. 
371 | 372 | Interpolation = extended+i interpolation 373 | 374 | Operator Matrix Information: 375 | 376 | nonzero entries per row row sums 377 | lev rows entries sparse min max avg min max 378 | =================================================================== 379 | 0 2097152 14581760 0.000 4 7 7.0 1.000e-03 9.830e+05 380 | 1 168473 3001117 0.000 9 36 17.8 1.196e-02 1.835e+06 381 | 2 36380 1786702 0.001 15 93 49.1 4.442e-02 3.245e+06 382 | 3 4862 345260 0.015 15 146 71.0 2.634e-01 5.022e+06 383 | 4 674 46930 0.103 14 184 69.6 1.035e+00 1.199e+07 384 | 5 84 3542 0.502 13 74 42.2 2.600e+06 5.014e+07 385 | 6 7 49 1.000 7 7 7.0 6.754e+06 2.370e+07 386 | 387 | 388 | Interpolation Matrix Information: 389 | entries/row min max row sums 390 | lev rows cols min max weight weight min max 391 | ================================================================= 392 | 0 2097152 x 168473 1 9 1.055e-02 1.000e+00 1.220e-01 1.000e+00 393 | 1 168473 x 36380 1 4 3.841e-03 1.000e+00 1.630e-01 1.000e+00 394 | 2 36380 x 4862 0 4 -4.129e-03 1.000e+00 0.000e+00 1.000e+00 395 | 3 4862 x 674 0 4 -1.383e-01 6.712e-01 0.000e+00 1.000e+00 396 | 4 674 x 84 0 4 -6.354e-01 6.935e-01 0.000e+00 1.000e+00 397 | 5 84 x 7 0 4 -2.982e-02 1.394e-01 0.000e+00 1.000e+00 398 | 399 | 400 | Complexity: grid = 1.100365 401 | operator = 1.355485 402 | memory = 1.707254 403 | 404 | 405 | 406 | 407 | BoomerAMG SOLVER PARAMETERS: 408 | Maximum number of cycles: 1 409 | Stopping Tolerance: 0.000000e+00 410 | Cycle type (1 = V, 2 = W, etc.): 1 411 | 412 | Relaxation Parameters: 413 | Visiting Grid: down up coarse 414 | Number of sweeps: 1 1 1 415 | Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: 13 14 9 416 | Point types, partial sweeps (1=C, -1=F): 417 | Pre-CG relaxation (down): 0 418 | Post-CG relaxation (up): 0 419 | Coarsest grid: 0 420 | 421 | 422 | 13 Hypre Solver Iterations, Relative Residual 2.6981728821542126e-07 423 | 424 | Max-norm of the error is 0.000281305921 425 | Maximum absolute value of the solution is 
0.9991262625 426 | Maximum absolute value of the rhs is 1661.007274 427 | real 2.17 428 | user 14.03 429 | sys 1.34 430 | ``` 431 | 432 | #### Questions 433 | 434 | > **How does the number of levels change? The complexity?** 435 | 436 | |There is one level less. The complexity is much improved, almost 3 times as small, clearly below 2, closer to 1.| 437 | 438 | > **How does this affect the performance?** 439 | 440 | |The time is more than twice as fast, however convergence is worse.| 441 | 442 | > **How does this compare to PFMG-PCG when applied to the same problem? Why?** 443 | 444 | |PFMG-PCG is almost twice as fast, even converges slightly faster. PFMG-PCG takes advantage of the structure in the problem, which AMG-PCG cannot do.| 445 | 446 | 447 | ## Out-Brief 448 | 449 | We investigated why multigrid methods are preferable over generic solvers like conjugate gradient for large suitable PDE problems. 450 | Additional improvements can be achieved when using them as preconditioners for Krylov solvers like conjugate gradient. 451 | For unstructured multigrid solvers, it is important to keep complexities low, since large complexities lead to slow solve times and require much memory. 452 | For structured problems, solvers that take advantage of the structure of the problem are more efficient than unstructured solvers. 
453 | 454 | 455 | ### Further Reading 456 | 457 | To learn more about algebraic multigrid, see 458 | [An Introduction to Algebraic Multigrid](https://computation.llnl.gov/projects/hypre-scalable-linear-solvers-multigrid-methods/CiSE_2006_amg_220851.pdf) 459 | 460 | More information on hypre , including documentation and further publications, can be found [here](http://www.llnl.gov/CASC/hypre) 461 | 462 | 463 | 464 |   465 | 466 | --- 467 | 468 | [Back to all HandsOnLessons](../lessons.md) 469 | -------------------------------------------------------------------------------- /lessons/AMG/rhs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMG/rhs.jpg -------------------------------------------------------------------------------- /lessons/AMG/solution.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMG/solution.jpg -------------------------------------------------------------------------------- /lessons/AMReX/VisIt_2D.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/VisIt_2D.pdf -------------------------------------------------------------------------------- /lessons/AMReX/lesson.md: -------------------------------------------------------------------------------- 1 | # AMReX -- a block-structured Adaptive Mesh Refinement (AMR) framework 2 | 3 | ## At a Glance 4 | 5 | 6 | 7 | ``` 8 | Questions |Objectives |Key Points 9 | --------------------------|----------- -------------------------|-------------------------- 10 | How do I start to use | Understand easy set-up | It's not hard to get started 11 | AMReX? 
| | 12 | | | 13 | How do I 'turn on' AMR? | Understand minimum specs for AMR | When the algorithm is correctly designed 14 | | | and implemented, AMR 'just works' 15 | | | 16 | How do I visualize AMR | Use Visit for AMR results | Visualization tools exist for AMR data. 17 | results? 18 | ``` 19 | 20 | ## Example: Single-Level Heat Equation 21 | 22 | ### The Equation and the Discretization 23 | 24 | First lets revisit the heat equation problem. 25 | 26 | This algorithm should look familiar to you -- in each time step we call the following two Fortran routines: 27 | ```fortran 28 | ! x-fluxes 29 | do j = lo(2), hi(2) 30 | do i = lo(1), hi(1)+1 31 | fluxx(i,j) = ( phi(i,j) - phi(i-1,j) ) / dx(1) 32 | end do 33 | end do 34 | 35 | ! y-fluxes 36 | do j = lo(2), hi(2)+1 37 | do i = lo(1), hi(1) 38 | fluxy(i,j) = ( phi(i,j) - phi(i,j-1) ) / dx(2) 39 | end do 40 | end do 41 | ``` 42 | 43 | and 44 | ``` 45 | do j = lo(2), hi(2) 46 | do i = lo(1), hi(1) 47 | 48 | phinew(i,j) = phiold(i,j) & 49 | + dtdx(1) * (fluxx(i+1,j ) - fluxx(i,j)) & 50 | + dtdx(2) * (fluxy(i ,j+1) - fluxy(i,j)) 51 | 52 | end do 53 | end do 54 | 55 | ``` 56 | 57 | The other parts of the algorithm -- that, in particular, involve MPI communication, are handled in the C++: 58 | 59 | ```C++ 60 | MultiFab::Copy(phi_old, phi_new, 0, 0, 1, 0); 61 | ``` 62 | 63 | and 64 | 65 | ```C++ 66 | old_phi.FillBoundary(geom.periodicity()); 67 | ``` 68 | 69 | See if it makes sense what order all of these are called in. 
70 | 71 | ### Running the Problem 72 | 73 | **Note:** To run this part of the lesson 74 | ``` 75 | cd handson/amrex/AMReX_diffusion 76 | ``` 77 | 78 | In this directory you'll see 79 | 80 | ``` 81 | main2d.gnu.MPI.ex -- the executable 82 | inputs_2d -- the inputs file 83 | fextract -- an executable that extracts a 1-d slice from 2-d or 3-d data 84 | extract_slice -- a simple script that calls fextract on each of our plotfiles 85 | plot_phi -- a simple gnuplot script to read and animate the 1-d slices 86 | ``` 87 | 88 | The inputs file currently has 89 | 90 | ``` 91 | nsteps = 20000 92 | n_cell = 256 256 93 | max_grid_size = 128 94 | plot_int = 1000 95 | is_periodic = 1 0 96 | 97 | ``` 98 | 99 | The grid is a square consisting of 256 x 256 cells, divided into 4 subgrids each 100 | of size 128x128 cells. The problem is periodic in the x-direction and not in the y-direction. 101 | This problem happens to be set-up to have homogeneous Neumann boundary conditions when not periodic. 102 | 103 | Let's try running this 2-d problem 104 | 105 | ``` 106 | ./main2d.gnu.MPI.ex inputs_2d 107 | ``` 108 | 109 | Then let's extract 1-d slices from the plotfiles and animate them 110 | 111 | ``` 112 | source extract_slice 113 | gnuplot plot_phi 114 | ``` 115 | 116 | This should make an animated gif like the one you see here. 117 | 118 | |[](phi.gif) 119 | 120 | If you'd like to see the 2-d solution, use Visit to open up a plotfile. 121 | 122 | ``` 123 | Select ``File'' then ``Open file ...'', 124 | then select the Header file associated with the plotfile of interest (e.g., _plt00000/Header_). 125 | Here are instructions (from the Users Guide) for making a simple plot: 126 | 127 | To view the data, select ``Add'' then ``Pseudocolor'' then ``phi'' then ``Draw''. 128 | 129 | To view the grid structure (not particularly interesting yet, but when we add AMR it will be), select 130 | ``subset'' then ``levels''. 
Then double-click the text ``Subset - levels'', 131 | enable the ``Wireframe'' option, select ``Apply'', select ``Dismiss'', and then select ``Draw''. 132 | 133 | To save the image, select ``File'' then ``Set save options'', then customize the image format 134 | to your liking, then click ``Save''. 135 | ``` 136 | 137 | Your images should look similar to those below. 138 | 139 | |Time Step 0|Time Step 10000| 140 | |:---:|:---:| 141 | |[](phi_diff_0.jpg)|[](phi_diff_10000.jpg) 142 | 143 | 144 | ## What does this do in parallel 145 | 146 | Let's now try 147 | ``` 148 | mpiexec -n 1 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000 | grep "Run time" 149 | mpiexec -n 2 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000 | grep "Run time" 150 | mpiexec -n 4 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000 | grep "Run time" 151 | ``` 152 | and see how the timings compare. 153 | 154 | Questions to think about: 155 | 156 | Why did we set plot_int = -1 in the command line? 157 | 158 | If this didn't scale perfectly, why not? 159 | 160 | ## Example: Multi-Level Advection 161 | 162 | ### The Equation and the Discretization 163 | 164 | Now let's consider scalar advection with a specified time-dependent velocity field. In this 165 | example we'll be using AMR. 166 | 167 | This algorithm should also look familiar to you -- in each time step we construct fluxes and use them to update the solution. 168 | ```fortran 169 | ! Do a conservative update 170 | do j = lo(2),hi(2) 171 | do i = lo(1),hi(1) 172 | uout(i,j) = uin(i,j) + & 173 | ( (flxx(i,j) - flxx(i+1,j)) * dtdx(1) & 174 | + (flxy(i,j) - flxy(i,j+1)) * dtdx(2) ) 175 | enddo 176 | enddo 177 | ``` 178 | 179 | Here the construction of the fluxes is a little more complicated, and because we are going to use AMR, we 180 | must save the fluxes at each level so that we can use them in a refluxing operation. 
The subcycling in time 181 | algorithm, which we haven't really had time to talk about, looks like 182 | ```C++ 183 | if (lev < finest_level) 184 | { 185 | // recursive call for next-finer level 186 | for (int i = 1; i <= nsubsteps[lev+1]; ++i) 187 | { 188 | timeStep(lev+1, time+(i-1)*dt[lev+1], i); 189 | } 190 | 191 | if (do_reflux) 192 | { 193 | // update lev based on coarse-fine flux mismatch 194 | flux_reg[lev+1]->Reflux(*phi_new[lev], 1.0, 0, 0, phi_new[lev]->nComp(), geom[lev]); 195 | } 196 | 197 | AverageDownTo(lev); // average lev+1 down to lev 198 | } 199 | ``` 200 | 201 | ### Running the Problem 202 | 203 | **Note:** To run this part of the lesson 204 | ``` 205 | cd handson/amrex/AMReX_advection 206 | ``` 207 | 208 | In this directory you'll see 209 | 210 | ``` 211 | main2d.gnu.MPI.ex -- the executable 212 | inputs -- the inputs file 213 | ``` 214 | 215 | The inputs file currently has 216 | 217 | ``` 218 | max_step = 120 219 | amr.n_cell = 64 64 220 | amr.max_grid_size = 32 221 | amr.plot_int = 10 222 | 223 | ``` 224 | 225 | The grid here is a square consisting of 64 x 64 cells, divided into 4 subgrids each 226 | of size 32x32 cells. The problem is periodic in the x-direction and not in the y-direction. 227 | This problem happens to be set-up to have homogeneous Neumann boundary conditions when not periodic. 228 | 229 | Let's try running this 2-d problem with no refinement 230 | 231 | ``` 232 | ./main2d.gnu.MPI.ex inputs amr.max_level=0 233 | ``` 234 | 235 | To see the 2-d solution, use Visit to look at plt00000 and plt00060, for example. 236 | You should see something like this (though these pictures are 237 | made using a different visualization program.) 238 | 239 | |Time Step 0|Time Step 60| 240 | |:---:|:---:| 241 | |[](phi_adv_noref.0.jpg)|[](phi_adv_noref.60.jpg) 242 | 243 | ## Now let's turn on AMR. 244 | 245 | Let's now run with 246 | ``` 247 | ./main2d.gnu.MPI.ex inputs amr.max_level=2 248 | ``` 249 | 250 | and again visualize the results. 
251 | 252 | |Time Step 0|Time Step 60| 253 | |:---:|:---:| 254 | |[](phi_adv_ref.0.jpg)|[](phi_adv_ref.60.jpg) 255 | 256 | ### Further Reading 257 | 258 | Learn more about AMReX [here](https://www.github.com/AMReX-codes/amrex) and take a look at the Users Guide in Docs. 259 | 260 | 261 | 262 |   263 | 264 | --- 265 | 266 | [Back to all HandsOnLessons](../lessons.md) 267 | -------------------------------------------------------------------------------- /lessons/AMReX/phi.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi.gif -------------------------------------------------------------------------------- /lessons/AMReX/phi_adv_noref.0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_noref.0.jpg -------------------------------------------------------------------------------- /lessons/AMReX/phi_adv_noref.60.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_noref.60.jpg -------------------------------------------------------------------------------- /lessons/AMReX/phi_adv_ref.0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_ref.0.jpg -------------------------------------------------------------------------------- /lessons/AMReX/phi_adv_ref.60.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_ref.60.jpg -------------------------------------------------------------------------------- /lessons/AMReX/phi_diff_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_diff_0.jpg -------------------------------------------------------------------------------- /lessons/AMReX/phi_diff_10000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_diff_10000.jpg -------------------------------------------------------------------------------- /lessons/adjoint/chkpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/chkpt.png -------------------------------------------------------------------------------- /lessons/adjoint/ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/ex1.png -------------------------------------------------------------------------------- /lessons/adjoint/ex1adj.c: -------------------------------------------------------------------------------- 1 | static char help[] = "Adjoint sensitivity of a hybrid system with state-dependent switchings.\n"; 2 | 3 | /* 4 | The dynamics is described by the ODE 5 | u_t = A_i u 6 | 7 | where A_1 = [ 1 -100 8 | 10 1 ], 9 | A_2 = [ 1 10 10 | -100 1 ]. 11 | The index i changes from 1 to 2 when u[1]=2.75u[0] and from 2 to 1 when u[1]=0.36u[0]. 12 | Initially u=[0 1]^T and i=1. 
13 | 14 | References: 15 | H. Zhang, S. Abhyankar, E. Constantinescu, M. Mihai, Discrete Adjoint Sensitivity Analysis of Hybrid Dynamical Systems With Switching, IEEE Transactions on Circuits and Systems I: Regular Papers, 64(5), May 2017 16 | I. A. Hiskens, M.A. Pai, Trajectory Sensitivity Analysis of Hybrid Systems, IEEE Transactions on Circuits and Systems, Vol 47, No 2, February 2000 17 | */ 18 | 19 | #include 20 | 21 | typedef struct { 22 | PetscScalar lambda1; 23 | PetscScalar lambda2; 24 | PetscInt mode; /* mode flag*/ 25 | } AppCtx; 26 | 27 | PetscErrorCode EventFunction(TS ts,PetscReal t,Vec U,PetscScalar *fvalue,void *ctx) 28 | { 29 | AppCtx *actx=(AppCtx*)ctx; 30 | PetscErrorCode ierr; 31 | const PetscScalar *u; 32 | 33 | PetscFunctionBegin; 34 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 35 | if (actx->mode == 1) { 36 | fvalue[0] = u[1]-actx->lambda1*u[0]; 37 | }else if (actx->mode == 2) { 38 | fvalue[0] = u[1]-actx->lambda2*u[0]; 39 | } 40 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 41 | PetscFunctionReturn(0); 42 | } 43 | 44 | PetscErrorCode ShiftGradients(TS ts,Vec U,AppCtx *actx) 45 | { 46 | Vec *lambda,*mu; 47 | PetscScalar *x,*y; 48 | const PetscScalar *u; 49 | PetscErrorCode ierr; 50 | PetscScalar tmp[2],A1[2][2],A2[2],denorm; 51 | PetscInt numcost; 52 | 53 | PetscFunctionBegin; 54 | ierr = TSGetCostGradients(ts,&numcost,&lambda,&mu);CHKERRQ(ierr); 55 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 56 | 57 | if (actx->mode==2) { 58 | denorm = -actx->lambda1*(u[0]-100.*u[1])+1.*(10.*u[0]+u[1]); 59 | A1[0][0] = 110.*u[1]*(-actx->lambda1)/denorm+1.; 60 | A1[0][1] = -110.*u[0]*(-actx->lambda1)/denorm; 61 | A1[1][0] = 110.*u[1]*1./denorm; 62 | A1[1][1] = -110.*u[0]*1./denorm+1.; 63 | 64 | A2[0] = 110.*u[1]*(-u[0])/denorm; 65 | A2[1] = -110.*u[0]*(-u[0])/denorm; 66 | } else { 67 | denorm = -actx->lambda2*(u[0]+10.*u[1])+1.*(-100.*u[0]+u[1]); 68 | A1[0][0] = 110.*u[1]*(actx->lambda2)/denorm+1; 69 | A1[0][1] = -110.*u[0]*(actx->lambda2)/denorm; 70 | 
A1[1][0] = -110.*u[1]*1./denorm; 71 | A1[1][1] = 110.*u[0]*1./denorm+1.; 72 | 73 | A2[0] = 0; 74 | A2[1] = 0; 75 | } 76 | 77 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 78 | 79 | ierr = VecGetArray(lambda[0],&x);CHKERRQ(ierr); 80 | ierr = VecGetArray(mu[0],&y);CHKERRQ(ierr); 81 | tmp[0] = A1[0][0]*x[0]+A1[0][1]*x[1]; 82 | tmp[1] = A1[1][0]*x[0]+A1[1][1]*x[1]; 83 | y[0] = y[0] + A2[0]*x[0]+A2[1]*x[1]; 84 | x[0] = tmp[0]; 85 | x[1] = tmp[1]; 86 | ierr = VecRestoreArray(mu[0],&y);CHKERRQ(ierr); 87 | ierr = VecRestoreArray(lambda[0],&x);CHKERRQ(ierr); 88 | 89 | ierr = VecGetArray(lambda[1],&x);CHKERRQ(ierr); 90 | ierr = VecGetArray(mu[1],&y);CHKERRQ(ierr); 91 | tmp[0] = A1[0][0]*x[0]+A1[0][1]*x[1]; 92 | tmp[1] = A1[1][0]*x[0]+A1[1][1]*x[1]; 93 | y[0] = y[0] + A2[0]*x[0]+A2[1]*x[1]; 94 | x[0] = tmp[0]; 95 | x[1] = tmp[1]; 96 | ierr = VecRestoreArray(mu[1],&y);CHKERRQ(ierr); 97 | ierr = VecRestoreArray(lambda[1],&x);CHKERRQ(ierr); 98 | PetscFunctionReturn(0); 99 | } 100 | 101 | PetscErrorCode PostEventFunction(TS ts,PetscInt nevents,PetscInt event_list[],PetscReal t,Vec U,PetscBool forwardsolve,void* ctx) 102 | { 103 | AppCtx *actx=(AppCtx*)ctx; 104 | PetscErrorCode ierr; 105 | 106 | PetscFunctionBegin; 107 | /* ierr = VecView(U,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ 108 | if (!forwardsolve) { 109 | ierr = ShiftGradients(ts,U,actx);CHKERRQ(ierr); 110 | } 111 | if (actx->mode == 1) { 112 | actx->mode = 2; 113 | /* ierr = PetscPrintf(PETSC_COMM_SELF,"Change from mode 1 to 2 at t = %f \n",t);CHKERRQ(ierr); */ 114 | } else if (actx->mode == 2) { 115 | actx->mode = 1; 116 | /* ierr = PetscPrintf(PETSC_COMM_SELF,"Change from mode 2 to 1 at t = %f \n",t);CHKERRQ(ierr); */ 117 | } 118 | PetscFunctionReturn(0); 119 | } 120 | 121 | /* 122 | Defines the ODE passed to the ODE solver 123 | */ 124 | static PetscErrorCode IFunction(TS ts,PetscReal t,Vec U,Vec Udot,Vec F,void *ctx) 125 | { 126 | AppCtx *actx=(AppCtx*)ctx; 127 | PetscErrorCode ierr; 128 | PetscScalar *f; 129 
| const PetscScalar *u,*udot; 130 | 131 | PetscFunctionBegin; 132 | /* The next three lines allow us to access the entries of the vectors directly */ 133 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 134 | ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr); 135 | ierr = VecGetArray(F,&f);CHKERRQ(ierr); 136 | 137 | if (actx->mode == 1) { 138 | f[0] = udot[0]-u[0]+100*u[1]; 139 | f[1] = udot[1]-10*u[0]-u[1]; 140 | } else if (actx->mode == 2) { 141 | f[0] = udot[0]-u[0]-10*u[1]; 142 | f[1] = udot[1]+100*u[0]-u[1]; 143 | } 144 | 145 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 146 | ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr); 147 | ierr = VecRestoreArray(F,&f);CHKERRQ(ierr); 148 | PetscFunctionReturn(0); 149 | } 150 | 151 | /* 152 | Defines the Jacobian of the ODE passed to the ODE solver. See TSSetIJacobian() for the meaning of a and the Jacobian. 153 | */ 154 | static PetscErrorCode IJacobian(TS ts,PetscReal t,Vec U,Vec Udot,PetscReal a,Mat A,Mat B,void *ctx) 155 | { 156 | AppCtx *actx=(AppCtx*)ctx; 157 | PetscErrorCode ierr; 158 | PetscInt rowcol[] = {0,1}; 159 | PetscScalar J[2][2]; 160 | const PetscScalar *u,*udot; 161 | 162 | PetscFunctionBegin; 163 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 164 | ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr); 165 | 166 | if (actx->mode == 1) { 167 | J[0][0] = a-1; J[0][1] = 100; 168 | J[1][0] = -10; J[1][1] = a-1; 169 | } else if (actx->mode == 2) { 170 | J[0][0] = a-1; J[0][1] = -10; 171 | J[1][0] = 100; J[1][1] = a-1; 172 | } 173 | ierr = MatSetValues(B,2,rowcol,2,rowcol,&J[0][0],INSERT_VALUES);CHKERRQ(ierr); 174 | 175 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 176 | ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr); 177 | 178 | ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 | ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 | if (A != B) { 181 | ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 | ierr = 
MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 | } 184 | PetscFunctionReturn(0); 185 | } 186 | 187 | /* Matrix JacobianP is constant so that it only needs to be evaluated once */ 188 | static PetscErrorCode RHSJacobianP(TS ts,PetscReal t,Vec X,Mat A, void *ctx) 189 | { 190 | PetscFunctionBeginUser; 191 | PetscFunctionReturn(0); 192 | } 193 | 194 | int main(int argc,char **argv) 195 | { 196 | TS ts; /* ODE integrator */ 197 | Vec U; /* solution will be stored here */ 198 | Mat A; /* Jacobian matrix */ 199 | Mat Ap; /* dfdp */ 200 | PetscErrorCode ierr; 201 | PetscMPIInt size; 202 | PetscInt n = 2; 203 | PetscScalar *u,*v; 204 | AppCtx app; 205 | PetscInt direction[1]; 206 | PetscBool terminate[1]; 207 | Vec lambda[2],mu[2]; 208 | PetscReal tend; 209 | 210 | FILE *f; 211 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 212 | Initialize program 213 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 214 | ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 215 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 216 | if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Only for sequential runs"); 217 | app.mode = 1; 218 | app.lambda1 = 2.75; 219 | app.lambda2 = 0.36; 220 | tend = 0.125; 221 | ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex1adj options","");CHKERRQ(ierr); 222 | { 223 | ierr = PetscOptionsReal("-lambda1","","",app.lambda1,&app.lambda1,NULL);CHKERRQ(ierr); 224 | ierr = PetscOptionsReal("-lambda2","","",app.lambda2,&app.lambda2,NULL);CHKERRQ(ierr); 225 | ierr = PetscOptionsReal("-tend","","",tend,&tend,NULL);CHKERRQ(ierr); 226 | } 227 | ierr = PetscOptionsEnd();CHKERRQ(ierr); 228 | 229 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 230 | Create necessary matrix and vectors 231 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 232 | ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 233 | ierr = 
MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 234 | ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr); 235 | ierr = MatSetFromOptions(A);CHKERRQ(ierr); 236 | ierr = MatSetUp(A);CHKERRQ(ierr); 237 | 238 | ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr); 239 | 240 | ierr = MatCreate(PETSC_COMM_WORLD,&Ap);CHKERRQ(ierr); 241 | ierr = MatSetSizes(Ap,n,1,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 242 | ierr = MatSetType(Ap,MATDENSE);CHKERRQ(ierr); 243 | ierr = MatSetFromOptions(Ap);CHKERRQ(ierr); 244 | ierr = MatSetUp(Ap);CHKERRQ(ierr); 245 | ierr = MatZeroEntries(Ap);CHKERRQ(ierr); /* initialize to zeros */ 246 | 247 | ierr = VecGetArray(U,&u);CHKERRQ(ierr); 248 | u[0] = 0; 249 | u[1] = 1; 250 | ierr = VecRestoreArray(U,&u);CHKERRQ(ierr); 251 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 252 | Create timestepping solver context 253 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 254 | ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); 255 | ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); 256 | ierr = TSSetType(ts,TSCN);CHKERRQ(ierr); 257 | ierr = TSSetIFunction(ts,NULL,(TSIFunction)IFunction,&app);CHKERRQ(ierr); 258 | ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,&app);CHKERRQ(ierr); 259 | 260 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 261 | Set initial conditions 262 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 263 | ierr = TSSetSolution(ts,U);CHKERRQ(ierr); 264 | 265 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 266 | Save trajectory of solution so that TSAdjointSolve() may be used 267 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 268 | ierr = TSSetSaveTrajectory(ts);CHKERRQ(ierr); 269 | 270 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 271 | Set solver options 272 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 273 | 
ierr = TSSetMaxTime(ts,tend);CHKERRQ(ierr); 274 | ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_MATCHSTEP);CHKERRQ(ierr); 275 | ierr = TSSetTimeStep(ts,1./256.);CHKERRQ(ierr); 276 | ierr = TSSetFromOptions(ts);CHKERRQ(ierr); 277 | 278 | /* Set directions and terminate flags for the two events */ 279 | direction[0] = 0; 280 | terminate[0] = PETSC_FALSE; 281 | ierr = TSSetEventHandler(ts,1,direction,terminate,EventFunction,PostEventFunction,(void*)&app);CHKERRQ(ierr); 282 | 283 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 284 | Run timestepping solver 285 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 286 | ierr = TSSolve(ts,U);CHKERRQ(ierr); 287 | 288 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 289 | Adjoint model starts here 290 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 291 | ierr = MatCreateVecs(A,&lambda[0],NULL);CHKERRQ(ierr); 292 | ierr = MatCreateVecs(A,&lambda[1],NULL);CHKERRQ(ierr); 293 | /* Set initial conditions for the adjoint integration */ 294 | ierr = VecZeroEntries(lambda[0]);CHKERRQ(ierr); 295 | ierr = VecZeroEntries(lambda[1]);CHKERRQ(ierr); 296 | ierr = VecGetArray(lambda[0],&u);CHKERRQ(ierr); 297 | u[0] = 1.; 298 | ierr = VecRestoreArray(lambda[0],&u);CHKERRQ(ierr); 299 | ierr = VecGetArray(lambda[1],&u);CHKERRQ(ierr); 300 | u[1] = 1.; 301 | ierr = VecRestoreArray(lambda[1],&u);CHKERRQ(ierr); 302 | 303 | ierr = MatCreateVecs(Ap,&mu[0],NULL);CHKERRQ(ierr); 304 | ierr = MatCreateVecs(Ap,&mu[1],NULL);CHKERRQ(ierr); 305 | ierr = VecZeroEntries(mu[0]);CHKERRQ(ierr); 306 | ierr = VecZeroEntries(mu[1]);CHKERRQ(ierr); 307 | ierr = TSSetCostGradients(ts,2,lambda,mu);CHKERRQ(ierr); 308 | 309 | /* Set RHS JacobianP */ 310 | ierr = TSAdjointSetRHSJacobian(ts,Ap,RHSJacobianP,&app);CHKERRQ(ierr); 311 | 312 | ierr = TSAdjointSolve(ts);CHKERRQ(ierr); 313 | 314 | /* 315 | ierr = VecView(lambda[0],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 316 | ierr = 
VecView(lambda[1],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 317 | ierr = VecView(mu[0],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 318 | ierr = VecView(mu[1],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 319 | */ 320 | ierr = VecGetArray(mu[0],&u);CHKERRQ(ierr); 321 | ierr = VecGetArray(mu[1],&v);CHKERRQ(ierr); 322 | f = fopen("adj_mu.out", "a"); 323 | ierr = PetscFPrintf(PETSC_COMM_WORLD,f,"%20.15lf %20.15lf %20.15lf\n",tend,u[0],v[0]);CHKERRQ(ierr); 324 | ierr = VecRestoreArray(mu[0],&u);CHKERRQ(ierr); 325 | ierr = VecRestoreArray(mu[1],&v);CHKERRQ(ierr); 326 | fclose(f); 327 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 328 | Free work space. All PETSc objects should be destroyed when they are no longer needed. 329 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 330 | ierr = MatDestroy(&A);CHKERRQ(ierr); 331 | ierr = VecDestroy(&U);CHKERRQ(ierr); 332 | ierr = TSDestroy(&ts);CHKERRQ(ierr); 333 | 334 | ierr = MatDestroy(&Ap);CHKERRQ(ierr); 335 | ierr = VecDestroy(&lambda[0]);CHKERRQ(ierr); 336 | ierr = VecDestroy(&lambda[1]);CHKERRQ(ierr); 337 | ierr = VecDestroy(&mu[0]);CHKERRQ(ierr); 338 | ierr = VecDestroy(&mu[1]);CHKERRQ(ierr); 339 | ierr = PetscFinalize(); 340 | return ierr; 341 | } 342 | -------------------------------------------------------------------------------- /lessons/adjoint/ex1adj.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/ex1adj.png -------------------------------------------------------------------------------- /lessons/adjoint/ex3opt.c: -------------------------------------------------------------------------------- 1 | 2 | static char help[] = "Finds optimal parameter P_m for the generator system while maintaining generator stability.\n"; 3 | 4 | /*F 5 | 6 | \begin{eqnarray} 7 | \frac{d \theta}{dt} = \omega_b (\omega - \omega_s) 8 | 
\frac{2 H}{\omega_s}\frac{d \omega}{dt} & = & P_m - P_max \sin(\theta) -D(\omega - \omega_s)\\ 9 | \end{eqnarray} 10 | 11 | F*/ 12 | 13 | /* 14 | This code demonstrates how to solve a ODE-constrained optimization problem with TAO, TSEvent, TSAdjoint and TS. 15 | The problem features discontinuities and a cost function in integral form. 16 | The gradient is computed with the discrete adjoint of an implicit theta method, see ex3adj.c for details. 17 | */ 18 | #include 19 | #include 20 | 21 | typedef struct { 22 | PetscScalar H,D,omega_b,omega_s,Pmax,Pmax_ini,Pm,E,V,X,u_s,c; 23 | PetscInt beta; 24 | PetscReal tf,tcl; 25 | } AppCtx; 26 | 27 | PetscErrorCode FormFunctionGradient(Tao,Vec,PetscReal*,Vec,void*); 28 | 29 | /* Event check */ 30 | PetscErrorCode EventFunction(TS ts,PetscReal t,Vec X,PetscScalar *fvalue,void *ctx) 31 | { 32 | AppCtx *user=(AppCtx*)ctx; 33 | 34 | PetscFunctionBegin; 35 | /* Event for fault-on time */ 36 | fvalue[0] = t - user->tf; 37 | /* Event for fault-off time */ 38 | fvalue[1] = t - user->tcl; 39 | 40 | PetscFunctionReturn(0); 41 | } 42 | 43 | PetscErrorCode PostEventFunction(TS ts,PetscInt nevents,PetscInt event_list[],PetscReal t,Vec X,PetscBool forwardsolve,void* ctx) 44 | { 45 | AppCtx *user=(AppCtx*)ctx; 46 | 47 | PetscFunctionBegin; 48 | 49 | if (event_list[0] == 0) { 50 | if (forwardsolve) user->Pmax = 0.0; /* Apply disturbance - this is done by setting Pmax = 0 */ 51 | else user->Pmax = user->Pmax_ini; /* Going backward, reversal of event */ 52 | } else if(event_list[0] == 1) { 53 | if (forwardsolve) user->Pmax = user->Pmax_ini; /* Remove the fault - this is done by setting Pmax = Pmax_ini */ 54 | else user->Pmax = 0.0; /* Going backward, reversal of event */ 55 | } 56 | PetscFunctionReturn(0); 57 | } 58 | 59 | /* 60 | Defines the ODE passed to the ODE solver 61 | */ 62 | static PetscErrorCode IFunction(TS ts,PetscReal t,Vec U,Vec Udot,Vec F,AppCtx *ctx) 63 | { 64 | PetscErrorCode ierr; 65 | PetscScalar *f,Pmax; 66 | const 
PetscScalar *u,*udot; 67 | 68 | PetscFunctionBegin; 69 | /* The next three lines allow us to access the entries of the vectors directly */ 70 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 71 | ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr); 72 | ierr = VecGetArray(F,&f);CHKERRQ(ierr); 73 | Pmax = ctx->Pmax; 74 | f[0] = udot[0] - ctx->omega_b*(u[1] - ctx->omega_s); 75 | f[1] = 2.0*ctx->H/ctx->omega_s*udot[1] + Pmax*PetscSinScalar(u[0]) + ctx->D*(u[1] - ctx->omega_s)- ctx->Pm; 76 | 77 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 78 | ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr); 79 | ierr = VecRestoreArray(F,&f);CHKERRQ(ierr); 80 | PetscFunctionReturn(0); 81 | } 82 | 83 | /* 84 | Defines the Jacobian of the ODE passed to the ODE solver. See TSSetIJacobian() for the meaning of a and the Jacobian. 85 | */ 86 | static PetscErrorCode IJacobian(TS ts,PetscReal t,Vec U,Vec Udot,PetscReal a,Mat A,Mat B,AppCtx *ctx) 87 | { 88 | PetscErrorCode ierr; 89 | PetscInt rowcol[] = {0,1}; 90 | PetscScalar J[2][2],Pmax; 91 | const PetscScalar *u,*udot; 92 | 93 | PetscFunctionBegin; 94 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 95 | ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr); 96 | Pmax = ctx->Pmax; 97 | J[0][0] = a; J[0][1] = -ctx->omega_b; 98 | J[1][1] = 2.0*ctx->H/ctx->omega_s*a + ctx->D; J[1][0] = Pmax*PetscCosScalar(u[0]); 99 | 100 | ierr = MatSetValues(B,2,rowcol,2,rowcol,&J[0][0],INSERT_VALUES);CHKERRQ(ierr); 101 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 102 | ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr); 103 | 104 | ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 105 | ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 | if (A != B) { 107 | ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 108 | ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 109 | } 110 | PetscFunctionReturn(0); 111 | } 112 | 113 | static PetscErrorCode RHSJacobianP(TS ts,PetscReal t,Vec X,Mat A,void *ctx0) 114 | { 115 | 
PetscErrorCode ierr; 116 | PetscInt row[] = {0,1},col[]={0}; 117 | PetscScalar J[2][1]; 118 | 119 | PetscFunctionBeginUser; 120 | J[0][0] = 0; 121 | J[1][0] = 1.; 122 | ierr = MatSetValues(A,2,row,1,col,&J[0][0],INSERT_VALUES);CHKERRQ(ierr); 123 | ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 | ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 | PetscFunctionReturn(0); 126 | } 127 | 128 | static PetscErrorCode CostIntegrand(TS ts,PetscReal t,Vec U,Vec R,AppCtx *ctx) 129 | { 130 | PetscErrorCode ierr; 131 | PetscScalar *r; 132 | const PetscScalar *u; 133 | 134 | PetscFunctionBegin; 135 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 136 | ierr = VecGetArray(R,&r);CHKERRQ(ierr); 137 | r[0] = ctx->c*PetscPowScalarInt(PetscMax(0., u[0]-ctx->u_s),ctx->beta);CHKERRQ(ierr); 138 | ierr = VecRestoreArray(R,&r);CHKERRQ(ierr); 139 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 140 | PetscFunctionReturn(0); 141 | } 142 | 143 | static PetscErrorCode DRDYFunction(TS ts,PetscReal t,Vec U,Vec *drdy,AppCtx *ctx) 144 | { 145 | PetscErrorCode ierr; 146 | PetscScalar *ry; 147 | const PetscScalar *u; 148 | 149 | PetscFunctionBegin; 150 | ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr); 151 | ierr = VecGetArray(drdy[0],&ry);CHKERRQ(ierr); 152 | ry[0] = ctx->c*ctx->beta*PetscPowScalarInt(PetscMax(0., u[0]-ctx->u_s),ctx->beta-1);CHKERRQ(ierr); 153 | ierr = VecRestoreArray(drdy[0],&ry);CHKERRQ(ierr); 154 | ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr); 155 | PetscFunctionReturn(0); 156 | } 157 | 158 | static PetscErrorCode DRDPFunction(TS ts,PetscReal t,Vec U,Vec *drdp,AppCtx *ctx) 159 | { 160 | PetscErrorCode ierr; 161 | PetscScalar *rp; 162 | 163 | PetscFunctionBegin; 164 | ierr = VecGetArray(drdp[0],&rp);CHKERRQ(ierr); 165 | rp[0] = 0.; 166 | ierr = VecRestoreArray(drdp[0],&rp);CHKERRQ(ierr); 167 | PetscFunctionReturn(0); 168 | } 169 | 170 | PetscErrorCode ComputeSensiP(Vec lambda,Vec mu,AppCtx *ctx) 171 | { 172 | PetscErrorCode ierr; 173 | PetscScalar 
*y,sensip; 174 | const PetscScalar *x; 175 | 176 | PetscFunctionBegin; 177 | ierr = VecGetArrayRead(lambda,&x);CHKERRQ(ierr); 178 | ierr = VecGetArray(mu,&y);CHKERRQ(ierr); 179 | sensip = 1./PetscSqrtScalar(1.-(ctx->Pm/ctx->Pmax)*(ctx->Pm/ctx->Pmax))/ctx->Pmax*x[0]+y[0]; 180 | /* ierr = PetscPrintf(PETSC_COMM_WORLD,"\n sensitivity wrt parameter pm: %g \n",(double)sensip);CHKERRQ(ierr); */ 181 | y[0] = sensip; 182 | ierr = VecRestoreArray(mu,&y);CHKERRQ(ierr); 183 | ierr = VecRestoreArrayRead(lambda,&x);CHKERRQ(ierr); 184 | PetscFunctionReturn(0); 185 | } 186 | 187 | PetscErrorCode monitor(Tao tao,AppCtx *ctx) 188 | { 189 | FILE *fp; 190 | PetscInt iterate; 191 | PetscReal f,gnorm,cnorm,xdiff; 192 | TaoConvergedReason reason; 193 | PetscErrorCode ierr; 194 | 195 | PetscFunctionBeginUser; 196 | ierr = TaoGetSolutionStatus(tao,&iterate,&f,&gnorm,&cnorm,&xdiff,&reason);CHKERRQ(ierr); 197 | 198 | fp = fopen("ex3opt_conv.out","a"); 199 | ierr = PetscFPrintf(PETSC_COMM_WORLD,fp,"%D %g\n",iterate,(double)gnorm);CHKERRQ(ierr); 200 | fclose(fp); 201 | PetscFunctionReturn(0); 202 | } 203 | 204 | int main(int argc,char **argv) 205 | { 206 | Vec p; 207 | PetscScalar *x_ptr; 208 | PetscErrorCode ierr; 209 | PetscMPIInt size; 210 | AppCtx ctx; 211 | Tao tao; 212 | KSP ksp; 213 | PC pc; 214 | Vec lowerb,upperb; 215 | PetscBool printtofile; 216 | 217 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 218 | Initialize program 219 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 220 | ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr; 221 | PetscFunctionBeginUser; 222 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 223 | if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!"); 224 | 225 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 226 | Set runtime options 227 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 228 | ierr = 
PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Swing equation options","");CHKERRQ(ierr); 229 | { 230 | ctx.beta = 2; 231 | ctx.c = 10000.0; 232 | ctx.u_s = 1.0; 233 | ctx.omega_s = 1.0; 234 | ctx.omega_b = 120.0*PETSC_PI; 235 | ctx.H = 5.0; 236 | ierr = PetscOptionsScalar("-Inertia","","",ctx.H,&ctx.H,NULL);CHKERRQ(ierr); 237 | ctx.D = 5.0; 238 | ierr = PetscOptionsScalar("-D","","",ctx.D,&ctx.D,NULL);CHKERRQ(ierr); 239 | ctx.E = 1.1378; 240 | ctx.V = 1.0; 241 | ctx.X = 0.545; 242 | ctx.Pmax = ctx.E*ctx.V/ctx.X;; 243 | ctx.Pmax_ini = ctx.Pmax; 244 | ierr = PetscOptionsScalar("-Pmax","","",ctx.Pmax,&ctx.Pmax,NULL);CHKERRQ(ierr); 245 | ctx.Pm = 1.06; 246 | ierr = PetscOptionsScalar("-Pm","","",ctx.Pm,&ctx.Pm,NULL);CHKERRQ(ierr); 247 | ctx.tf = 0.1; 248 | ctx.tcl = 0.2; 249 | ierr = PetscOptionsReal("-tf","Time to start fault","",ctx.tf,&ctx.tf,NULL);CHKERRQ(ierr); 250 | ierr = PetscOptionsReal("-tcl","Time to end fault","",ctx.tcl,&ctx.tcl,NULL);CHKERRQ(ierr); 251 | printtofile = PETSC_FALSE; 252 | ierr = PetscOptionsBool("-printtofile","Print convergence results to file","",printtofile,&printtofile,NULL);CHKERRQ(ierr); 253 | } 254 | ierr = PetscOptionsEnd();CHKERRQ(ierr); 255 | 256 | /* Create TAO solver and set desired solution method */ 257 | ierr = TaoCreate(PETSC_COMM_WORLD,&tao);CHKERRQ(ierr); 258 | ierr = TaoSetType(tao,TAOBLMVM);CHKERRQ(ierr); 259 | if(printtofile) { 260 | ierr = TaoSetMonitor(tao,(PetscErrorCode (*)(Tao, void*))monitor,(void *)&ctx,PETSC_NULL);CHKERRQ(ierr); 261 | } 262 | /* 263 | Optimization starts 264 | */ 265 | /* Set initial solution guess */ 266 | ierr = VecCreateSeq(PETSC_COMM_WORLD,1,&p);CHKERRQ(ierr); 267 | ierr = VecGetArray(p,&x_ptr);CHKERRQ(ierr); 268 | x_ptr[0] = ctx.Pm; 269 | ierr = VecRestoreArray(p,&x_ptr);CHKERRQ(ierr); 270 | 271 | ierr = TaoSetInitialVector(tao,p);CHKERRQ(ierr); 272 | /* Set routine for function and gradient evaluation */ 273 | ierr = TaoSetObjectiveAndGradientRoutine(tao,FormFunctionGradient,(void 
*)&ctx);CHKERRQ(ierr); 274 | 275 | /* Set bounds for the optimization */ 276 | ierr = VecDuplicate(p,&lowerb);CHKERRQ(ierr); 277 | ierr = VecDuplicate(p,&upperb);CHKERRQ(ierr); 278 | ierr = VecGetArray(lowerb,&x_ptr);CHKERRQ(ierr); 279 | x_ptr[0] = 0.; 280 | ierr = VecRestoreArray(lowerb,&x_ptr);CHKERRQ(ierr); 281 | ierr = VecGetArray(upperb,&x_ptr);CHKERRQ(ierr); 282 | x_ptr[0] = 1.1; 283 | ierr = VecRestoreArray(upperb,&x_ptr);CHKERRQ(ierr); 284 | ierr = TaoSetVariableBounds(tao,lowerb,upperb);CHKERRQ(ierr); 285 | 286 | /* Check for any TAO command line options */ 287 | ierr = TaoSetFromOptions(tao);CHKERRQ(ierr); 288 | ierr = TaoGetKSP(tao,&ksp);CHKERRQ(ierr); 289 | if (ksp) { 290 | ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); 291 | ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr); 292 | } 293 | 294 | /* SOLVE THE APPLICATION */ 295 | ierr = TaoSolve(tao);CHKERRQ(ierr); 296 | 297 | ierr = VecView(p,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 298 | ierr = VecDestroy(&p);CHKERRQ(ierr); 299 | ierr = VecDestroy(&lowerb);CHKERRQ(ierr); 300 | ierr = VecDestroy(&upperb);CHKERRQ(ierr); 301 | ierr = TaoDestroy(&tao);CHKERRQ(ierr); 302 | ierr = PetscFinalize(); 303 | return ierr; 304 | } 305 | 306 | /* ------------------------------------------------------------------ */ 307 | /* 308 | FormFunctionGradient - Evaluates the function and corresponding gradient. 
309 | 310 | Input Parameters: 311 | tao - the Tao context 312 | X - the input vector 313 | ptr - optional user-defined context, as set by TaoSetObjectiveAndGradientRoutine() 314 | 315 | Output Parameters: 316 | f - the newly evaluated function 317 | G - the newly evaluated gradient 318 | */ 319 | PetscErrorCode FormFunctionGradient(Tao tao,Vec P,PetscReal *f,Vec G,void *ctx0) 320 | { 321 | AppCtx *ctx = (AppCtx*)ctx0; 322 | TS ts; 323 | Vec U; /* solution will be stored here */ 324 | Mat A; /* Jacobian matrix */ 325 | Mat Jacp; /* Jacobian matrix */ 326 | PetscErrorCode ierr; 327 | PetscInt n = 2; 328 | PetscReal ftime; 329 | PetscInt steps; 330 | PetscScalar *u; 331 | PetscScalar *x_ptr,*y_ptr; 332 | Vec lambda[1],q,mu[1]; 333 | PetscInt direction[2]; 334 | PetscBool terminate[2]; 335 | 336 | ierr = VecGetArray(P,&x_ptr);CHKERRQ(ierr); 337 | ctx->Pm = x_ptr[0]; 338 | ierr = VecRestoreArray(P,&x_ptr);CHKERRQ(ierr); 339 | 340 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 341 | Create necessary matrix and vectors 342 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 343 | ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 344 | ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 345 | ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr); 346 | ierr = MatSetFromOptions(A);CHKERRQ(ierr); 347 | ierr = MatSetUp(A);CHKERRQ(ierr); 348 | 349 | ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr); 350 | 351 | ierr = MatCreate(PETSC_COMM_WORLD,&Jacp);CHKERRQ(ierr); 352 | ierr = MatSetSizes(Jacp,PETSC_DECIDE,PETSC_DECIDE,2,1);CHKERRQ(ierr); 353 | ierr = MatSetFromOptions(Jacp);CHKERRQ(ierr); 354 | ierr = MatSetUp(Jacp);CHKERRQ(ierr); 355 | 356 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 357 | Create timestepping solver context 358 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 359 | ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); 360 | ierr = 
TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr); 361 | ierr = TSSetType(ts,TSCN);CHKERRQ(ierr); 362 | ierr = TSSetIFunction(ts,NULL,(TSIFunction) IFunction,ctx);CHKERRQ(ierr); 363 | ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,ctx);CHKERRQ(ierr); 364 | 365 | ierr = TSSetCostIntegrand(ts,1,NULL,(PetscErrorCode (*)(TS,PetscReal,Vec,Vec,void*))CostIntegrand, 366 | (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDYFunction, 367 | (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDPFunction,PETSC_TRUE,ctx);CHKERRQ(ierr); 368 | 369 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 370 | Set initial conditions 371 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 372 | ierr = VecGetArray(U,&u);CHKERRQ(ierr); 373 | u[0] = PetscAsinScalar(ctx->Pm/ctx->Pmax); 374 | u[1] = 1.0; 375 | ierr = VecRestoreArray(U,&u);CHKERRQ(ierr); 376 | ierr = TSSetSolution(ts,U);CHKERRQ(ierr); 377 | 378 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 379 | Save trajectory of solution so that TSAdjointSolve() may be used 380 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 381 | ierr = TSSetSaveTrajectory(ts);CHKERRQ(ierr); 382 | 383 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 384 | Set solver options 385 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 386 | ierr = TSSetMaxTime(ts,1.0);CHKERRQ(ierr); 387 | ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_MATCHSTEP);CHKERRQ(ierr); 388 | ierr = TSSetTimeStep(ts,0.03125);CHKERRQ(ierr); 389 | ierr = TSSetFromOptions(ts);CHKERRQ(ierr); 390 | 391 | direction[0] = direction[1] = 1; 392 | terminate[0] = terminate[1] = PETSC_FALSE; 393 | 394 | ierr = TSSetEventHandler(ts,2,direction,terminate,EventFunction,PostEventFunction,(void*)ctx);CHKERRQ(ierr); 395 | 396 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 397 | Solve nonlinear system 398 | - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - */ 399 | ierr = TSSolve(ts,U);CHKERRQ(ierr); 400 | 401 | ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr); 402 | ierr = TSGetStepNumber(ts,&steps);CHKERRQ(ierr); 403 | /* ierr = VecView(U,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ 404 | 405 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 406 | Adjoint model starts here 407 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 408 | ierr = MatCreateVecs(A,&lambda[0],NULL);CHKERRQ(ierr); 409 | /* Set initial conditions for the adjoint integration */ 410 | ierr = VecGetArray(lambda[0],&y_ptr);CHKERRQ(ierr); 411 | y_ptr[0] = 0.0; y_ptr[1] = 0.0; 412 | ierr = VecRestoreArray(lambda[0],&y_ptr);CHKERRQ(ierr); 413 | 414 | ierr = MatCreateVecs(Jacp,&mu[0],NULL);CHKERRQ(ierr); 415 | ierr = VecGetArray(mu[0],&x_ptr);CHKERRQ(ierr); 416 | x_ptr[0] = -1.0; 417 | ierr = VecRestoreArray(mu[0],&x_ptr);CHKERRQ(ierr); 418 | ierr = TSSetCostGradients(ts,1,lambda,mu);CHKERRQ(ierr); 419 | 420 | /* Set RHS JacobianP */ 421 | ierr = TSAdjointSetRHSJacobian(ts,Jacp,RHSJacobianP,ctx);CHKERRQ(ierr); 422 | 423 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 424 | One can set up the integral to be evaluated during the forward run 425 | instead by calling this function before TSSolve and specifying 426 | PETSC_TRUE for the second last argument 427 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 428 | ierr = TSSetCostIntegrand(ts,1,NULL,(PetscErrorCode (*)(TS,PetscReal,Vec,Vec,void*))CostIntegrand, 429 | (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDYFunction, 430 | (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDPFunction,PETSC_FALSE,ctx);CHKERRQ(ierr); 431 | 432 | ierr = TSAdjointSolve(ts);CHKERRQ(ierr); 433 | ierr = TSGetCostIntegral(ts,&q);CHKERRQ(ierr); 434 | ierr = ComputeSensiP(lambda[0],mu[0],ctx);CHKERRQ(ierr); 435 | ierr = VecCopy(mu[0],G);CHKERRQ(ierr); 436 | 437 | ierr = 
TSGetCostIntegral(ts,&q);CHKERRQ(ierr); 438 | ierr = VecGetArray(q,&x_ptr);CHKERRQ(ierr); 439 | *f = -ctx->Pm + x_ptr[0]; 440 | ierr = VecRestoreArray(q,&x_ptr);CHKERRQ(ierr); 441 | 442 | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 443 | Free work space. All PETSc objects should be destroyed when they are no longer needed. 444 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 445 | ierr = MatDestroy(&A);CHKERRQ(ierr); 446 | ierr = MatDestroy(&Jacp);CHKERRQ(ierr); 447 | ierr = VecDestroy(&U);CHKERRQ(ierr); 448 | ierr = VecDestroy(&lambda[0]);CHKERRQ(ierr); 449 | ierr = VecDestroy(&mu[0]);CHKERRQ(ierr); 450 | ierr = TSDestroy(&ts);CHKERRQ(ierr); 451 | 452 | return 0; 453 | } 454 | -------------------------------------------------------------------------------- /lessons/adjoint/lesson.md: -------------------------------------------------------------------------------- 1 | # Using adjoint for optimization 2 | 3 | ## At a Glance 4 | 5 | 6 | ``` 7 | Questions |Objectives |Key Points 8 | --------------------------|-------------------------------|------------------------------------- 9 | How can gradients be      |Know PETSc/TAO's capability for|Adjoint enables dynamic 10 | computed for simulations? |adjoint and optimization |constrained optimization. 11 | | | 12 | How difficult is it to |Understand ingredients needed |Jacobian is imperative. 13 | use the adjoint method? |for adjoint calculation | 14 | | | 15 | |Understand the concern of |Performance may depend on 16 | |checkpointing |checkpointing at large scale. 17 | ``` 18 | 19 | **Note:** To begin this lesson... 20 | ``` 21 | cd handson/adjoint 22 | ``` 23 | 24 | ## Example 1: Generator Stability Analysis: 25 | 26 | This code uses [PETSc/TAO](https://www.mcs.anl.gov/petsc/) to demonstrates how to solve an ODE-constrained optimization problem with the Toolkit for Advanced Optimization (TAO), TSEvent, TSAdjoint and TS. 
27 | The objective is to maximize the mechanical power input subject to the generator swing equations and a constraint on the maximum rotor angle deviation, which is reformulated as a minimization problem 28 | 29 | ![equation](http://latex.codecogs.com/gif.latex?%5Cbegin%7Balign%2A%7D%0D%0A%20%20%5Cmin%20%26%20%5C%7B-P_m%20%2B%20%5Csigma%5Cdisplaystyle%20%5Cint_%7Bt_0%7D%5E%7Bt_F%7D%20%5Cmax%5Cleft%280%2C%20%5Ctheta%20-%20%5Ctheta_%7Bmax%7D%5Cright%29%5E%5Ceta%20%5C%20%5Cmathrm%7Bd%7Dt%20%5C%7D%5C%5C%0D%0A%20%20%5Cnonumber%20%7E%7E%20%5Ctext%7Bs.t.%7D%20%26%20%5Cqquad%20%5Cfrac%7Bd%20%5Ctheta%7D%7Bdt%7D%20%3D%20%5Comega_B%5Cleft%28%5Comega%20-%20%5Comega_s%5Cright%29%20%5C%5C%0D%0A%20%20%26%20%5Cqquad%20%5Cfrac%7Bd%20%5Comega%7D%7Bdt%7D%20%3D%20%5Cfrac%7B%5Comega_s%7D%7B2H%7D%5Cleft%28P_m%20-%20P_%7Bmax%7D%5Csin%28%5Ctheta%29%20-%20D%28%5Comega%20-%20%5Comega_s%29%5Cright%29%0D%0A%5Cend%7Balign%2A%7D) 30 | 31 | Disturbance (a fault) is applied to the generator at time 0.1 and cleared at time 0.2. 32 | The objective function contains an integral function. 33 | The gradient is computed with the discrete adjoint of an implicit time stepping method ([Crank-Nicolson](https://en.wikipedia.org/wiki/Crank–Nicolson_method)). 34 | 35 | ### Compile the code 36 | During ATPESC, participants do not need to compile code because binaries are available in the ATPESC project folder on Cooley. In case you are using your own copy of PETSc, this example is located in `src/ts/examples/power_grid/`. To compile, run the following in the source folder 37 | ``` 38 | make ex3opt 39 | ``` 40 | The source code is included in [ex3opt.c](./ex3opt.c) 41 | 42 | All the example codes need to compiled only once. Different tasks can be accomplished using command line options. 
43 | 44 | ### Command line options 45 | You can determine the command line options available for this particular example by doing 46 | ``` 47 | ./ex3opt -help 48 | ``` 49 | and show the options related to TAO only by doing 50 | ``` 51 | ./ex3opt -help | grep tao 52 | ``` 53 | 54 | ### Run 1: Monitor the optimization progress 55 | 56 | ``` 57 | ./ex3opt -tao_monitor -tao_view 58 | iter = 0, Function value: 2.03778, Residual: 144.125 59 | iter = 1, Function value: -0.552947, Residual: 43.1456 60 | iter = 2, Function value: -0.911654, Residual: 18.3028 61 | iter = 3, Function value: -1.00401, Residual: 2.48745 62 | iter = 4, Function value: -1.00649, Residual: 1.17916 63 | iter = 5, Function value: -1.00732, Residual: 0.125532 64 | iter = 6, Function value: -1.00733, Residual: 0.00012392 65 | iter = 7, Function value: -1.00733, Residual: 1.3024e-08 66 | iter = 8, Function value: -1.00733, Residual: 3.46501e-12 67 | Tao Object: 1 MPI processes 68 | type: blmvm 69 | Gradient steps: 0 70 | TaoLineSearch Object: 1 MPI processes 71 | type: more-thuente 72 | Active Set subset type: subvec 73 | convergence tolerances: gatol=1e-08, steptol=0., gttol=0. 74 | Residual in Function/Gradient:=3.46501e-12 75 | Objective value=-1.00733 76 | total number of iterations=8, (max: 2000) 77 | total number of function/gradient evaluations=9, (max: 4000) 78 | Solution converged: ||g(X)|| <= gatol 79 | Vec Object: 1 MPI processes 80 | type: seq 81 | 1.00793 82 | ``` 83 | #### Questions 84 | > **Examine the source code and find the user-provided functions for TAO, TS, and TSAdjoint respectively.** 85 | 86 | |Essential functions we have provided are FormFunctionGradient for TAO, TSIFunction and TSIJacobian for TS, RHSJacobianP for TSAdjoint. 
Because of the integral in the objective function, extra functions including CostIntegrand, DRDYFunction and DRDPFunction are given to TSAdjoint.| 87 | 88 | ### Further information 89 | 90 | A more complicated example for power grid application is in `src/ts/examples/power_grid/stability_9bus/ex9busopt.c`. 91 | 92 | 93 | ## Example 2: Hybrid Dynamical System: 94 | 95 | This code demonstrates how to compute the adjoint sensitivity for a complex dynamical system involving discontinuities with TSEvent, TSAdjoint and TS. The dynamics are described by the ODE 96 | 97 | ![equation](http://latex.codecogs.com/gif.latex?%5Cdot%7Bx%7D%20%3D%20A_i%20x) 98 | 99 | where ![equation](http://latex.codecogs.com/gif.latex?x%20%3D%20%5Bx_1%2C%20x_2%5D%5ET) and the matrix A change from 100 | 101 | ![equation](http://latex.codecogs.com/gif.latex?A_1%20%3D%20%5Cleft%5B%20%5Cbegin%7Barray%7D%7Bc%20c%7D1%20%26-100%5C%5C%2010%20%261%20%5Cend%7Barray%7D%0D%0A%5Cright%5D%0D%0A%5Cquad%20%5Ctext%7Bto%7D%20%5Cquad%0D%0AA_2%20%3D%20%5Cleft%5B%20%5Cbegin%7Barray%7D%7Bc%20c%7D1%20%2610%5C%5C%20-100%20%261%20%5Cend%7Barray%7D%0D%0A%5Cright%5D) 102 | 103 | when ![equation](http://latex.codecogs.com/gif.latex?%24x_2%3D2.75%20x_1%24) and switch back when ![equation](http://latex.codecogs.com/gif.latex?%24x_2%3D0.365%20x_1%24). 104 | 105 | Thus the ODE system alternates the right-hand side when a switching face is encountered. The switching surfaces are given by the algebraic constraints depending on the state variables, as shown below (left) 106 | 107 | 108 | 109 | * The parameter to which the sensitivities are computed is marked in red. 110 | * It represents the slope of the switching surface. 111 | * Intuitively the trajectory cannot be affected before it hits the surface. 112 | * The influence of the perturbation in the slope diminishes as the trajectory is approaching the equilibrium point. 113 | 114 | ### Compile the code 115 | This example is in `src/ts/examples/hybrid`. 
The source code is included in [ex1adj.c](./ex1adj.c) 116 | 117 | ``` 118 | make ex1adj 119 | ``` 120 | 121 | ### Make the graphics work via interactive mode on Cooley 122 | Graphics is tricky. HPC users often do it offline. In order to make it work with Cooley, your computer must have X11 (Mac users can install XQuartz). If you do not have it now, just skip the graphics parts since they are not essential. 123 | 124 | Apply for an interactive allocation (skip this if you already got one) 125 | ``` 126 | $ qsub -I -t 60 -n 1 -A 127 | ``` 128 | For example, if your interactive allocation gives you node cc115, open a new terminal and do the following: 129 | ``` 130 | $ ssh -C -X -Y cooley.alcf.anl.gov 131 | ``` 132 | ``` 133 | $ ssh -X cc115 134 | ``` 135 | Then continue to run the applications in this new terminal. 136 | 137 | ### Run 1: Monitor solution graphically with phase diagram 138 | 139 | ``` 140 | ./ex1adj -ts_monitor_draw_solution_phase -4,-2,2,2 -draw_pause -2 141 | ``` 142 | 143 | ### Run 2: Monitor the timestepping process 144 | 145 | ``` 146 | ./ex1adj -ts_monitor 147 | ``` 148 | Trailing (r) in some lines of the output indicates that a rollback happens. In this example, it is triggered by `TSEvent`. To check details about the event, we can use the event monitor 149 | ``` 150 | ./ex1adj -ts_monitor -ts_event_monitor 151 | ``` 152 | We can also monitor the timestepping for the adjoint calculation by doing 153 | ``` 154 | ./ex1adj -ts_monitor -ts_adjoint_monitor 155 | ``` 156 | 157 | ### Further information 158 | 159 | The example `ex1fwd.c` in the same folder illustrates the forward sensitivity approach for the same problem. 160 | 161 | 162 | ## Example 3: Diffusion-Reaction Problem 163 | 164 | This code demonstrates parallel adjoint calculation for a system of time-dependent PDEs on a 2D rectangular grid. 165 | The adjoint solution corresponds to the sensitivities of one component in the final solution w.r.t. the initial conditions.
166 | We will use this example to illustrate the performance considerations for realistic large-scale applications. In particular, we will show how to play with checkpointing and how to profile/tune the performance. 167 | 168 | ### Compile the code 169 | This example is in `src/ts/examples/advection-diffusion-reaction`. The source code is included in [ex5adj.c](./ex5adj.c) 170 | 171 | ``` 172 | make ex5adj 173 | ``` 174 | 175 | ### Run 1: Monitor solution graphically 176 | 177 | ``` 178 | mpiexec -n 4 ./ex5adj -forwardonly -implicitform 0 -ts_type rk \ 179 | -ts_monitor -ts_monitor_draw_solution 180 | ``` 181 | 182 | * `-forwardonly` perform the forward simulation without doing adjoint 183 | * `-implicitform 0 -ts_type rk` changes the time stepping algorithm to a Runge-Kutta method 184 | * `-ts_monitor_draw_solution` monitors the progress for the solution at each time step 185 | * Add `-draw_pause -2` if you want to pause at the end of simulation to see the plot 186 | 187 | ### Run 2: Optimal checkpointing schedule 188 | By default, the checkpoints are stored in binary files on disk. Of course, this may not be a good choice for large-scale applications running on high-performance machines where I/O cost is significant. We can make the solver use RAM for checkpointing and specify the maximum allowable checkpoints so that an optimal adjoint checkpointing schedule that minimizes the number of recomputations will be generated. 
189 | 190 | ``` 191 | mpiexec -n 4 ./ex5adj -implicitform 0 -ts_type rk -ts_adapt_type none \ 192 | -ts_max_steps 10 -ts_monitor -ts_adjoint_monitor \ 193 | -ts_trajectory_type memory -ts_trajectory_max_cps_ram 3 \ 194 | -ts_trajectory_monitor -ts_trajectory_view 195 | ``` 196 | The output corresponds to the schedule depicted by the following diagram: 197 | 198 | 199 | 200 | #### Questions 201 | > **What will happen if we add the option `-ts_trajectory_max_cps_disk 2` to specify there are two available slots for disk checkpoints?** 202 | 203 | |Looking at the output, we will find that the new schedule uses both RAM and disk for checkpointing and takes two less recomputations.| 204 | 205 | ### Run 3: Implicit time integration method 206 | Now we switch to an implicit method ([Crank-Nicolson](https://en.wikipedia.org/wiki/Crank–Nicolson_method)) using fixed stepsize, which is the default setting in the code. At each time step, a nonlinear system is solved by the PETSc nonlinear solver `SNES`. 207 | ``` 208 | mpiexec -n 12 ./ex5adj -da_grid_x 1024 -da_grid_y 1024 -ts_max_steps 10 -snes_monitor -log_view -ts_monitor 209 | ``` 210 | * `-snes_monitor` shows the progress of `SNES` 211 | * `-log_view` prints a summary of the logging 212 | 213 | A snippet of the summary: 214 | ``` 215 | ... 216 | Phase summary info: 217 | Count: number of times phase was executed 218 | Time and Flop: Max - maximum over all processors 219 | Ratio - ratio of maximum to minimum over all processors 220 | Mess: number of messages sent 221 | Avg. len: average message length (bytes) 222 | Reduct: number of global reductions 223 | Global: entire computation 224 | Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop(). 
225 | %T - percent time in this phase %F - percent flop in this phase 226 | %M - percent messages in this phase %L - percent message lengths in this phase 227 | %R - percent reductions in this phase 228 | Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors) 229 | ------------------------------------------------------------------------------------------------------------------------ 230 | Event Count Time (sec) Flop --- Global --- --- Stage --- Total 231 | Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %T %F %M %L %R %T %F %M %L %R Mflop/s 232 | ------------------------------------------------------------------------------------------------------------------------ 233 | 234 | --- Event Stage 0: Main Stage 235 | 236 | VecDot 20 1.0 2.7505e-02 1.7 7.00e+06 1.0 0.0e+00 0.0e+00 2.0e+01 0 0 0 0 2 0 0 0 0 2 3050 237 | VecMDot 321 1.0 2.6292e+00 1.4 6.62e+08 1.0 0.0e+00 0.0e+00 3.2e+02 25 15 0 0 34 25 15 0 0 34 3017 238 | VecNorm 401 1.0 7.1590e-01 1.9 1.40e+08 1.0 0.0e+00 0.0e+00 4.0e+02 7 3 0 0 42 7 3 0 0 42 2349 239 | ... 240 | ``` 241 | 242 | #### Questions 243 | > **Where is the majority of CPU time spent?** 244 | 245 | |Of course answer may vary depending on the settings such as number of procs, problem size, and solver options. Typically most of the time should be spent on [VecMDot](http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Vec/VecMDot.html) or [MatMult](http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatMult.html) | 246 | 247 | > **How expensive is it to do an adjoint step?** 248 | 249 | |For this particular run, an adjoint step takes about 60-70% of the running time of a forward step (compare the time between TSAdjointStep and TSStep). | 250 | 251 | > **How can we improve performance?** 252 | 253 | |1. Use memory instead of disk for checkpointing(`-ts_trajectory_type memory -ts_trajectory_solution_only 0`); 2. 
Tune the time stepping solver, nonlinear solver, linear solver, preconditioner and so forth. | 254 | 255 | ### Further information 256 | Because this example uses `DMDA`, the Jacobian can be efficiently approximated using finite differences with coloring. You can use the option `-snes_fd_color` to enable this feature. 257 | 258 | ## Out-Brief 259 | 260 | We have used [PETSc](https://www.mcs.anl.gov/petsc/) to demonstrate the adjoint capability as an enabling technology for dynamic-constrained optimization. In particular, we focused on time-dependent problems including complex dynamical systems with discontinuities and a large-scale hyperbolic PDE. 261 | 262 | We have shown the basic usage of the adjoint solver as well as functionalities that can facilitate rapid development, diagnosis and performance profiling. 263 | 264 | ## Further Reading 265 | 266 | [PETSc Documentation](http://www.mcs.anl.gov/petsc/documentation/) 267 | 268 | 269 | 270 |   271 | 272 | --- 273 | 274 | [Back to all HandsOnLessons](../lessons.md) 275 | -------------------------------------------------------------------------------- /lessons/atpesc-instructions.md: -------------------------------------------------------------------------------- 1 | 2 | # Instructions for Numerical Package Hands-on Setup (ATPESC 2017) 3 | 4 | Participants in [ATPESC 2017](https://extremecomputingtraining.anl.gov) will work in groups of 2 for hands-on exercises. Forming groups of 2 helps us to cut in half the number of IT issues that may arise during hands-on exercises. Moreover, [pair programming](https://en.wikipedia.org/wiki/Pair_programming) (the practice of having two people work together on one machine, each taking turns between typing and commenting) results in real-time code review, which [research has shown](http://www.sciencedirect.com/science/article/pii/S0950584909000123) results in higher productivity. If you are not using an OSX or Linux laptop, it would be best to try to pair with someone who is.
5 | 6 | ## Basic Instructions 7 | 8 | Hands-on exercises are primarily run on the [Cooley](https://www.alcf.anl.gov/user-guides/cooley) cluster at ALCF. Please do the following initial setup. 9 | 10 | - Login to Cooley 11 | ``` 12 | ssh -C -X -Y username@cooley.alcf.anl.gov 13 | ``` 14 | - `-C` means to use compression 15 | - `-X` means to forward X11, `-Y` means _trusted_ X11 forwarding 16 | - Setup software environment on Cooley by adding the following to _~/.soft.cooley_ (preferably before `@default` line) 17 | ``` 18 | +mvapich2 19 | +gcc-4.8.1 20 | @visit 21 | PATH+=/projects/ATPESC2017/NumericalPackages/spack/bin 22 | MPIEXEC_OMPI=/projects/ATPESC2017/NumericalPackages/spack/opt/spack/linux-rhel6-x86_64/gcc-4.8.1/openmpi-2.1.1-5b4k4f3vzgwz5qmektcqja2av4c4bjrg/bin/mpiexec 23 | MPIEXEC=/soft/libraries/mpi/mvapich2/gcc/bin/mpiexec 24 | ``` 25 | - Run the following command to make the above change to _~/.soft.cooley_ effective. 26 | ``` 27 | resoft 28 | ``` 29 | - **WARNING: Do not attempt next step until after 9:30 am, when our Cooley reservation begins. Please be sure to work in pairs for the node reservations so that we have sufficient nodes for each pair to use 2 compute nodes for the exercises throughout the day.** 30 | - Obtain 2 compute nodes in _interactive_ (`-I`) mode to run the hands on exercises by running the following `qsub` command... 31 | ``` 32 | qsub -I -n 2 -t 600 -A ATPESC2017 -q training 33 | ``` 34 | - **Note**: Once the allocation has started, you will be logged into 35 | the reserved nodes and see a new prompt with a different hostname such as 36 | `cc122`. 37 | - The allocation should remain _reserved_ for the whole day. However, 38 | you can delete it simply by logging out of the interactive shell it put you in. 39 | - Now, copy over precompiled binaries and data files to your home dir. 
40 | ``` 41 | cp -r /projects/ATPESC2017/NumericalPackages/handson ~/ 42 | ``` 43 | - Now proceed to run the [Hands-On exercises](lessons.md) as instructed. Each lesson will 44 | indicate the path in the `handson` directory you copied above you should 45 | `cd` to in order to begin the lesson. 46 | 47 | ## Optional Visualization Instructions 48 | 49 | Some of the hands-on exercises have optional visualization instructions 50 | and use a variety of visualization tools. 51 | 52 | Getting a room of more than 70 people with different laptops working with 53 | remote visualization from Cooley is not something either our tight agenda or 54 | our staff are prepared to support. In addition, where necessary, the hands-on 55 | leader(s) will demonstrate the use of some tools and learners can follow 56 | along at those points. 57 | 58 | There is a whole day of activity devoted to visualization resources and 59 | tools in the ATPESC agenda on August 9th. 60 | 61 | Nonetheless, we are providing here some instructions and tools for those 62 | who would like to pursue getting remote visualization working for these 63 | exercises. 64 | 65 | ### Note 66 | - For basic graphics exercises - you should be able to use X11 over ssh. 67 | i.e., `ssh -C -X -Y cooley.alcf.anl.gov, ssh -X compute_node_allocated` 68 | and run the graphics part of the exercise. That said, many post-2014 69 | X servers have _INdirect GLX_ disabled in them breaking tools like 70 | paraview, VisIt and glvis. 71 | 72 | We have developed a script (to be run on your laptop) that can help with 73 | setting up a VNC connection to Cooley. It can work on MacOS, and with some 74 | Linux VNC clients (vinagre, vncviewer). However, the script is fragile and 75 | might not work with everyone's setup - hence this is optional. This script 76 | will do several things: 77 | 78 | - Log you into Cooley once with your token.
79 | - Set up SSH Control Master so you have to log into Cooley with your token only once for the whole day and password-less login will work thereafter. 80 | - Reserve 3 nodes for 12h on Cooley. 81 | - Set up a VNC connection to those nodes. 82 | 83 | Mac and Linux users are welcome to download and run this script to 84 | set up a VNC connection. Doing so will permit quick use of tools 85 | like VisIt, paraview and/or glvis. 86 | 87 | To try this setup script... 88 | 89 | On Linux, 90 | ``` 91 | wget https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/master/tools/atpesc2017_cooley_vnc_setup.sh 92 | ``` 93 | 94 | On Mac, 95 | 96 | ``` 97 | curl -O https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/master/tools/atpesc2017_cooley_vnc_setup.sh 98 | ``` 99 | 100 | Ensure the script has execute permissions... 101 | ``` 102 | chmod 755 atpesc2017_cooley_vnc_setup.sh 103 | ``` 104 | 105 | Now, try running the script 106 | 107 | ``` 108 | ./atpesc2017_cooley_vnc_setup.sh 109 | ``` 110 | 111 | ### Notes 112 | - If allocating nodes via this script, please deallocate nodes that you might have previously allocated 113 | by simply logging out of the interactive allocation. 114 | 115 | ### Troubleshooting 116 | - If you have a different preferred Linux VNC client, you should be able to use it to connect to the VNC connection that is already set up by this script. For example, 117 | ``` 118 | krdc vnc://localhost:22590 119 | ``` 120 | - When rerunning the script - if the ssh command to set up the VNC tunnel fails - you 121 | might have to kill the _ssh control master_ process and restart again.
It's easiest 122 | to simply find all ssh logins to cooley and kill them 123 | ``` 124 | $ ps -ef | grep cooley 125 | 3640 7348 694 0 4:58PM ttys003 0:00.01 grep cooley 126 | 3640 7345 62009 0 4:58PM ttys004 0:00.03 ssh -C -X -Y cooley.alcf.anl.gov 127 | 3640 7347 62009 0 4:58PM ttys004 0:00.03 ssh -L 22590:cc122:5900 cooley.alcf.anl.gov 128 | $ kill -9 7345 7347 129 | ``` 130 | 131 | --- 132 | 133 | [Back to all HandsOnLessons](lessons.md) 134 | -------------------------------------------------------------------------------- /lessons/hand_coded_heat/1d_heat_equation.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/1d_heat_equation.xlsx -------------------------------------------------------------------------------- /lessons/hand_coded_heat/animated_basic_heat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/animated_basic_heat.gif -------------------------------------------------------------------------------- /lessons/hand_coded_heat/basic0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/basic0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/basic0002.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0002.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/basic0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0003.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/heat.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #ifdef HAVE_FEENABLEEXCEPT 9 | #define _GNU_SOURCE 10 | #include 11 | #if 0 12 | #include "fe-handling-example.c" 13 | #endif 14 | #endif 15 | 16 | int const Nt_max = 50000; 17 | int const Nx_max = 10000; 18 | 19 | int noout = 0; 20 | int savi = 0; 21 | int outi = 100; 22 | int save = 0; 23 | char const *alg = "ftcs"; 24 | char const *prec = "double"; 25 | char const *ic = "const(1)"; 26 | double alpha = 0.2; 27 | double dt = 0.004; 28 | double dx = 0.1; 29 | double bc0 = 0; 30 | double bc1 = 1; 31 | double maxt = 2.0; 32 | 33 | double *curr=0, *last=0, *change_history=0, *exact=0, *error_history=0; 34 | double *cn_Amat = 0; 35 | 36 | int Nx = (int) (1/0.1+1.5); 37 | int Nt = (int) (1 / 0.004); 38 | 39 | /* 40 | * Utilities 41 | */ 42 | static double 43 | l2_norm(int n, double const *a, double const *b) 44 | { 45 | int i; 46 | double sum = 0; 47 | for (i = 0; i < n; i++) 48 | { 49 | double diff = a[i] - b[i]; 50 | sum += diff * diff; 51 | } 52 | return sum; 53 | } 54 | 55 | static void 56 | copy(int n, double *dst, double const *src) 57 | { 58 | int i; 59 | for (i = 0; i < n; i++) 60 | dst[i] = src[i]; 61 | } 62 | 63 | #define 
TSTART -1 64 | #define TFINAL -2 65 | #define RESIDUAL -3 66 | #define ERROR -4 67 | static void 68 | write_array(int t, int n, double dx, double const *a) 69 | { 70 | int i; 71 | char fname[32]; 72 | FILE *outf; 73 | 74 | if (noout) return; 75 | 76 | if (t == TSTART) 77 | snprintf(fname, sizeof(fname), "heat_soln_00000.curve"); 78 | else if (t == TFINAL) 79 | snprintf(fname, sizeof(fname), "heat_soln_final.curve"); 80 | else if (t == RESIDUAL) 81 | snprintf(fname, sizeof(fname), "change.curve"); 82 | else if (t == ERROR) 83 | snprintf(fname, sizeof(fname), "error.curve"); 84 | else 85 | { 86 | if (a == exact) 87 | snprintf(fname, sizeof(fname), "heat_exact_%05d.curve", t); 88 | else 89 | snprintf(fname, sizeof(fname), "heat_soln_%05d.curve", t); 90 | } 91 | 92 | outf = fopen(fname,"w"); 93 | for (i = 0; i < n; i++) 94 | fprintf(outf, "%8.4g %8.4g\n", i*dx, a[i]); 95 | fclose(outf); 96 | } 97 | 98 | 99 | static void 100 | r83_np_fa(int n, double *a) 101 | /* 102 | Licensing: This code is distributed under the GNU LGPL license. 103 | Modified: 30 May 2009 Author: John Burkardt 104 | Modified by Mark C. Miller, July 23, 2017 105 | */ 106 | { 107 | int i; 108 | 109 | for ( i = 1; i <= n-1; i++ ) 110 | { 111 | assert ( a[1+(i-1)*3] != 0.0 ); 112 | /* 113 | Store the multiplier in L. 114 | */ 115 | a[2+(i-1)*3] = a[2+(i-1)*3] / a[1+(i-1)*3]; 116 | /* 117 | Modify the diagonal entry in the next column. 
118 | */ 119 | a[1+i*3] = a[1+i*3] - a[2+(i-1)*3] * a[0+i*3]; 120 | } 121 | 122 | assert( a[1+(n-1)*3] != 0.0 ); 123 | } 124 | 125 | static void 126 | initialize(void) 127 | { 128 | curr = (double *) calloc(Nx, sizeof(double)); 129 | last = (double *) calloc(Nx, sizeof(double)); 130 | if (save) 131 | { 132 | exact = (double *) calloc(Nx, sizeof(double)); 133 | change_history = (double *) calloc(Nt, sizeof(double)); 134 | error_history = (double *) calloc(Nt, sizeof(double)); 135 | } 136 | 137 | assert(strncmp(alg, "ftcs", 4)==0 || 138 | strncmp(alg, "upwind15", 8)==0 || 139 | strncmp(alg, "crankn", 6)==0); 140 | 141 | #ifdef HAVE_FEENABLEEXCEPT 142 | feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW); 143 | #endif 144 | 145 | if (!strncmp(alg, "crankn", 6)) 146 | { 147 | /* 148 | We do some additional initialization work for Crank-Nicolson. 149 | The matrix A does not change with time. We can set it once, 150 | factor it once, and solve repeatedly. 151 | */ 152 | int i; 153 | double w = alpha * dt / dx / dx; 154 | 155 | cn_Amat = ( double * ) malloc ( 3 * Nx * sizeof ( double ) ); 156 | 157 | cn_Amat[0+0*3] = 0.0; 158 | cn_Amat[1+0*3] = 1.0; 159 | cn_Amat[0+1*3] = 0.0; 160 | 161 | for ( i = 1; i < Nx - 1; i++ ) 162 | { 163 | cn_Amat[2+(i-1)*3] = - w; 164 | cn_Amat[1+ i *3] = 1.0 + 2.0 * w; 165 | cn_Amat[0+(i+1)*3] = - w; 166 | } 167 | 168 | cn_Amat[2+(Nx-2)*3] = 0.0; 169 | cn_Amat[1+(Nx-1)*3] = 1.0; 170 | cn_Amat[2+(Nx-1)*3] = 0.0; 171 | 172 | /* 173 | Factor the matrix. 
174 | */ 175 | r83_np_fa(Nx, cn_Amat); 176 | } 177 | } 178 | 179 | #define HANDLE_ARG(VAR, TYPE, STYLE, HELP) \ 180 | { \ 181 | void *valp = (void*) &VAR; \ 182 | int const len = strlen(#VAR)+1; \ 183 | for (i = 1; i < argc; i++) \ 184 | {\ 185 | char const *style = #STYLE; \ 186 | int valid_style = style[1]=='d'||style[1]=='g'||style[1]=='s'; \ 187 | if (strncmp(argv[i], #VAR"=", len)) \ 188 | continue; \ 189 | assert(valid_style); \ 190 | if (strlen(argv[i]+len)) \ 191 | {\ 192 | if (style[1] == 'd') /* int */ \ 193 | *((int*) valp) = (int) strtol(argv[i]+len,0,10); \ 194 | else if (style[1] == 'g') /* double */ \ 195 | *((double*) valp) = (double) strtod(argv[i]+len,0); \ 196 | else if (style[1] == 's') /* char* */ \ 197 | *((char**) valp) = (char*) strdup(argv[i]+len); \ 198 | }\ 199 | }\ 200 | if (help) \ 201 | {\ 202 | char tmp[256]; \ 203 | int len = snprintf(tmp, sizeof(tmp), " %s=" #STYLE, \ 204 | #VAR, VAR);\ 205 | snprintf(tmp, sizeof(tmp), "%s (%s)", #HELP, #TYPE); \ 206 | fprintf(stderr, " %s=" #STYLE "%*s\n", \ 207 | #VAR, VAR, 80-len, tmp);\ 208 | }\ 209 | else \ 210 | fprintf(stderr, " %s="#STYLE"\n", \ 211 | #VAR, VAR);\ 212 | } 213 | 214 | static void 215 | process_args(int argc, char **argv) 216 | { 217 | int i; 218 | int help = 0; 219 | 220 | /* quick pass for 'help' anywhere on command line */ 221 | for (i = 0; i < argc && !help; i++) 222 | help = 0!=strcasestr(argv[i], "help"); 223 | 224 | if (help) 225 | { 226 | fprintf(stderr, "Usage:\n"); 227 | fprintf(stderr, " ./heat = =...\n"); 228 | } 229 | 230 | HANDLE_ARG(prec, char*, %s, precision half|float|double|quad); 231 | HANDLE_ARG(alpha, double, %g, material thermal diffusivity); 232 | HANDLE_ARG(dx, double, %g, x-incriment (1/dx->int)); 233 | HANDLE_ARG(dt, double, %g, t-incriment); 234 | HANDLE_ARG(maxt, double, %g, max. 
time to run simulation to); 235 | HANDLE_ARG(bc0, double, %g, bc @ x=0: u(0,t)); 236 | HANDLE_ARG(bc1, double, %g, bc @ x=1: u(1,t)); 237 | HANDLE_ARG(ic, char*, %s, ic @ t=0: u(x,0)); 238 | HANDLE_ARG(alg, char*, %s, algorithm ftcs|upwind15|crankn); 239 | HANDLE_ARG(savi, int, %d, save every i-th solution step); 240 | HANDLE_ARG(save, int, %d, save error in every saved solution); 241 | HANDLE_ARG(outi, int, %d, output progress every i-th solution step); 242 | HANDLE_ARG(noout, int, %d, disable all file outputs); 243 | 244 | if (help) 245 | { 246 | fprintf(stderr, "Examples...\n"); 247 | fprintf(stderr, " ./heat dx=0.01 dt=0.0002 alg=ftcs\n"); 248 | fprintf(stderr, " ./heat dx=0.1 bc0=5 bc1=10 ic=\"spikes(5,5)\"\n"); 249 | exit(1); 250 | } 251 | 252 | } 253 | 254 | static void 255 | set_initial_condition(int n, double *a, double dx, char const *ic) 256 | { 257 | int i; 258 | double x; 259 | 260 | if (!strncmp(ic, "const(", 6)) /* const(val) */ 261 | { 262 | double cval = strtod(ic+6, 0); 263 | for (i = 0; i < n; i++) 264 | a[i] = cval; 265 | } 266 | else if (!strncmp(ic, "step(", 5)) /* step(left,xmid,right) */ 267 | { 268 | char *p; 269 | double left = strtod(ic+5, &p); 270 | double xmid = strtod(p+1, &p); 271 | double right = strtod(p+1, 0); 272 | for (i = 0, x = 0; i < n; i++, x+=dx) 273 | { 274 | if (x < xmid) a[i] = left; 275 | else a[i] = right; 276 | } 277 | } 278 | else if (!strncmp(ic, "ramp(", 5)) /* ramp(left,right) */ 279 | { 280 | char *p; 281 | double left = strtod(ic+5, &p); 282 | double right = strtod(p+1, 0); 283 | double dv = (right-left)/(n-1); 284 | for (i = 0, x = left; i < n; i++, x+=dv) 285 | a[i] = x; 286 | } 287 | else if (!strncmp(ic, "rand(", 5)) /* rand(seed,amp) */ 288 | { 289 | char *p; 290 | int seed = (int) strtol(ic+5,&p,10); 291 | double amp = strtod(p+1, 0); 292 | const double maxr = ((long long)1<<31)-1; 293 | srandom(seed); 294 | for (i = 0; i < n; i++) 295 | a[i] = amp * random()/maxr; 296 | } 297 | else if (!strncmp(ic, 
"sin(Pi*x)", 9)) /* rand(seed,amp) */ 298 | { 299 | for (i = 0, x = 0; i < n; i++, x+=dx) 300 | a[i] = sin(M_PI*x); 301 | } 302 | else if (!strncmp(ic, "spikes(", 7)) /* spikes(Amp,Loc,Amp,Loc,...) */ 303 | { 304 | char const *p = &ic[6]; 305 | for (i = 0, x = 0; i < n; i++) 306 | a[i] = 0; 307 | while (*p != ')') 308 | { 309 | char *ep_amp, *ep_idx; 310 | double amp = strtod(p+1, &ep_amp); 311 | int idx = (int) strtod(ep_amp+1, &ep_idx); 312 | assert(idx0 && save) 492 | { 493 | compute_exact_solution(Nx, exact, dx, ic, alpha, ti*dt, bc0, bc1); 494 | if (savi && ti%savi==0) 495 | write_array(ti, Nx, dx, exact); 496 | } 497 | 498 | if (ti>0 && savi && ti%savi==0) 499 | write_array(ti, Nx, dx, curr); 500 | 501 | change = l2_norm(Nx, curr, last); 502 | if (save) 503 | { 504 | change_history[ti] = change; 505 | error_history[ti] = l2_norm(Nx, curr, exact); 506 | } 507 | 508 | copy(Nx, last, curr); 509 | 510 | if (outi && ti%outi==0) 511 | { 512 | printf("Iteration %04d: last change l2=%g\n", ti, change); 513 | } 514 | } 515 | 516 | write_array(TFINAL, Nx, dx, curr); 517 | if (save) 518 | { 519 | write_array(RESIDUAL, ti, dt, change_history); 520 | write_array(ERROR, ti, dt, error_history); 521 | } 522 | 523 | return finalize(ti, maxt, change); 524 | } 525 | -------------------------------------------------------------------------------- /lessons/hand_coded_heat/heat.c.numbered.txt: -------------------------------------------------------------------------------- 1 | 1 #include 2 | 2 #include 3 | 3 #include 4 | 4 #include 5 | 5 #include 6 | 6 #include 7 | 7 #include 8 | 8 #ifdef HAVE_FEENABLEEXCEPT 9 | 9 #define _GNU_SOURCE 10 | 10 #include 11 | 11 #if 0 12 | 12 #include "fe-handling-example.c" 13 | 13 #endif 14 | 14 #endif 15 | 15 16 | 16 int const Nt_max = 50000; 17 | 17 int const Nx_max = 10000; 18 | 18 19 | 19 int noout = 0; 20 | 20 int savi = 0; 21 | 21 int outi = 100; 22 | 22 int save = 0; 23 | 23 char const *alg = "ftcs"; 24 | 24 char const *prec = "double"; 25 | 
25 char const *ic = "const(1)"; 26 | 26 double alpha = 0.2; 27 | 27 double dt = 0.004; 28 | 28 double dx = 0.1; 29 | 29 double bc0 = 0; 30 | 30 double bc1 = 1; 31 | 31 double maxt = 2.0; 32 | 32 33 | 33 double *curr=0, *last=0, *change_history=0, *exact=0, *error_history=0; 34 | 34 double *cn_Amat = 0; 35 | 35 36 | 36 int Nx = (int) (1/0.1+1.5); 37 | 37 int Nt = (int) (1 / 0.004); 38 | 38 39 | 39 /* 40 | 40 * Utilities 41 | 41 */ 42 | 42 static double 43 | 43 l2_norm(int n, double const *a, double const *b) 44 | 44 { 45 | 45 int i; 46 | 46 double sum = 0; 47 | 47 for (i = 0; i < n; i++) 48 | 48 { 49 | 49 double diff = a[i] - b[i]; 50 | 50 sum += diff * diff; 51 | 51 } 52 | 52 return sum; 53 | 53 } 54 | 54 55 | 55 static void 56 | 56 copy(int n, double *dst, double const *src) 57 | 57 { 58 | 58 int i; 59 | 59 for (i = 0; i < n; i++) 60 | 60 dst[i] = src[i]; 61 | 61 } 62 | 62 63 | 63 #define TSTART -1 64 | 64 #define TFINAL -2 65 | 65 #define RESIDUAL -3 66 | 66 #define ERROR -4 67 | 67 static void 68 | 68 write_array(int t, int n, double dx, double const *a) 69 | 69 { 70 | 70 int i; 71 | 71 char fname[32]; 72 | 72 FILE *outf; 73 | 73 74 | 74 if (noout) return; 75 | 75 76 | 76 if (t == TSTART) 77 | 77 snprintf(fname, sizeof(fname), "heat_soln_00000.curve"); 78 | 78 else if (t == TFINAL) 79 | 79 snprintf(fname, sizeof(fname), "heat_soln_final.curve"); 80 | 80 else if (t == RESIDUAL) 81 | 81 snprintf(fname, sizeof(fname), "change.curve"); 82 | 82 else if (t == ERROR) 83 | 83 snprintf(fname, sizeof(fname), "error.curve"); 84 | 84 else 85 | 85 { 86 | 86 if (a == exact) 87 | 87 snprintf(fname, sizeof(fname), "heat_exact_%05d.curve", t); 88 | 88 else 89 | 89 snprintf(fname, sizeof(fname), "heat_soln_%05d.curve", t); 90 | 90 } 91 | 91 92 | 92 outf = fopen(fname,"w"); 93 | 93 for (i = 0; i < n; i++) 94 | 94 fprintf(outf, "%8.4g %8.4g\n", i*dx, a[i]); 95 | 95 fclose(outf); 96 | 96 } 97 | 97 98 | 98 99 | 99 static void 100 | 100 r83_np_fa(int n, double *a) 101 | 101 /* 102 | 
102 Licensing: This code is distributed under the GNU LGPL license. 103 | 103 Modified: 30 May 2009 Author: John Burkardt 104 | 104 Modified by Mark C. Miller, July 23, 2017 105 | 105 */ 106 | 106 { 107 | 107 int i; 108 | 108 109 | 109 for ( i = 1; i <= n-1; i++ ) 110 | 110 { 111 | 111 assert ( a[1+(i-1)*3] != 0.0 ); 112 | 112 /* 113 | 113 Store the multiplier in L. 114 | 114 */ 115 | 115 a[2+(i-1)*3] = a[2+(i-1)*3] / a[1+(i-1)*3]; 116 | 116 /* 117 | 117 Modify the diagonal entry in the next column. 118 | 118 */ 119 | 119 a[1+i*3] = a[1+i*3] - a[2+(i-1)*3] * a[0+i*3]; 120 | 120 } 121 | 121 122 | 122 assert( a[1+(n-1)*3] != 0.0 ); 123 | 123 } 124 | 124 125 | 125 static void 126 | 126 initialize(void) 127 | 127 { 128 | 128 curr = (double *) calloc(Nx, sizeof(double)); 129 | 129 last = (double *) calloc(Nx, sizeof(double)); 130 | 130 if (save) 131 | 131 { 132 | 132 exact = (double *) calloc(Nx, sizeof(double)); 133 | 133 change_history = (double *) calloc(Nt, sizeof(double)); 134 | 134 error_history = (double *) calloc(Nt, sizeof(double)); 135 | 135 } 136 | 136 137 | 137 assert(strncmp(alg, "ftcs", 4)==0 || 138 | 138 strncmp(alg, "upwind15", 8)==0 || 139 | 139 strncmp(alg, "crankn", 6)==0); 140 | 140 141 | 141 #ifdef HAVE_FEENABLEEXCEPT 142 | 142 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW); 143 | 143 #endif 144 | 144 145 | 145 if (!strncmp(alg, "crankn", 6)) 146 | 146 { 147 | 147 /* 148 | 148 We do some additional initialization work for Crank-Nicolson. 149 | 149 The matrix A does not change with time. We can set it once, 150 | 150 factor it once, and solve repeatedly. 
151 | 151 */ 152 | 152 int i; 153 | 153 double w = alpha * dt / dx / dx; 154 | 154 155 | 155 cn_Amat = ( double * ) malloc ( 3 * Nx * sizeof ( double ) ); 156 | 156 157 | 157 cn_Amat[0+0*3] = 0.0; 158 | 158 cn_Amat[1+0*3] = 1.0; 159 | 159 cn_Amat[0+1*3] = 0.0; 160 | 160 161 | 161 for ( i = 1; i < Nx - 1; i++ ) 162 | 162 { 163 | 163 cn_Amat[2+(i-1)*3] = - w; 164 | 164 cn_Amat[1+ i *3] = 1.0 + 2.0 * w; 165 | 165 cn_Amat[0+(i+1)*3] = - w; 166 | 166 } 167 | 167 168 | 168 cn_Amat[2+(Nx-2)*3] = 0.0; 169 | 169 cn_Amat[1+(Nx-1)*3] = 1.0; 170 | 170 cn_Amat[2+(Nx-1)*3] = 0.0; 171 | 171 172 | 172 /* 173 | 173 Factor the matrix. 174 | 174 */ 175 | 175 r83_np_fa(Nx, cn_Amat); 176 | 176 } 177 | 177 } 178 | 178 179 | 179 #define HANDLE_ARG(VAR, TYPE, STYLE, HELP) \ 180 | 180 { \ 181 | 181 void *valp = (void*) &VAR; \ 182 | 182 int const len = strlen(#VAR)+1; \ 183 | 183 for (i = 1; i < argc; i++) \ 184 | 184 {\ 185 | 185 char const *style = #STYLE; \ 186 | 186 int valid_style = style[1]=='d'||style[1]=='g'||style[1]=='s'; \ 187 | 187 if (strncmp(argv[i], #VAR"=", len)) \ 188 | 188 continue; \ 189 | 189 assert(valid_style); \ 190 | 190 if (strlen(argv[i]+len)) \ 191 | 191 {\ 192 | 192 if (style[1] == 'd') /* int */ \ 193 | 193 *((int*) valp) = (int) strtol(argv[i]+len,0,10); \ 194 | 194 else if (style[1] == 'g') /* double */ \ 195 | 195 *((double*) valp) = (double) strtod(argv[i]+len,0); \ 196 | 196 else if (style[1] == 's') /* char* */ \ 197 | 197 *((char**) valp) = (char*) strdup(argv[i]+len); \ 198 | 198 }\ 199 | 199 }\ 200 | 200 if (help) \ 201 | 201 {\ 202 | 202 char tmp[256]; \ 203 | 203 int len = snprintf(tmp, sizeof(tmp), " %s=" #STYLE, \ 204 | 204 #VAR, VAR);\ 205 | 205 snprintf(tmp, sizeof(tmp), "%s (%s)", #HELP, #TYPE); \ 206 | 206 fprintf(stderr, " %s=" #STYLE "%*s\n", \ 207 | 207 #VAR, VAR, 80-len, tmp);\ 208 | 208 }\ 209 | 209 else \ 210 | 210 fprintf(stderr, " %s="#STYLE"\n", \ 211 | 211 #VAR, VAR);\ 212 | 212 } 213 | 213 214 | 214 static void 215 | 215 
process_args(int argc, char **argv) 216 | 216 { 217 | 217 int i; 218 | 218 int help = 0; 219 | 219 220 | 220 /* quick pass for 'help' anywhere on command line */ 221 | 221 for (i = 0; i < argc && !help; i++) 222 | 222 help = 0!=strcasestr(argv[i], "help"); 223 | 223 224 | 224 if (help) 225 | 225 { 226 | 226 fprintf(stderr, "Usage:\n"); 227 | 227 fprintf(stderr, " ./heat = =...\n"); 228 | 228 } 229 | 229 230 | 230 HANDLE_ARG(prec, char*, %s, precision half|float|double|quad); 231 | 231 HANDLE_ARG(alpha, double, %g, material thermal diffusivity); 232 | 232 HANDLE_ARG(dx, double, %g, x-incriment (1/dx->int)); 233 | 233 HANDLE_ARG(dt, double, %g, t-incriment); 234 | 234 HANDLE_ARG(maxt, double, %g, max. time to run simulation to); 235 | 235 HANDLE_ARG(bc0, double, %g, bc @ x=0: u(0,t)); 236 | 236 HANDLE_ARG(bc1, double, %g, bc @ x=1: u(1,t)); 237 | 237 HANDLE_ARG(ic, char*, %s, ic @ t=0: u(x,0)); 238 | 238 HANDLE_ARG(alg, char*, %s, algorithm ftcs|upwind15|crankn); 239 | 239 HANDLE_ARG(savi, int, %d, save every i-th solution step); 240 | 240 HANDLE_ARG(save, int, %d, save error in every saved solution); 241 | 241 HANDLE_ARG(outi, int, %d, output progress every i-th solution step); 242 | 242 HANDLE_ARG(noout, int, %d, disable all file outputs); 243 | 243 244 | 244 if (help) 245 | 245 { 246 | 246 fprintf(stderr, "Examples...\n"); 247 | 247 fprintf(stderr, " ./heat Nx=51 dt=0.002 alg=ftcs\n"); 248 | 248 fprintf(stderr, " ./heat Nx=51 bc0=5 bc1=10\n"); 249 | 249 exit(1); 250 | 250 } 251 | 251 252 | 252 } 253 | 253 254 | 254 static void 255 | 255 set_initial_condition(int n, double *a, double dx, char const *ic) 256 | 256 { 257 | 257 int i; 258 | 258 double x; 259 | 259 260 | 260 if (!strncmp(ic, "const(", 6)) /* const(val) */ 261 | 261 { 262 | 262 double cval = strtod(ic+6, 0); 263 | 263 for (i = 0; i < n; i++) 264 | 264 a[i] = cval; 265 | 265 } 266 | 266 else if (!strncmp(ic, "step(", 5)) /* step(left,xmid,right) */ 267 | 267 { 268 | 268 char *p; 269 | 269 double left = 
strtod(ic+5, &p); 270 | 270 double xmid = strtod(p+1, &p); 271 | 271 double right = strtod(p+1, 0); 272 | 272 for (i = 0, x = 0; i < n; i++, x+=dx) 273 | 273 { 274 | 274 if (x < xmid) a[i] = left; 275 | 275 else a[i] = right; 276 | 276 } 277 | 277 } 278 | 278 else if (!strncmp(ic, "ramp(", 5)) /* ramp(left,right) */ 279 | 279 { 280 | 280 char *p; 281 | 281 double left = strtod(ic+5, &p); 282 | 282 double right = strtod(p+1, 0); 283 | 283 double dv = (right-left)/(n-1); 284 | 284 for (i = 0, x = left; i < n; i++, x+=dv) 285 | 285 a[i] = x; 286 | 286 } 287 | 287 else if (!strncmp(ic, "rand(", 5)) /* rand(seed,amp) */ 288 | 288 { 289 | 289 char *p; 290 | 290 int seed = (int) strtol(ic+5,&p,10); 291 | 291 double amp = strtod(p+1, 0); 292 | 292 const double maxr = ((long long)1<<31)-1; 293 | 293 srandom(seed); 294 | 294 for (i = 0; i < n; i++) 295 | 295 a[i] = amp * random()/maxr; 296 | 296 } 297 | 297 else if (!strncmp(ic, "sin(Pi*x)", 9)) /* rand(seed,amp) */ 298 | 298 { 299 | 299 for (i = 0, x = 0; i < n; i++, x+=dx) 300 | 300 a[i] = sin(M_PI*x); 301 | 301 } 302 | 302 else if (!strncmp(ic, "spikes(", 7)) /* spikes(Amp,Loc,Amp,Loc,...) 
*/ 303 | 303 { 304 | 304 char const *p = &ic[6]; 305 | 305 for (i = 0, x = 0; i < n; i++) 306 | 306 a[i] = 0; 307 | 307 while (*p != ')') 308 | 308 { 309 | 309 char *ep_amp, *ep_idx; 310 | 310 double amp = strtod(p+1, &ep_amp); 311 | 311 int idx = (int) strtod(ep_amp+1, &ep_idx); 312 | 312 assert(idx0 && save) 492 | 492 { 493 | 493 compute_exact_solution(Nx, exact, dx, ic, alpha, ti*dt, bc0, bc1); 494 | 494 if (savi && ti%savi==0) 495 | 495 write_array(ti, Nx, dx, exact); 496 | 496 } 497 | 497 498 | 498 if (ti>0 && savi && ti%savi==0) 499 | 499 write_array(ti, Nx, dx, curr); 500 | 500 501 | 501 change = l2_norm(Nx, curr, last); 502 | 502 if (save) 503 | 503 { 504 | 504 change_history[ti] = change; 505 | 505 error_history[ti] = l2_norm(Nx, curr, exact); 506 | 506 } 507 | 507 508 | 508 copy(Nx, last, curr); 509 | 509 510 | 510 if (outi && ti%outi==0) 511 | 511 { 512 | 512 printf("Iteration %04d: last change l2=%g\n", ti, change); 513 | 513 } 514 | 514 } 515 | 515 516 | 516 write_array(TFINAL, Nx, dx, curr); 517 | 517 if (save) 518 | 518 { 519 | 519 write_array(RESIDUAL, ti, dt, change_history); 520 | 520 write_array(ERROR, ti, dt, error_history); 521 | 521 } 522 | 522 523 | 523 return finalize(ti, maxt, change); 524 | 524 } 525 | -------------------------------------------------------------------------------- /lessons/hand_coded_heat/highres0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/highres0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/highres0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/highres0001.png 
-------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_crankn0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_crankn0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_crankn0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0002.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_smalldt_long0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_smalldt_long0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_smalldt_long0002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0002.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_smalldt_long0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0003.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_smalldt_long0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0004.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0002.png 
-------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_crankn0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_crankn0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_crankn0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_crankn0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_smalldt0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_smalldt0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_smalldt0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0002.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/hr_spikes_smalldt0003.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0003.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/makefile: -------------------------------------------------------------------------------- 1 | PROB = basic 2 | REPORT = iops fops mem 3 | SHELL = /bin/bash 4 | help: 5 | ./heat --help; exit 0 6 | # SHELL is set to bash because the recipes below use bash-only syntax ([[ ]], pushd, >&); make would otherwise run them with /bin/sh 7 | clean: 8 | @for x in *; do \ 9 | if [[ -d $$x ]]; then \ 10 | echo "Removing directory $$x"; \ 11 | rm -rf $$x; \ 12 | fi; \ 13 | done 14 | 15 | # 16 | # To get performance data, we actually run multiple instances 17 | # using different valgrind tools 18 | # 19 | run: 20 | @rm -rf ${PROB}; mkdir ${PROB} 21 | @echo "./heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic=${IC} alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI}" 22 | @pushd ${PROB}; \ 23 | if [[ -n $$(echo ${REPORT} | grep ops) ]]; then \ 24 | valgrind --log-file=valgrind_lackey.out --tool=lackey --detailed-counts=yes ../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} noout=1 >& heat_lackey.out & \ 25 | fi; \ 26 | if [[ -n $$(echo ${REPORT} | grep mem) ]]; then \ 27 | valgrind --log-file=valgrind_memcheck.out --tool=memcheck ../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} noout=1 >& heat_memcheck.out & \ 28 | fi; \ 29 | ../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} & \ 30 | wait 31 | @if [[ -n $$(echo ${REPORT} | grep iops) ]]; then \ 32 | echo "Integer ops = $$(cat ${PROB}/valgrind_lackey.out | grep I1\\\|I8\\\|I16\\\|I32\\\|I64 | tr -s ' ' | cut -d' ' -f5 | tr
-d ',' | tr '\n' '+' | sed -e 's/$$/0\n/' | bc)"; \ 33 | fi 34 | @if [[ -n $$(echo ${REPORT} | grep fops) ]]; then \ 35 | echo "Floating point ops = $$(cat ${PROB}/valgrind_lackey.out | grep F32\\\|F64\\\|F128\\\|V128\\\|V256 | tr -s ' ' | cut -d' ' -f5 | tr -d ',' | tr '\n' '+' | sed -e 's/$$/0\n/' | bc)"; \ 36 | fi 37 | @if [[ -n $$(echo ${REPORT} | grep mem) ]]; then \ 38 | echo "Memory used = $$(cat ${PROB}/valgrind_memcheck.out | grep 'total heap usage:' | tr -s ' ' | cut -d' ' -f9 | tr -d ',\n' | sed -e 's/$$/-748\n/' | bc) bytes"; \ 39 | fi 40 | 41 | # 42 | # Short cuts 43 | # 44 | basic: 45 | ${MAKE} PROB=$@ SAVI=100 run 46 | 47 | basic_spikes: 48 | ${MAKE} PROB=$@ BC1=0 IC="spikes(10,2,10,9)" SAVI=25 run 49 | 50 | hr_spikes: 51 | ${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 SAVI=10 run 52 | 53 | hr_spikes_smalldt: 54 | ${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 DT=0.0001 SAVI=500 run 55 | 56 | hr_spikes_crankn: 57 | ${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 ALG=crankn SAVI=10 run 58 | 59 | hr_spikes_crankn_largedt: 60 | ${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 DT=0.008 ALG=crankn SAVI=5 run 61 | 62 | highres: 63 | ${MAKE} PROB=$@ DX=0.01 SAVI=10 run 64 | 65 | hr_smalldt_short: 66 | ${MAKE} PROB=$@ DX=0.01 DT=0.001 SAVI=250 OUTI=250 run 67 | 68 | hr_smalldt_long: 69 | ${MAKE} PROB=$@ DX=0.01 DT=0.001 SAVI=250 OUTI=250 MAXI=20000 run 70 | 71 | hr_crankn: 72 | ${MAKE} PROB=$@ DX=0.01 DT=0.001 ALG=crankn SAVI=100 run 73 | 74 | crankn_faster: 75 | ${MAKE} PROB=$@ DX=0.01 DT=0.008 ALG=crankn SAVI=25 OUTI=50 run 76 | 77 | view: 78 | @pushd ${PROB};\ 79 | ${VISIT} -cli -s ../plot_heat.py 80 | # The shortcut targets share their names with the output directories that run creates (mkdir ${PROB}); without .PHONY a second invocation is skipped as up to date 81 | .PHONY: help clean run view all basic basic_spikes hr_spikes hr_spikes_smalldt hr_spikes_crankn hr_spikes_crankn_largedt highres hr_smalldt_short hr_smalldt_long hr_crankn crankn_faster 82 | all: basic highres hr_smalldt_short hr_smalldt_long hr_crankn crankn_faster 83 | -------------------------------------------------------------------------------- /lessons/hand_coded_heat/makefile.txt: -------------------------------------------------------------------------------- 1 | makefile
-------------------------------------------------------------------------------- /lessons/hand_coded_heat/plot_heat.py: -------------------------------------------------------------------------------- 1 | import sys, time 2 | 3 | hostName = 'scratlantis' 4 | 5 | hp0=GetMachineProfile(hostName) 6 | hp1=GetMachineProfile(hostName) 7 | hp1.ClearLaunchProfiles() 8 | hp1.AddLaunchProfiles(hp0.GetLaunchProfiles(0)) 9 | OpenComputeEngine(hp1) 10 | 11 | ca = CurveAttributes() 12 | ca.lineWidth = 1 13 | ca.designator = "" 14 | ca.showLegend = 0 15 | ca.showLabels = 0 16 | ca.curveColor = (0, 0, 255, 255) 17 | #ca.showPoints = 1 18 | ca.symbol = ca.Circle 19 | ca.pointSize = 5 20 | # Plot setup: open the heat_soln_*.curve files as a single database, add a Curve plot styled with ca, draw it, then adjust the curve view 21 | #SetWindowLayout(2) 22 | SetActiveWindow(1) 23 | OpenDatabase("heat_soln_*.curve database",0) 24 | AddPlot("Curve","curve") 25 | SetPlotOptions(ca) 26 | DrawPlots() 27 | v = GetViewCurve() 28 | v.viewportCoords = (0.2, 0.95, 0.15, 0.85) 29 | SetViewCurve(v) 30 | if v.rangeCoords[1] - v.rangeCoords[0] < 2: 31 | v.domainCoords = (-0.1, 1.1) 32 | v.rangeCoords = (-0.1, 1.1) 33 | SetViewCurve(v) 34 | # Disabled: the commented-out lines below would plot error.curve and residual.curve in a second window 35 | #SetActiveWindow(2) 36 | #DeleteAllPlots(); 37 | #val = OpenDatabase("error.curve") 38 | #if val: 39 | # AddPlot("Curve","curve") 40 | # ca.designator = "Error" 41 | # ca.curveColor = (255, 0, 0, 255) 42 | # ca.showPoints = 0 43 | # SetPlotOptions(ca) 44 | #OpenDatabase("residual.curve") 45 | #AddPlot("Curve","curve") 46 | #DrawPlots() 47 | # Playback: step the time slider through its remaining states with a 0.1 s pause per step; the script ends with a 10 s sleep and sys.exit(0) 48 | #SetActiveWindow(1) 49 | for i in range(TimeSliderGetNStates()-1): 50 | time.sleep(0.1) 51 | TimeSliderNextState() 52 | ResetView() 53 | time.sleep(10) 54 | sys.exit(0) 55 | -------------------------------------------------------------------------------- /lessons/hand_coded_heat/problem_setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/problem_setup.png
-------------------------------------------------------------------------------- /lessons/hand_coded_heat/simple_1d_heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/simple_1d_heat.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0000.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0001.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0002.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0003.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0004.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0005.png -------------------------------------------------------------------------------- /lessons/hand_coded_heat/spikes_animated.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes_animated.gif -------------------------------------------------------------------------------- /lessons/iterativesolvers/lesson.md: -------------------------------------------------------------------------------- 1 | # Iterative Solution of Linear and Nonlinear Systems 2 | 3 | ## At a Glance 4 | 5 | 6 | ``` 7 | Questions |Objectives |Key Points 8 | --------------------------|-------------------------------|------------------------------------- 9 | Does the preconditioner | See that the preconditioner | Through a single interface, 10 | affect the convergence | can be crucial for | PETSc supports runtime choices 11 | rate of Krylov solvers? | convergence. | of algorithms and options. 12 | | | 13 | How can I choose algs. | Learn the basics of using | Experimenting with 14 | and options at runtime | PETSc solvers & understanding | algorithms is essential 15 | when using PETSc? | output. | for good performance. 
16 | ``` 17 | 18 | Before running the examples, you must switch to the bash shell by using 19 | 20 | ``` 21 | bash 22 | ``` 23 | 24 | ## Example 1: Structural Mechanics Beam Deflection: 25 | 26 | This code uses MFEM and [PETSc/TAO](https://www.mcs.anl.gov/petsc/) to demonstrate the convergence of Krylov methods. 27 | 28 | The source code is included in [ex2p.c](./ex2p.c) 29 | 30 | Notes: Normally PETSc options can be passed as command line arguments. But because MFEM turns off this capability, PETSc options must be passed either in a file or in the PETSC_OPTIONS environment variable. See the file rc_ex2p for the PETSc options that are supplied to the application in these examples. 31 | 32 | ### Run 1: Run with Jacobi preconditioner 33 | 34 | ``` 35 | PETSC_OPTIONS="-pc_type jacobi -ksp_max_it 25" ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 36 | ``` 37 | 38 | The first column of the output is the residual norm. The next two are the maximum and minimum estimated eigenvalues of the operator and the final column is the condition number. 39 | 40 | #### Questions 41 | > **Is the iteration converging?** 42 | 43 | > **Read the output at the bottom from -ksp_view ... What Krylov method and preconditioner are being used?** 44 | 45 | ### Run 2: Run with the algebraic multigrid preconditioner 46 | 47 | ``` 48 | ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 49 | ``` 50 | 51 | #### Questions 52 | > **Is the iteration now converging?** 53 | 54 | > **Read the output at the bottom from -ksp_view ...
What Krylov method and preconditioner are being used?** 55 | 56 | ### Run 3: Run with the algebraic multigrid preconditioner but no conjugate gradient method 57 | 58 | ``` 59 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type richardson -ksp_max_it 25" ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 60 | ``` 61 | 62 | #### Questions 63 | > **Is the iteration now converging?** 64 | 65 | ### Run 4: Run with the algebraic multigrid preconditioner but with GMRES and a restart of 10 66 | 67 | ``` 68 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 10" ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 69 | ``` 70 | 71 | Now run with a GMRES restart of 30 72 | 73 | ``` 74 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 30" ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 75 | ``` 76 | 77 | Note the convergence is now very similar to that with CG. 78 | 79 | Now attempt to run this in parallel and obtain solver performance data 80 | ``` 81 | PETSC_OPTIONS="-log_view -ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 30" ${MPIEXEC_OMPI} -n 4 ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh 82 | ``` 83 | 84 | ## Example 2: Nonlinear Problem: 85 | 86 | ``` 87 | PETSC_OPTIONS="-snes_rtol 1.e-10 -snes_view -pc_type bjacobi -sub_pc_type ilu " ${MPIEXEC_OMPI} -n 4 ./ex10p -m ../../data/beam-quad.mesh --petscopts rc_ex10p -s 3 -rs 2 -dt 3 | more 88 | ``` 89 | 90 | Note the quadratic convergence; the residual norm exponent doubles until it runs out of digits to double. 91 | 92 | ## Out-Brief 93 | 94 | We have used [PETSc](https://www.mcs.anl.gov/petsc/) to demonstrate the use of preconditioned Krylov methods.
Many examples are available for various aspects of PETSc functionality, including 95 | * [Krylov solver examples](http://www.mcs.anl.gov/petsc/petsc-current/src/ksp/ksp/examples/tutorials) 96 | * [Nonlinear solver examples](http://www.mcs.anl.gov/petsc/petsc-current/src/snes/examples/tutorials) 97 | 98 | 99 | 100 | 101 |   102 | 103 | --- 104 | 105 | [Back to all HandsOnLessons](../lessons.md) 106 | -------------------------------------------------------------------------------- /lessons/lesson_template/animated_basic_heat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/lesson_template/animated_basic_heat.gif -------------------------------------------------------------------------------- /lessons/lesson_template/basic0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/lesson_template/basic0000.png -------------------------------------------------------------------------------- /lessons/lesson_template/lesson.md: -------------------------------------------------------------------------------- 1 | # Lesson Title 2 | 3 | ## At a Glance 4 | 5 | 6 | **Note**: GitHub Markdown tables are very limited! To do this section of questions, 7 | objectives and key points properly, we need to use more features of Jekyll then I 8 | wanna worry about prior to ATPESC. We will fix this after ATPESC to use Jekyll 9 | properly and it will improve its look substantially. Also, to avoid horizontal scroll 10 | of this pre-formatted section, try to keep to less than 102 chars in width. 11 | 12 | ``` 13 | Questions |Objectives |Key Points 14 | ---------------------------|--------------------------------|---------- 15 | Question 1? |Objective 1 |Key Point 1 16 | Question 2? 
|Objective 2 |Key Point 2 17 | Question 3? |Objective 3 |Key Point 3 18 | ``` 19 | 20 | * **Questions** are those things we want learners to know the answers to by the end of the lesson. 21 | We don't have to list all possible questions here...only the two or three _most_ important. 22 | * **Objectives** are those things we want learners to actually do or observe during the lesson. Again, 23 | only list here the ones that are _most_ important. 24 | * **Key Points** are those things we want learners to take away from the lesson. 25 | 26 | ## The Problem Being Solved 27 | 28 | Describe the problem(s) that will be solved in this lesson. 29 | If possible, include a picture or graphic here describing the physical problem setup. If the application 30 | or tool being used can deal with a variety of input physical problems, it's fine to mention 31 | that but here just include a picture of the problem they will be running in the _runs_ 32 | below. Maybe include the equation being solved as well.
33 | 34 | ![](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20u%7D%7B%5Cpartial%20t%7D%20%3D%20%5Calpha%20%5Cfrac%7B%5Cpartial%5E2%20u%7D%7B%5Cpartial%20x%5E2%7D) 35 | 36 | ## The Example Source Code 37 | 38 | Describe the application, its command-line arguments, have a link to view the actual source code 39 | or, if you prefer, include snippets of the source code here in a code-highlighted box as below 40 | 41 | ```c++ 42 | Geometry::~Geometry() 43 | { 44 | for (int i = 0; i < NumGeom; i++) 45 | { 46 | delete PerfGeomToGeomJac[i]; 47 | delete GeomVert[i]; 48 | } 49 | } 50 | ``` 51 | 52 | ## Running the Example 53 | 54 | ### Run 1 (Problem Name) 55 | 56 | Give the command line to run the example 57 | 58 | #### Expected Behavior/Output 59 | 60 | Include here what the learner should expect to happen 61 | 62 | * How long might it take to run 63 | * How long might they have to wait for resources before it can run 64 | * What should they see on their terminal 65 | 66 | #### Examining Results 67 | 68 | Include here examples of either plots or data you expect learners to observe. 69 | 70 | ![An Image](basic0000.png) 71 | 72 | Or, if you need to control the size, or have multiple images next to each other 73 | use a Markdown table and raw html... 74 | 75 | ||| 76 | 77 | **Note:** You can create [gif animations](https://www.tjhsst.edu/~dhyatt/supercomp/n401a.html) 78 | with the ImageMagick tool, available on most systems as the `convert` command, as in... 79 | 80 | ``` 81 | convert -delay 20 -loop 0 image*. animation.gif 82 | ``` 83 | 84 | ![Gif Animations](animated_basic_heat.gif) 85 | 86 | Alternatively, you can upload videos to YouTube and embed them here 87 | 88 | 89 | 90 | #### Questions 91 | 92 | > **Question #1?** (triple-click box below to reveal answer) 93 | 94 | **Note:** These Questions and _Answer Boxes_ are somewhat cheesy for the time being. 95 | We can expand our use of Jekyll and improve look and feel after ATPESC.
In the meantime, 96 | in order for these _Answer Boxes_ to behave as desired (e.g. hidden text which 97 | gets revealed by user triple-clicking in box), they have to be all on a single 98 | line with no line breaks and have to be white text on white background. Yeah, it's 99 | cheesy but will work for now. 100 | 101 | |Answer to Question #1| 102 | 103 | > **Question #2?** (triple-click box below to reveal answer) 104 | 105 | |Answer to Question #2| 106 | 107 | --- 108 | 109 | ### Run 2 (Problem Name) 110 | 111 | #### Expected Behavior/Output 112 | 113 | #### Examining Results 114 | 115 | Include here examples of either plots or data you expect learners to observe. 116 | 117 | #### Questions 118 | 119 | > **Question #1?** (triple-click box below to reveal answer) 120 | 121 | |Answer to Question #1| 122 | 123 | > **Question #2?** (triple-click box below to reveal answer) 124 | 125 | |Answer to Question #2| 126 | 127 | --- 128 | 129 | ### Run 3 130 | 131 | #### Expected Behavior/Output 132 | 133 | #### Examining Results 134 | 135 | Include here examples of either plots or data you expect learners to observe. 136 | 137 | #### Questions 138 | 139 | > **Question #1?** (triple-click box below to reveal answer) 140 | 141 | |Answer to Question #1| 142 | 143 | > **Question #2?** (triple-click box below to reveal answer) 144 | 145 | |Answer to Question #2| 146 | 147 | --- 148 | 149 | ## Out-Brief 150 | 151 | Here, re-emphasize the lesson objectives and key points. 152 | 153 | It's fine to go into greater detail about questions or objectives this lesson 154 | did not fully cover. 155 | 156 | ### Further Reading 157 | 158 | Include links to other online sources you might want to include.
159 | 160 | 161 | 162 |   163 | 164 | --- 165 | 166 | [Back to all HandsOnLessons](../lessons.md) 167 | -------------------------------------------------------------------------------- /lessons/lessons.md: -------------------------------------------------------------------------------- 1 | Lessons 2 | ------------ 3 | 4 | As described in [Welcome to HandsOnLessons](../README.md), hosted here are a series of increasingly sophisticated hands-on lessons aimed at helping users of all experience levels learn to use a variety of high-performance scientific software packages for solving complex numerical problems. This collection is just beginning; over time, more lessons will be provided to cover other important topics and packages. 5 | 6 | * [Basic, One-Dimensional Heat Equation](hand_coded_heat/lesson.md) 7 | * [Structured Meshes](AMReX/lesson.md) 8 | * [Finite Elements Convergence](mfem_convergence/lesson.md) 9 | * [Time Integrators](time_integrators/lesson.md) 10 | * [Iterative Solvers](iterativesolvers/lesson.md) 11 | * [Sparse Direct Solvers](superlu-mfem/lesson.md) 12 | * [Algebraic Multigrid](AMG/lesson.md) 13 | * [Adjoint Solvers](adjoint/lesson.md) 14 | 15 |   16 | 17 | --- 18 | 19 | [Lesson Template](lesson_template/lesson.md) -- intended for lesson developers, not for HandsOnLesson learners 20 | -------------------------------------------------------------------------------- /lessons/mfem_convergence/diffusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/diffusion.png -------------------------------------------------------------------------------- /lessons/mfem_convergence/ex8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/ex8.png -------------------------------------------------------------------------------- /lessons/mfem_convergence/lesson.md: -------------------------------------------------------------------------------- 1 | # Finite Elements and Convergence with MFEM 2 | 3 | ## At a Glance 4 | 5 | 6 | ``` 7 | Questions |Objectives |Key Points 8 | -----------------------------|--------------------------------|--------------------------- 9 | What is a finite element |Understand basic finite element |Basis functions determine 10 | method? |machinery |the quality of the solution 11 | | | 12 | What is a high order method? |Understand how polynomial |High order methods add more 13 | |order affects simulations |unknowns on the same mesh 14 | | |for more precise solutions 15 | | | 16 | What is convergence? |Understand how convergence and |High order methods converge 17 | |convergence rate is calculated |faster for smooth solutions 18 | ``` 19 | 20 | **Note:** To begin this lesson... 21 | ``` 22 | cd handson/mfem/examples/atpesc/mfem 23 | ``` 24 | 25 | ## A Widely Applicable Equation 26 | 27 | In this lesson, we demonstrate the discretization of a simple Poisson problem using 28 | the [MFEM library](http://mfem.org) and examine the finite element approximation error 29 | under uniform refinement. An example of this equation is steady-state [heat](../hand_coded_heat/lesson.md) 30 | [conduction](../time_integrators/lesson.md). 31 | 32 | |[](ex8.png)| [](diffusion.png)| 33 | 34 | ### Governing Equation 35 | 36 | The [_Poisson Equation_](https://en.wikipedia.org/wiki/Poisson's_equation) is a partial 37 | differential equation (PDE) that can be used to model steady-state heat conduction, 38 | electric potentials and gravitational fields. In mathematical terms ... 
39 | 40 | |![](http://latex.codecogs.com/gif.latex?-%5Cnabla%5E2u%20%3D%20f)|(1)| 41 | 42 | where _u_ is the potential field and _f_ is the source function. This PDE is a generalization 43 | of the [_Laplace Equation_](https://en.wikipedia.org/wiki/Laplace%27s_equation). 44 | 45 | ### Finite element basics 46 | 47 | To solve the above continuous equation using computers we need to 48 | [discretize](https://en.wikipedia.org/wiki/Discretization) it by introducing a finite 49 | (discrete) number of unknowns to compute for. 50 | In the [_Finite Element Method_](https://en.wikipedia.org/wiki/Finite_element_method) (FEM), this is 51 | done using the concept of _basis functions_. 52 | 53 | Instead of calculating the exact analytic solution _u_, consider approximating it by 54 | 55 | |![](http://latex.codecogs.com/gif.latex?u%20%5Capprox%20%5Csum_%7Bj%3D1%7D%5En%20c_j%20%5Cphi_j)|(2)| 56 | 57 | where ![](http://latex.codecogs.com/gif.latex?c_j) are scalar unknown coefficients and 58 | ![](http://latex.codecogs.com/gif.latex?%5Cphi_j) are known _basis functions_. They are 59 | typically piecewise-polynomial functions which are only non-zero on small portions of the 60 | computational mesh. With finite elements, the mesh can be totally unstructured, curved and 61 | non-conforming. 62 | 63 | |[](mesh.png)| 64 | 65 | To solve for the unknown coefficients, we multiply Poisson's equation by another (test) 66 | basis function ![](http://latex.codecogs.com/gif.latex?%5Cphi_i) and integrate by parts 67 | to obtain 68 | 69 | |![](http://latex.codecogs.com/gif.latex?%5Csum_%7Bj%3D1%7D%5En%5Cint_%5COmega%20c_j%20%5Cnabla%20%5Cphi_j%20%5Ccdot%20%5Cnabla%20%5Cphi_i%20dV%20%3D%20%5Cint_%5COmega%20f%20%5Cphi_i)|(3)| 70 | 71 | for every basis function ![](http://latex.codecogs.com/gif.latex?%5Cphi_i). 72 | (Here we are assuming homogeneous Dirichlet boundary conditions, corresponding e.g. to 73 | zero temperature on the whole boundary.) 
74 | 75 | Since the basis functions are known, we can rewrite (3) as 76 | 77 | |![](http://latex.codecogs.com/gif.latex?%5Cmathbf%7BAx%7D%20%3D%20%5Cmathbf%7Bb%7D)|(4)| 78 | 79 | where 80 | 81 | |![](http://latex.codecogs.com/gif.latex?A_%7Bij%7D%20%3D%20%5Cint_%5COmega%20%5Cnabla%20%5Cphi_i%20%5Ccdot%20%5Cnabla%20%5Cphi_j%20dV)|(5)| 82 | |![](http://latex.codecogs.com/gif.latex?b_i%20%3D%20%5Cint_%5COmega%20f%20%5Cphi_i%20dV)|(6)| 83 | |![](http://latex.codecogs.com/gif.latex?x_j%20%3D%20c_j)|(7)| 84 | 85 | This is an ![](http://latex.codecogs.com/gif.latex?n%20%5Ctimes%20n) linear system that 86 | can be solved [directly](../superlu-mfem/lesson.md) or [iteratively](../iterativesolvers/lesson.md) 87 | for the unknown coefficients. Note that we are free to choose the basis functions 88 | ![](http://latex.codecogs.com/gif.latex?%5Cphi_i) as we see fit. 89 | 90 | --- 91 | 92 | ## Convergence Study Source Code 93 | 94 | To define the system we need to solve, we need three things. First, we need to define our 95 | basis functions which live on the computational mesh. 96 | 97 | ```c++ 98 | // order is the FEM basis functions polynomial order 99 | FiniteElementCollection *fec = new H1_FECollection(order, dim); 100 | 101 | // pmesh is the parallel computational mesh 102 | ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); 103 | ``` 104 | 105 | This defines a collection of H1 functions (meaning they have a well-defined gradient) of 106 | a given polynomial order on a parallel computational mesh pmesh.
Next, we need to define 107 | the integrals in Equation (5) 108 | 109 | ```c++ 110 | ParBilinearForm *a = new ParBilinearForm(fespace); 111 | ConstantCoefficient one(1.0); 112 | a->AddDomainIntegrator(new DiffusionIntegrator(one)); 113 | a->Assemble(); 114 | ``` 115 | 116 | and Equation (6) 117 | 118 | ```c++ 119 | // f_exact is a C function defining the source 120 | FunctionCoefficient f(f_exact); 121 | ParLinearForm *b = new ParLinearForm(fespace); 122 | b->AddDomainIntegrator(new DomainLFIntegrator(f)); 123 | b->Assemble(); 124 | ``` 125 | 126 | This defines the matrix A and the vector b. We then solve the linear 127 | system for our solution vector x using [AMG-preconditioned](../AMG/lesson.md) PCG iteration. 128 | 129 | ```c++ 130 | // FEM -> Linear System 131 | HypreParMatrix A; 132 | Vector B, X; 133 | a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); 134 | 135 | // AMG preconditioner 136 | HypreBoomerAMG *amg = new HypreBoomerAMG(A); 137 | amg->SetPrintLevel(0); 138 | 139 | // PCG Krylov solver 140 | HyprePCG *pcg = new HyprePCG(A); 141 | pcg->SetTol(1e-12); 142 | pcg->SetMaxIter(200); 143 | pcg->SetPrintLevel(0); 144 | pcg->SetPreconditioner(*amg); 145 | 146 | // Solve the system A X = B 147 | pcg->Mult(B, X); 148 | 149 | // Linear System -> FEM 150 | a->RecoverFEMSolution(X, *b, x); 151 | ``` 152 | 153 | In this lesson we know what the exact solution is, so we can measure the amount of 154 | error in our approximate solution in two ways: 155 | 156 | |![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2%20%3D%20%5Cint_%5COmega%20%5Cleft%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%7C%5E2)|(8)| 157 | 
|![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BH%5E1%7D%5E2%20%3D%20%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2%20+%20%5Cleft%20%5C%7C%20%5Cnabla%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20%5Cnabla%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2)|(9)| 158 | 159 | The second one is known as the _energy norm_, which is derived directly from the weak form of the PDE. 160 | 161 | We expect the error to behave like 162 | 163 | |![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%20%5Cleq%20Ch%5E%7Br%7D)|(10)| 164 | 165 | where ![](http://latex.codecogs.com/gif.latex?h) is the mesh size, ![](http://latex.codecogs.com/gif.latex?C) 166 | is a mesh-independent constant and ![](http://latex.codecogs.com/gif.latex?r) is the 167 | [_convergence rate_](https://en.wikipedia.org/wiki/Rate_of_convergence).
168 | 169 | Given approximations at two different mesh resolutions, we can estimate the convergence rate as 170 | follows (![](http://latex.codecogs.com/gif.latex?C) doesn't change when we refine the mesh and compare runs): 171 | 172 | |![](http://latex.codecogs.com/gif.latex?r%20%5Capprox%20%5Cfrac%7B%5Clog%5C%20%5Cfrac%7B%20%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7Bh_%7B%5Cmbox%7Bnew%7D%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%7D%7B%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7Bh_%7B%5Cmbox%7Bold%7D%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%7D%7D%7B%20%5Clog%20%5Cfrac%7Bh_%7B%5Cmbox%7Bnew%7D%7D%7D%7Bh_%7B%5Cmbox%7Bold%7D%7D%7D%7D)|(11)| 173 | 174 | In code this is implemented in a refinement loop as follows: 175 | 176 | ```c++ 177 | double l2_err = x.ComputeL2Error(u); 178 | double h1_err = x.ComputeH1Error(&u, &u_grad, &one, 1.0, 1); 179 | pmesh->GetCharacteristics(h_min, h_max, kappa_min, kappa_max); 180 | 181 | l2_rate = log(l2_err/l2_err_prev) / log(h_min/h_prev); 182 | h1_rate = log(h1_err/h1_err_prev) / log(h_min/h_prev); 183 | ``` 184 | 185 | --- 186 | 187 | ## Running the Convergence Study 188 | 189 | The convergence study in `handson/mfem/examples/atpesc/mfem` has the following options 190 | 191 | ``` 192 | ./convergence --help 193 | 194 | Usage: ./convergence [options] ... 195 | Options: 196 | -h, --help 197 | Print this help message and exit. 198 | -m , --mesh , current value: ../../../data/star.mesh 199 | Mesh file to use. 200 | -o , --order , current value: 1 201 | Finite element order (polynomial degree). 202 | -sc, --static-condensation, -no-sc, --no-static-condensation, current option: --no-static-condensation 203 | Enable static condensation. 204 | -r , --refinements , current value: 4 205 | Number of total uniform refinements 206 | -sr , --serial-refinements , current value: 2 207 | Maximum number of serial uniform refinements 208 | -f , --frequency , current value: 1 209 | Set the frequency for the exact solution. 
210 | ``` 211 | 212 | ### Run 1 (Low order) 213 | 214 | In this run, we will examine the error after 7 uniform refinements in both the L2 and H1 norms using 215 | first order (linear) basis functions. We use the `star.mesh` 2D mesh file. 216 | 217 | ``` 218 | ./convergence -r 7 219 | Options used: 220 | --mesh ../../../data/star.mesh 221 | --order 1 222 | --no-static-condensation 223 | --refinements 7 224 | --serial-refinements 2 225 | --frequency 1 226 | ---------------------------------------------------------------------------------------- 227 | DOFs h L^2 error L^2 rate H^1 error H^1 rate 228 | ---------------------------------------------------------------------------------------- 229 | 31 0.4876 0.3252 0 2.631 0 230 | 101 0.2438 0.09293 1.807 1.387 0.9229 231 | 361 0.1219 0.02393 1.957 0.7017 0.9836 232 | 1361 0.06095 0.006027 1.989 0.3518 0.996 233 | 5281 0.03048 0.00151 1.997 0.176 0.999 234 | 20801 0.01524 0.0003776 1.999 0.08803 0.9997 235 | 82561 0.007619 9.441e-05 2 0.04402 0.9999 236 | ``` 237 | 238 | Note that the L2 error is converging at a rate of 2 while the H1 error is only converging at a rate of 1. 239 | 240 | ### Run 2 (High order) 241 | 242 | Now consider the same run only we are using 3rd order (cubic) basis functions instead. 
243 | 244 | ``` 245 | ./convergence -r 7 -o 3 246 | Options used: 247 | --mesh ../../../data/star.mesh 248 | --order 3 249 | --no-static-condensation 250 | --refinements 7 251 | --serial-refinements 2 252 | --frequency 1 253 | ---------------------------------------------------------------------------------------- 254 | DOFs h L^2 error L^2 rate H^1 error H^1 rate 255 | ---------------------------------------------------------------------------------------- 256 | 211 0.4876 0.004777 0 0.118 0 257 | 781 0.2438 0.0003178 3.91 0.01576 2.905 258 | 3001 0.1219 2.008e-05 3.984 0.001995 2.982 259 | 11761 0.06095 1.258e-06 3.997 0.0002501 2.996 260 | 46561 0.03048 7.864e-08 4 3.129e-05 2.999 261 | 185281 0.01524 4.915e-09 4 3.912e-06 3 262 | 739201 0.007619 3.072e-10 4 4.891e-07 3 263 | ``` 264 | 265 | The L2 error is now converging at a rate of 4 and the H1 error is converging at a rate of 3. 266 | This is because the exact solution in these runs is smooth, so higher-order methods 267 | approximate it better. 268 | 269 | #### Questions 270 | 271 | > **How many unknowns do we need in runs 1 and 2 to get 4 digits of accuracy? Which method is more efficient: low-order or high-order?** 272 | 273 | |The high-order method is more efficient. It needs only 3001 unknowns compared to 82561 unknowns for the low-order method!| 274 | 275 | ### Run 3 (3D example) 276 | The previous two runs used a 2D mesh in serial, but the same code can be used to run a 3D problem in parallel.
277 | 278 | ``` 279 | ${MPIEXEC_OMPI} -n 4 ./convergence -r 4 -o 2 -m ../../../data/inline-hex.mesh 280 | Options used: 281 | --mesh ../../../data/inline-hex.mesh 282 | --order 2 283 | --no-static-condensation 284 | --refinements 4 285 | --serial-refinements 2 286 | --frequency 1 287 | ---------------------------------------------------------------------------------------- 288 | DOFs h L^2 error L^2 rate H^1 error H^1 rate 289 | ---------------------------------------------------------------------------------------- 290 | 729 0.25 0.001386 0 0.02215 0 291 | 4913 0.125 0.0001772 2.967 0.005532 2.002 292 | 35937 0.0625 2.227e-05 2.993 0.001377 2.007 293 | 274625 0.03125 2.787e-06 2.998 0.0003441 2 294 | ``` 295 | 296 | #### Questions 297 | 298 | > **Experiment with different orders in 2D and 3D. What convergence rate will you expect in L2 and H1 for a given basis order ![](http://latex.codecogs.com/gif.latex?p)?** 299 | 300 | | For a smooth exact solution, the convergence rate in energy norm (H1) is p. Using the so-called Nitsche's Trick, one can prove that we pick up an additional order in L2, so the convergence rate there is p+1.| 301 | 302 | --- 303 | 304 | ## Out-Brief 305 | 306 | We demonstrated the ease of implementing an order- and dimension-independent finite element 307 | code in MFEM. We discussed the basics of the finite element method as well as demonstrated 308 | the effect of the polynomial order of the basis functions on convergence rates. 309 | 310 | ### Further Reading 311 | 312 | To learn more about MFEM, including example codes and miniapps, visit [mfem.org](http://mfem.org). 
313 | 314 | 315 | 316 |   317 | 318 | --- 319 | 320 | [Back to all HandsOnLessons](../lessons.md) 321 | -------------------------------------------------------------------------------- /lessons/mfem_convergence/mesh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/mesh.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/gmres.mpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres.mpg -------------------------------------------------------------------------------- /lessons/superlu-mfem/gmres_residual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres_residual.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/gmres_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres_time.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/lesson.md: -------------------------------------------------------------------------------- 1 | # Sparse Direct Solver 2 | 3 | ## At A Glance 4 | 5 | ``` 6 | Questions |Objectives |Key Points 7 | ---------------------------|--------------------------------|---------- 8 | Why need direct solver? 
| Can obtain accurate solution | Robust for difficult problems 9 | What parameters affect | Try different ordering options | Performance (time & memory) 10 | performance? | | can vary a lot 11 | ``` 12 | 13 | **Note:** To begin this lesson 14 | ``` 15 | cd handson/mfem/examples/atpesc/superlu 16 | ``` 17 | 18 | ## The problem being solved 19 | 20 | The [convdiff.cpp](https://github.com/mfem/mfem/blob/atpesc-dev/examples/atpesc/superlu/convdiff.cpp) 21 | application is modeling the steady state convection-diffusion equation in 2D 22 | with a constant velocity. This equation is used to model the concentration 23 | of something like a _dye_ in a _moving_ fluid as it diffuses and flows through 24 | the fluid. The equation is as follows: 25 | 26 | |![](http://latex.codecogs.com/gif.latex?%5Cnabla%20%5Ccdot%20%28%5Ckappa%20%5Cnabla%20u%29%20-%20%5Cnabla%20%5Ccdot%20%28%5Coverrightarrow%7Bv%7Du%29%2BR%3D0)|(1)| 27 | 28 | Where _u_ is the concentration that we are tracking, 29 | ![](http://latex.codecogs.com/gif.latex?%5Ckappa) is the diffusion rate, 30 | _v_ is the velocity of the flow and _R_ is a concentration source. 31 | 32 | In the application we use here, the velocity vector _direction_ is fixed in the _+x_ 33 | direction. However, the _magnitude_ is set by the user (default of 100), 34 | ![](http://latex.codecogs.com/gif.latex?%5Ckappa) is fixed at 1.0, and the 35 | source is 0.0 everywhere except for a small disc centered at the middle of the 36 | domain where it is 1.0. 37 | 38 | |Initial Condition| 39 | |:---:| 40 | |[](mfem-superlu0000.png)| 41 | 42 | Solving this PDE is well known to cause convergence problems for iterative solvers, 43 | for larger _v_. We use MFEM as a vehicle to demonstrate the use of a distributed, 44 | direct solver, [SuperLU_DIST](http://crd-legacy.lbl.gov/~xiaoye/SuperLU/), 45 | to solve very ill-conditioned linear systems. 
46 | 47 | ## The Example Source Code 48 | 49 | ## Running the Example 50 | 51 | ### Run 1: default setting with GMRES solver, preconditioned by hypre, velocity = 100 52 | 53 | ``` 54 | $ ./convdiff 55 | 56 | Options used: 57 | --refine 0 58 | --order 1 59 | --velocity 100 60 | --no-visit 61 | --no-superlu 62 | --slu-colperm 0 63 | Number of unknowns: 10201 64 | ============================================= 65 | Setup phase times: 66 | ============================================= 67 | GMRES Setup: 68 | wall clock time = 0.010000 seconds 69 | wall MFLOPS = 0.000000 70 | cpu clock time = 0.010000 seconds 71 | cpu MFLOPS = 0.000000 72 | 73 | L2 norm of b: 9.500000e-04 74 | Initial L2 norm of residual: 9.500000e-04 75 | ============================================= 76 | 77 | Iters resid.norm conv.rate rel.res.norm 78 | ----- ------------ ---------- ------------ 79 | 1 4.065439e-04 0.427941 4.279409e-01 80 | 2 1.318995e-04 0.324441 1.388415e-01 81 | 3 4.823031e-05 0.365660 5.076874e-02 82 | ... 
83 | 23 2.436775e-16 0.249025 2.565027e-13 84 | 85 | Final L2 norm of residual: 2.436857e-16 86 | 87 | ============================================= 88 | Solve phase times: 89 | ============================================= 90 | GMRES Solve: 91 | wall clock time = 0.030000 seconds 92 | wall MFLOPS = 0.000000 93 | cpu clock time = 0.020000 seconds 94 | cpu MFLOPS = 0.000000 95 | 96 | GMRES Iterations = 23 97 | Final GMRES Relative Residual Norm = 2.56511e-13 98 | Time required for solver: 0.0362886 (s) 99 | ``` 100 | 101 | |Steady State| 102 | |:---:| 103 | |[](mfem-superlu0005.png)| 104 | 105 | --- 106 | 107 | ### Run 2: increase velocity to 1000, GMRES does not converge anymore 108 | 109 | ``` 110 | $ ./convdiff --velocity 1000 111 | 112 | Options used: 113 | --refine 0 114 | --order 1 115 | --velocity 1000 116 | --no-visit 117 | --no-superlu 118 | --slu-colperm 0 119 | Number of unknowns: 10201 120 | ============================================= 121 | Setup phase times: 122 | ============================================= 123 | GMRES Setup: 124 | wall clock time = 0.020000 seconds 125 | wall MFLOPS = 0.000000 126 | cpu clock time = 0.010000 seconds 127 | cpu MFLOPS = 0.000000 128 | 129 | L2 norm of b: 9.500000e-04 130 | Initial L2 norm of residual: 9.500000e-04 131 | ============================================= 132 | 133 | Iters resid.norm conv.rate rel.res.norm 134 | ----- ------------ ---------- ------------ 135 | 1 9.500000e-04 1.000000 1.000000e+00 136 | 2 9.500000e-04 1.000000 1.000000e+00 137 | 3 9.500000e-04 1.000000 1.000000e+00 138 | ... 
139 | 200 9.500000e-04 1.000000 1.000000e+00 140 | ``` 141 | 142 | Below, we plot behavior of the GMRES method for velocity values in the 143 | range [100,1000] at increments, _dv_, of 25 and also show an animation 144 | of the solution GMRES gives as velocity increases. 145 | 146 | |Solutions @_dv_=25 in [100,1000]|Contours of Solution @ _vel=1000_| 147 | |:---:||:---:| 148 | ||[](mfem-superlu0003.png)| 149 | 150 | |Time to Solution|L2 norm of final residual| 151 | |:---:||:---:| 152 | |[](gmres_time.png)|[](gmres_residual.png)| 153 | 154 | > **What do you think is happening?** 155 | 156 | |GMRES method works ok for low velocity values. As velocity increases, GMRES method eventually crosses a threshold where it can no longer provide a useful result.| 157 | 158 | > **Why does time to solution show smoother transition than L2 norm?** 159 | 160 | |As instability is approached, more GMRES iterations are required to reach desired norm. So GMRES is still able to manage the solve and achieve a near-zero L2 norm. It just takes more and more iterations. Once GMRES is unable to solve, the L2 norm explodes.| 161 | 162 | --- 163 | 164 | ### Run 3: Now use SuperLU_DIST, with default options 165 | ``` 166 | $ ./convdiff -slu --velocity 1000 167 | 168 | Options used: 169 | --refine 0 170 | --order 1 171 | --velocity 1000 172 | --no-visit 173 | --superlu 174 | --slu-colperm 0 175 | Number of unknowns: 10201 176 | 177 | ** Memory Usage ********************************** 178 | ** NUMfact space (MB): (sum-of-all-processes) 179 | L\U : 41.12 | Total : 50.72 180 | ** Total highmark (MB): 181 | Sum-of-all : 62.27 | Avg : 62.27 | Max : 62.27 182 | ************************************************** 183 | Time required for solver: 38.2684 (s) 184 | Final L2 norm of residual: 1.55553e-18 185 | ``` 186 | 187 | |Steady State For _vel=1000_| 188 | |:---:| 189 | |[](mfem-superlu0004.png)| 190 | 191 | ### Run 4: Now use SuperLU_DIST, with MMD(A'+A) ordering. 
192 | ``` 193 | $ ./convdiff -slu --velocity 1000 --slu-colperm 2 194 | 195 | Options used: 196 | --refine 0 197 | --order 1 198 | --velocity 1000 199 | --no-visit 200 | --superlu 201 | --slu-colperm 2 202 | Number of unknowns: 10201 203 | Nonzeros in L 594238 204 | Nonzeros in U 580425 205 | nonzeros in L+U 1164462 206 | nonzeros in LSUB 203857 207 | 208 | ** Memory Usage ********************************** 209 | ** NUMfact space (MB): (sum-of-all-processes) 210 | L\U : 10.07 | Total : 16.19 211 | ** Total highmark (MB): 212 | Sum-of-all : 16.19 | Avg : 16.19 | Max : 16.19 213 | ************************************************** 214 | Time required for solver: 0.780516 (s) 215 | Final L2 norm of residual: 1.52262e-18 216 | ``` 217 | NOTE: the number of nonzeros in L+U is much smaller than natural ordering. 218 | This affects the memory usage and runtime. 219 | 220 | ### Run 5: Now use SuperLU_DIST, with Metis(A'+A) ordering. 221 | ``` 222 | $ ./convdiff -slu --velocity 1000 --slu-colperm 4 223 | 224 | Options used: 225 | --refine 0 226 | --order 1 227 | --velocity 1000 228 | --no-visit 229 | --superlu 230 | --slu-colperm 4 231 | Number of unknowns: 10201 232 | Nonzeros in L 522306 233 | Nonzeros in U 527748 234 | nonzeros in L+U 1039853 235 | nonzeros in LSUB 218211 236 | 237 | ** Memory Usage ********************************** 238 | ** NUMfact space (MB): (sum-of-all-processes) 239 | L\U : 9.24 | Total : 15.64 240 | ** Total highmark (MB): 241 | Sum-of-all : 15.64 | Avg : 15.64 | Max : 15.64 242 | ************************************************** 243 | Time required for solver: 0.786936 (s) 244 | Final L2 norm of residual: 1.55331e-18 245 | ``` 246 | 247 | |Solutions @_dv_=25 in [100,1000]|Steady State Solution @ _vel=1000_| 248 | |:---:||:---:| 249 | ||[](mfem-superlu0004.png)| 250 | 251 | |Time to Solution| 252 | |:---:| 253 | |[](slu_metis_time.png)| 254 | 255 | ### Run 6: Now use SuperLU_DIST, with Metis(A'+A) ordering, using 16 MPI tasks, on a larger 
problem. 256 | 257 | By adding `--refine 2`, each element in the mesh is subdivided twice yielding a 16x larger problem. 258 | Here, we'll run on 16 tasks and just grep the output for some key values of interest. 259 | 260 | ``` 261 | $ ${MPIEXEC_OMPI} -n 16 ./convdiff --refine 2 --velocity 1000 -slu --slu-colperm 4 >& junk.out 262 | $ grep 'Time required for solver:' junk.out 263 | Time required for solver: 10.3593 (s) 264 | Time required for solver: 16.3567 (s) 265 | Time required for solver: 11.6391 (s) 266 | Time required for solver: 10.669 (s) 267 | Time required for solver: 10.0605 (s) 268 | Time required for solver: 10.1216 (s) 269 | Time required for solver: 20.0721 (s) 270 | Time required for solver: 10.6205 (s) 271 | Time required for solver: 13.8445 (s) 272 | Time required for solver: 11.8943 (s) 273 | Time required for solver: 16.1552 (s) 274 | Time required for solver: 13.0849 (s) 275 | Time required for solver: 14.0008 (s) 276 | Time required for solver: 13.238 (s) 277 | Time required for solver: 12.387 (s) 278 | Time required for solver: 9.81836 (s) 279 | $ grep 'Final L2 norm of residual:' junk.out 280 | Final L2 norm of residual: 3.06951e-18 281 | Final L2 norm of residual: 3.06951e-18 282 | Final L2 norm of residual: 3.06951e-18 283 | Final L2 norm of residual: 3.06951e-18 284 | Final L2 norm of residual: 3.06951e-18 285 | Final L2 norm of residual: 3.06951e-18 286 | Final L2 norm of residual: 3.06951e-18 287 | Final L2 norm of residual: 3.06951e-18 288 | Final L2 norm of residual: 3.06951e-18 289 | Final L2 norm of residual: 3.06951e-18 290 | Final L2 norm of residual: 3.06951e-18 291 | Final L2 norm of residual: 3.06951e-18 292 | Final L2 norm of residual: 3.06951e-18 293 | Final L2 norm of residual: 3.06951e-18 294 | Final L2 norm of residual: 3.06951e-18 295 | Final L2 norm of residual: 3.06951e-18 296 | ``` 297 | 298 | > **Can you explain the processor times _relative_ to the previous, single processor run?** 299 | 300 | |We've increased the 
mesh size by 16x here. But, we've also added 16x processors. Yet, the time for those processors to run ranged between 10 and 20 seconds with an average of 12.7 seconds. The smaller, single processor run took 0.786936 and taking the ratio of these numbers, we get ~16. However, recall that the matrix size goes up as the SQUARE of the mesh size and this accounts for this additional factor of 16.| 301 | 302 | --- 303 | 304 | ## Out-Brief 305 | 306 | In this lesson, we have used [MFEM](http://mfem.org) as a vehicle to demonstrate 307 | the value of direct solvers from the [SuperLU_DIST](http://crd-legacy.lbl.gov/~xiaoye/SuperLU/) 308 | numerical package. 309 | 310 | ### Further Reading 311 | 312 | To learn more about sparse direct solver, see Gene Golub SIAM Summer School 313 | course materials: 314 | [Lecture Notes](http://www.siam.org/students/g2s3/2013/lecturers/XSLi/Lecture-Notes/sherry.pdf), 315 | [Book Chapter](http://crd-legacy.lbl.gov/~xiaoye/g2s3-summary.pdf), and 316 | [Video](http://www.siam.org/students/g2s3/2013/course.html) 317 | 318 | 319 | 320 |   321 | 322 | --- 323 | 324 | [Back to all HandsOnLessons](../lessons.md) 325 | -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0000.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0001.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0002.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0002.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0003.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0004.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/mfem-superlu0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0005.png -------------------------------------------------------------------------------- /lessons/superlu-mfem/slu_metis.mpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis.mpg -------------------------------------------------------------------------------- /lessons/superlu-mfem/slu_metis_residual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis_residual.png 
-------------------------------------------------------------------------------- /lessons/superlu-mfem/slu_metis_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis_time.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_dtt0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0000.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_dtt0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0001.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_dtt0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0002.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_dtt0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0003.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit0000.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0000.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0001.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0002.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit20000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20000.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit20001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20001.png -------------------------------------------------------------------------------- /lessons/time_integrators/mfem_sundials_explicit20002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20002.png -------------------------------------------------------------------------------- /lessons/time_integrators/nonlinear_heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/nonlinear_heat.png -------------------------------------------------------------------------------- /lessons/time_integrators/pyramid_animated.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/pyramid_animated.gif -------------------------------------------------------------------------------- /lessons/time_integrators/transient-heat.cpp.numbered.txt: -------------------------------------------------------------------------------- 1 | 1 // MFEM Example 16 - Parallel Version 2 | 2 // SUNDIALS Modification 3 | 3 // 4 | 4 // Compile with: make ex16p 5 | 5 // 6 | 6 // Sample runs: 7 | 7 // mpiexec -n 4 ex16p 8 | 8 // mpiexec -n 4 ex16p -m ../../data/inline-tri.mesh 9 | 9 // mpiexec -n 4 ex16p -m ../../data/disc-nurbs.mesh -tf 2 10 | 10 // mpiexec -n 4 ex16p -s 12 -a 0.0 -k 1.0 11 | 11 // mpiexec -n 4 ex16p -s 1 -a 1.0 -k 0.0 -dt 4e-6 -tf 2e-2 -vs 50 12 | 12 // mpiexec -n 8 ex16p -s 2 -a 0.5 -k 0.5 -o 4 -dt 8e-6 -tf 2e-2 -vs 50 13 | 13 // mpiexec -n 4 ex16p -s 3 -dt 2.0e-4 -tf 4.0e-2 14 | 14 // mpiexec -n 16 ex16p -m ../../data/fichera-q2.mesh 15 | 15 // mpiexec -n 16 ex16p -m ../../data/escher-p2.mesh 16 | 16 // mpiexec -n 8 ex16p -m ../../data/beam-tet.mesh -tf 10 -dt 0.1 17 | 17 // mpiexec -n 4 ex16p -m ../../data/amr-quad.mesh -o 4 -rs 0 -rp 0 18 | 18 // mpiexec -n 4 ex16p -m ../../data/amr-hex.mesh -o 2 -rs 0 -rp 0 19 
| 19 // 20 | 20 // Description: This example solves a time dependent nonlinear heat equation 21 | 21 // problem of the form du/dt = C(u), with a non-linear diffusion 22 | 22 // operator C(u) = \nabla \cdot (\kappa + \alpha u) \nabla u. 23 | 23 // 24 | 24 // The example demonstrates the use of nonlinear operators (the 25 | 25 // class ConductionOperator defining C(u)), as well as their 26 | 26 // implicit time integration. Note that implementing the method 27 | 27 // ConductionOperator::ImplicitSolve is the only requirement for 28 | 28 // high-order implicit (SDIRK) time integration. By default, this 29 | 29 // example uses the SUNDIALS ODE solvers from CVODE and ARKODE. 30 | 30 // 31 | 31 // We recommend viewing examples 2, 9 and 10 before viewing this 32 | 32 // example. 33 | 33 34 | 34 #include "mfem.hpp" 35 | 35 #include "papi.h" 36 | 36 #include 37 | 37 #include 38 | 38 #include 39 | 39 #include 40 | 40 41 | 41 using namespace std; 42 | 42 using namespace mfem; 43 | 43 44 | 44 /** After spatial discretization, the conduction model can be written as: 45 | 45 * 46 | 46 * du/dt = M^{-1}(-Ku) 47 | 47 * 48 | 48 * where u is the vector representing the temperature, M is the mass matrix, 49 | 49 * and K is the diffusion operator with diffusivity depending on u: 50 | 50 * (\kappa + \alpha u). 51 | 51 * 52 | 52 * Class ConductionOperator represents the right-hand side of the above ODE. 53 | 53 */ 54 | 54 class ConductionOperator : public TimeDependentOperator 55 | 55 { 56 | 56 protected: 57 | 57 ParFiniteElementSpace &fespace; 58 | 58 Array<int> ess_tdof_list; // this list remains empty for pure Neumann b.c. 
59 | 59 60 | 60 ParBilinearForm *M; 61 | 61 ParBilinearForm *K; 62 | 62 63 | 63 HypreParMatrix Mmat; 64 | 64 HypreParMatrix Kmat; 65 | 65 HypreParMatrix *T; // T = M + dt K 66 | 66 double current_dt; 67 | 67 68 | 68 CGSolver M_solver; // Krylov solver for inverting the mass matrix M 69 | 69 HypreSmoother M_prec; // Preconditioner for the mass matrix M 70 | 70 71 | 71 CGSolver T_solver; // Implicit solver for T = M + dt K 72 | 72 HypreSmoother T_prec; // Preconditioner for the implicit solver 73 | 73 74 | 74 double alpha, kappa; 75 | 75 76 | 76 mutable Vector z; // auxiliary vector 77 | 77 78 | 78 public: 79 | 79 ConductionOperator(ParFiniteElementSpace &f, double alpha, double kappa, 80 | 80 const Vector &u); 81 | 81 /// Apply the operator to u, writing the result (the ODE right-hand side described above) to du_dt. 82 | 82 virtual void Mult(const Vector &u, Vector &du_dt) const; 83 | 83 /** Solve the Backward-Euler equation: k = f(u + dt*k, t), for the unknown k. 84 | 84 This is the only requirement for high-order SDIRK implicit integration.*/ 85 | 85 virtual void ImplicitSolve(const double dt, const Vector &u, Vector &k); 86 | 86 87 | 87 /** Solve the system (M + dt K) y = M b. The result y replaces the input b. 88 | 88 This method is used by the implicit SUNDIALS solvers. */ 89 | 89 void SundialsSolve(const double dt, Vector &b); 90 | 90 91 | 91 /// Update the diffusion BilinearForm K using the given true-dof vector `u`. 92 | 92 void SetParameters(const Vector &u); 93 | 93 94 | 94 virtual ~ConductionOperator(); 95 | 95 }; 96 | 96 97 | 97 /// Custom Jacobian system solver for the SUNDIALS time integrators. 98 | 98 /** For the ODE system represented by ConductionOperator 99 | 99 100 | 100 M du/dt = -K(u), 101 | 101 102 | 102 this class facilitates the solution of linear systems of the form 103 | 103 104 | 104 (M + γK) y = M b, 105 | 105 106 | 106 for given b, u (not used), and γ = GetTimeStep(). 
*/ 107 | 107 class SundialsJacSolver : public SundialsODELinearSolver 108 | 108 { 109 | 109 private: 110 | 110 ConductionOperator *oper; 111 | 111 112 | 112 public: 113 | 113 SundialsJacSolver() : oper(NULL) { } 114 | 114 115 | 115 int InitSystem(void *sundials_mem); 116 | 116 int SetupSystem(void *sundials_mem, int conv_fail, 117 | 117 const Vector &y_pred, const Vector &f_pred, int &jac_cur, 118 | 118 Vector &v_temp1, Vector &v_temp2, Vector &v_temp3); 119 | 119 int SolveSystem(void *sundials_mem, Vector &b, const Vector &weight, 120 | 120 const Vector &y_cur, const Vector &f_cur); 121 | 121 int FreeSystem(void *sundials_mem); 122 | 122 }; 123 | 123 124 | 124 double InitialTemperature(const Vector &x); 125 | 125 126 | 126 static void initialize_papi(void) 127 | 127 { 128 | 128 #ifdef HAVE_PAPI 129 | 129 float ireal_time, iproc_time, imflops; 130 | 130 long long iflpops; 131 | 131 132 | 132 assert(PAPI_library_init(PAPI_VER_CURRENT) == PAPI_VER_CURRENT); 133 | 133 assert(PAPI_flops(&ireal_time,&iproc_time,&iflpops,&imflops) >= PAPI_OK); 134 | 134 #endif 135 | 135 } 136 | 136 137 | 137 static void finalize_papi(void) 138 | 138 { 139 | 139 #ifdef HAVE_PAPI 140 | 140 float real_time, proc_time, mflops; 141 | 141 long long flpops; 142 | 142 PAPI_dmem_info_t dmem; 143 | 143 144 | 144 assert(PAPI_flops(&real_time,&proc_time,&flpops,&mflops) >= PAPI_OK); 145 | 145 assert(PAPI_get_dmem_info(&dmem) >= PAPI_OK); 146 | 146 147 | 147 cout << "Memory Info:" << endl;; 148 | 148 cout << "\tMem Size: " << dmem.size << endl; 149 | 149 cout << "\tMem Resident:\t\t" << dmem.resident << endl; 150 | 150 cout << "\tMem Heap: " << dmem.heap << endl; 151 | 151 cout << "Timing Info:" << endl; 152 | 152 cout << "\tReal_time: " << real_time << endl; 153 | 153 cout << "\tProc_time: " << proc_time << endl; 154 | 154 cout << "Flops Info:" << endl; 155 | 155 cout << "\tTotal flpops: " << flpops << endl; 156 | 156 cout << "\tMFLOPS: " << mflops << endl; 157 | 157 #endif 158 | 158 } 159 | 159 160 
| 160 161 | 161 int main(int argc, char *argv[]) 162 | 162 { 163 | 163 initialize_papi(); 164 | 164 165 | 165 // Initialize MPI. 166 | 166 int num_procs, myid; 167 | 167 MPI_Init(&argc, &argv); 168 | 168 MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 169 | 169 MPI_Comm_rank(MPI_COMM_WORLD, &myid); 170 | 170 171 | 171 // Parse command-line options. 172 | 172 int dim = 2; 173 | 173 int ref_levels = 0; 174 | 174 int order = 1; 175 | 175 double t_final = 0.5; 176 | 176 double dt = 0.01; 177 | 177 double alpha = 0.0; 178 | 178 double kappa = 0.5; 179 | 179 bool implicit = false; 180 | 180 bool adaptdt = false; 181 | 181 double reltol = 1e-4; 182 | 182 double abstol = 1e-4; 183 | 183 bool noout = false; 184 | 184 185 | 185 OptionsParser args(argc, argv); 186 | 186 args.AddOption(&dim, "-d", "--dim", 187 | 187 "Number of dimensions in the problem (1 or 2)."); 188 | 188 args.AddOption(&ref_levels, "-r", "--refine", 189 | 189 "Number of times to refine the mesh uniformly."); 190 | 190 args.AddOption(&order, "-o", "--order", 191 | 191 "Order (degree) of the finite elements."); 192 | 192 args.AddOption(&t_final, "-tf", "--t-final", 193 | 193 "Final time; start time is 0."); 194 | 194 args.AddOption(&dt, "-dt", "--time-step", 195 | 195 "Initial time step."); 196 | 196 args.AddOption(&alpha, "-a", "--alpha", 197 | 197 "Alpha coefficient for conductivity: kappa + alpha*temperature"); 198 | 198 args.AddOption(&kappa, "-k", "--kappa", 199 | 199 "Kappa coefficient conductivity: kappa + alpha*temperature"); 200 | 200 args.AddOption(&adaptdt, "-adt", "--adapt-time-step", "-fdt", "--fixed-time-step", 201 | 201 "Flag whether or not to adapt the time step."); 202 | 202 args.AddOption(&implicit, "-imp", "--implicit", "-exp", "--explicit", 203 | 203 "Implicit or Explicit ODE solution."); 204 | 204 args.AddOption(&reltol, "-rtol", "--relative-tolerance", 205 | 205 "Relative tolerance in Sundials time integrator."); 206 | 206 args.AddOption(&abstol, "-atol", "--absolute-tolerance", 207 | 207 
"Absolute tolerance in Sundials time integrator."); 208 | 208 args.AddOption(&noout, "-noout", "--no-output", "-out", "--do-output", 209 | 209 "Disable all file outputs."); 210 | 210 211 | 211 int precision = 8; 212 | 212 cout.precision(precision); 213 | 213 args.Parse(); 214 | 214 if (!args.Good()) 215 | 215 { 216 | 216 args.PrintUsage(cout); 217 | 217 MPI_Finalize(); 218 | 218 return 1; 219 | 219 } 220 | 220 221 | 221 if (myid == 0) 222 | 222 { 223 | 223 args.PrintOptions(cout); 224 | 224 } 225 | 225 226 | 226 Mesh *mesh; 227 | 227 if (dim == 1) 228 | 228 { 229 | 229 mesh = new Mesh(16, 1.0); 230 | 230 } 231 | 231 else if (dim == 2) 232 | 232 { 233 | 233 mesh = new Mesh(16, 16, Element::QUADRILATERAL, 1, 1.0, 1.0); 234 | 234 } 235 | 235 else if (dim == 3) 236 | 236 { 237 | 237 mesh = new Mesh(16, 16, 16, Element::HEXAHEDRON, 1, 1.0, 1.0, 1.0); 238 | 238 } 239 | 239 else 240 | 240 { 241 | 241 cout << "Diminsion mus be set to 1, 2, or 3." << endl; 242 | 242 return 2; 243 | 243 } 244 | 244 ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh); 245 | 245 for (int lev = 0; lev < ref_levels; lev++) 246 | 246 { 247 | 247 pmesh->UniformRefinement(); 248 | 248 } 249 | 249 delete mesh; 250 | 250 251 | 251 // Define the ARKODE solver used for time integration. Either implicit or explicit. 252 | 252 ODESolver *ode_solver = NULL; 253 | 253 ARKODESolver *arkode = NULL; 254 | 254 SundialsJacSolver sun_solver; // Used by the implicit ARKODE solver. 
255 | 255 256 | 256 if (implicit) 257 | 257 { 258 | 258 arkode = new ARKODESolver(MPI_COMM_WORLD, ARKODESolver::IMPLICIT); 259 | 259 arkode->SetLinearSolver(sun_solver); 260 | 260 } 261 | 261 else 262 | 262 { 263 | 263 arkode = new ARKODESolver(MPI_COMM_WORLD, ARKODESolver::EXPLICIT); 264 | 264 arkode->SetERKTableNum(FEHLBERG_13_7_8); 265 | 265 } 266 | 266 arkode->SetStepMode(ARK_ONE_STEP); 267 | 267 arkode->SetSStolerances(reltol, abstol); 268 | 268 arkode->SetMaxStep(t_final / 2.0); 269 | 269 if (!adaptdt) 270 | 270 { 271 | 271 arkode->SetFixedStep(dt); 272 | 272 } 273 | 273 ode_solver = arkode; 274 | 274 275 | 275 // Define the vector finite element space representing the current and the 276 | 276 // initial temperature, u_ref. 277 | 277 H1_FECollection fe_coll(order, dim); 278 | 278 ParFiniteElementSpace fespace(pmesh, &fe_coll); 279 | 279 ParGridFunction u_gf(&fespace); 280 | 280 int fe_size = fespace.GlobalTrueVSize(); 281 | 281 if (myid == 0) 282 | 282 { 283 | 283 cout << "Number of temperature unknowns: " << fe_size << endl; 284 | 284 } 285 | 285 286 | 286 // Set the initial conditions for u. 287 | 287 FunctionCoefficient u_0(InitialTemperature); 288 | 288 u_gf.ProjectCoefficient(u_0); 289 | 289 Vector u; 290 | 290 u_gf.GetTrueDofs(u); 291 | 291 292 | 292 // Initialize the conduction operator and the VisIt visualization. 293 | 293 ConductionOperator oper(fespace, alpha, kappa, u); 294 | 294 u_gf.SetFromTrueDofs(u); 295 | 295 VisItDataCollection visit_dc("dump", pmesh); 296 | 296 visit_dc.RegisterField("temperature", &u_gf); 297 | 297 if (!noout) 298 | 298 { 299 | 299 visit_dc.SetCycle(0); 300 | 300 visit_dc.SetTime(0.0); 301 | 301 visit_dc.Save(); 302 | 302 } 303 | 303 304 | 304 // Perform time-integration 305 | 305 if (myid == 0) 306 | 306 { 307 | 307 cout << "Integrating the ODE ..." 
<< endl; 308 | 308 } 309 | 309 ode_solver->Init(oper); 310 | 310 double t = 0.0; 311 | 311 bool last_step = false; 312 | 312 for (int ti = 1; !last_step; ti++) 313 | 313 { 314 | 314 if (dt > t_final - t) 315 | 315 { 316 | 316 dt = t_final - t; 317 | 317 arkode->SetFixedStep(dt); 318 | 318 } 319 | 319 ode_solver->Step(u, t, dt); 320 | 320 321 | 321 if (myid == 0) 322 | 322 { 323 | 323 cout << "step " << ti << ", t = " << t << endl; 324 | 324 arkode->PrintInfo(); 325 | 325 } 326 | 326 327 | 327 u_gf.SetFromTrueDofs(u); 328 | 328 329 | 329 if (!noout) 330 | 330 { 331 | 331 visit_dc.SetCycle(ti); 332 | 332 visit_dc.SetTime(t); 333 | 333 visit_dc.Save(); 334 | 334 } 335 | 335 336 | 336 oper.SetParameters(u); 337 | 337 last_step = (t >= t_final - 1e-8*dt); 338 | 338 } 339 | 339 340 | 340 // Cleanup 341 | 341 delete ode_solver; 342 | 342 delete pmesh; 343 | 343 MPI_Finalize(); 344 | 344 345 | 345 finalize_papi(); 346 | 346 347 | 347 return 0; 348 | 348 } 349 | 349 350 | 350 ConductionOperator::ConductionOperator(ParFiniteElementSpace &f, double al, 351 | 351 double kap, const Vector &u) 352 | 352 : TimeDependentOperator(f.GetTrueVSize(), 0.0), fespace(f), M(NULL), K(NULL), 353 | 353 T(NULL), current_dt(0.0), 354 | 354 M_solver(f.GetComm()), T_solver(f.GetComm()), z(height) 355 | 355 { 356 | 356 const double rel_tol = 1e-8; 357 | 357 358 | 358 M = new ParBilinearForm(&fespace); 359 | 359 M->AddDomainIntegrator(new MassIntegrator()); 360 | 360 M->Assemble(0); // keep sparsity pattern of M and K the same 361 | 361 M->FormSystemMatrix(ess_tdof_list, Mmat); 362 | 362 363 | 363 M_solver.iterative_mode = false; 364 | 364 M_solver.SetRelTol(rel_tol); 365 | 365 M_solver.SetAbsTol(0.0); 366 | 366 M_solver.SetMaxIter(100); 367 | 367 M_solver.SetPrintLevel(0); 368 | 368 M_prec.SetType(HypreSmoother::Jacobi); 369 | 369 M_solver.SetPreconditioner(M_prec); 370 | 370 M_solver.SetOperator(Mmat); 371 | 371 372 | 372 alpha = al; 373 | 373 kappa = kap; 374 | 374 375 | 375 
T_solver.iterative_mode = false; 376 | 376 T_solver.SetRelTol(rel_tol); 377 | 377 T_solver.SetAbsTol(0.0); 378 | 378 T_solver.SetMaxIter(100); 379 | 379 T_solver.SetPrintLevel(0); 380 | 380 T_solver.SetPreconditioner(T_prec); 381 | 381 382 | 382 SetParameters(u); 383 | 383 } 384 | 384 385 | 385 void ConductionOperator::Mult(const Vector &u, Vector &du_dt) const 386 | 386 { 387 | 387 // Compute: 388 | 388 // du_dt = M^{-1}*-K(u) 389 | 389 // for du_dt 390 | 390 Kmat.Mult(u, z); 391 | 391 z.Neg(); // z = -z 392 | 392 M_solver.Mult(z, du_dt); 393 | 393 } 394 | 394 395 | 395 void ConductionOperator::ImplicitSolve(const double dt, 396 | 396 const Vector &u, Vector &du_dt) 397 | 397 { 398 | 398 // Solve the equation: 399 | 399 // du_dt = M^{-1}*[-K(u + dt*du_dt)] 400 | 400 // for du_dt 401 | 401 if (!T) 402 | 402 { 403 | 403 T = Add(1.0, Mmat, dt, Kmat); 404 | 404 current_dt = dt; 405 | 405 T_solver.SetOperator(*T); 406 | 406 } 407 | 407 MFEM_VERIFY(dt == current_dt, ""); // SDIRK methods use the same dt 408 | 408 Kmat.Mult(u, z); 409 | 409 z.Neg(); 410 | 410 T_solver.Mult(z, du_dt); 411 | 411 } 412 | 412 413 | 413 void ConductionOperator::SundialsSolve(const double dt, Vector &b) 414 | 414 { 415 | 415 // Solve the system (M + dt K) y = M b. The result y replaces the input b. 
416 | 416 if (!T || dt != current_dt) 417 | 417 { 418 | 418 delete T; 419 | 419 T = Add(1.0, Mmat, dt, Kmat); 420 | 420 current_dt = dt; 421 | 421 T_solver.SetOperator(*T); 422 | 422 } 423 | 423 Mmat.Mult(b, z); 424 | 424 T_solver.Mult(z, b); 425 | 425 } 426 | 426 427 | 427 void ConductionOperator::SetParameters(const Vector &u) 428 | 428 { 429 | 429 ParGridFunction u_alpha_gf(&fespace); 430 | 430 u_alpha_gf.SetFromTrueDofs(u); 431 | 431 for (int i = 0; i < u_alpha_gf.Size(); i++) 432 | 432 { 433 | 433 u_alpha_gf(i) = kappa + alpha*u_alpha_gf(i); 434 | 434 } 435 | 435 436 | 436 delete K; 437 | 437 K = new ParBilinearForm(&fespace); 438 | 438 439 | 439 GridFunctionCoefficient u_coeff(&u_alpha_gf); 440 | 440 441 | 441 K->AddDomainIntegrator(new DiffusionIntegrator(u_coeff)); 442 | 442 K->Assemble(0); // keep sparsity pattern of M and K the same 443 | 443 K->FormSystemMatrix(ess_tdof_list, Kmat); 444 | 444 delete T; 445 | 445 T = NULL; // re-compute T on the next ImplicitSolve or SundialsSolve 446 | 446 } 447 | 447 448 | 448 ConductionOperator::~ConductionOperator() 449 | 449 { 450 | 450 delete T; 451 | 451 delete M; 452 | 452 delete K; 453 | 453 } 454 | 454 455 | 455 456 | 456 int SundialsJacSolver::InitSystem(void *sundials_mem) 457 | 457 { 458 | 458 TimeDependentOperator *td_oper = GetTimeDependentOperator(sundials_mem); 459 | 459 460 | 460 // During development, we use dynamic_cast<> to ensure the setup is correct: 461 | 461 oper = dynamic_cast(td_oper); 462 | 462 MFEM_VERIFY(oper, "operator is not ConductionOperator"); 463 | 463 return 0; 464 | 464 } 465 | 465 466 | 466 int SundialsJacSolver::SetupSystem(void *sundials_mem, int conv_fail, 467 | 467 const Vector &y_pred, const Vector &f_pred, 468 | 468 int &jac_cur, Vector &v_temp1, 469 | 469 Vector &v_temp2, Vector &v_temp3) 470 | 470 { 471 | 471 jac_cur = 1; 472 | 472 473 | 473 return 0; 474 | 474 } 475 | 475 476 | 476 int SundialsJacSolver::SolveSystem(void *sundials_mem, Vector &b, 477 | 477 const Vector 
&weight, const Vector &y_cur, 478 | 478 const Vector &f_cur) 479 | 479 { 480 | 480 oper->SundialsSolve(GetTimeStep(sundials_mem), b); 481 | 481 482 | 482 return 0; 483 | 483 } 484 | 484 485 | 485 int SundialsJacSolver::FreeSystem(void *sundials_mem) 486 | 486 { 487 | 487 return 0; 488 | 488 } 489 | 489 490 | 490 491 | 491 //This will be a "pyramid" initial temperature with 1.0 at the center 492 | 492 //tending to 0.0 at all the boundaries. 493 | 493 double InitialTemperature(const Vector &x) 494 | 494 { 495 | 495 double max_comp_dist = 0.0; 496 | 496 for (int d = 0; d < x.Size(); ++d) 497 | 497 { 498 | 498 double comp_dist = std::abs(x[d] - 0.5); 499 | 499 if (comp_dist > max_comp_dist) 500 | 500 { 501 | 501 max_comp_dist = comp_dist; 502 | 502 } 503 | 503 } 504 | 504 return 1.0 - 2.0*max_comp_dist; 505 | 505 } 506 | -------------------------------------------------------------------------------- /tools/atpesc2017_cooley_vnc_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | acct=ATPESC2017 3 | nnodes=2 4 | tl=600 5 | localos=`uname` 6 | linuxvnc='' 7 | 8 | if [[ -z "$1" ]]; then 9 | echo "Usage: $0 [debug]" 10 | echo " if 'debug' present reduces allocation to 1 node @ 20 mins" 11 | exit 1 12 | fi 13 | cooley_username=$1 14 | 15 | if [[ "$2" == debug ]]; then 16 | set -x 17 | nnodes=1 18 | tl=20 19 | fi 20 | 21 | if [[ "$localos" == "Linux" ]]; then 22 | if [ -f /usr/bin/vncviewer ]; then 23 | linuxvnc=vncviewer 24 | elif [ -f /usr/bin/vinagre ]; then 25 | linuxvnc=vinagre 26 | else 27 | echo "Please install vncviewer (from TigerVNC) or vinagre and rerun the script" 28 | exit 1 29 | fi 30 | fi 31 | 32 | # Ensure ~/.ssh/config exists and has limited permissions 33 | if [[ ! -e ~/.ssh/config ]]; then 34 | if [[ ! -e ~/.ssh ]]; then 35 | mkdir ~/.ssh 36 | chmod 700 ~/.ssh ~/.ssh/config 37 | fi 38 | touch ~/.ssh/config 39 | chmod 700 ~/.ssh/config 40 | fi 41 | if [[ ! 
-e ~/.ssh/cm_socket ]]; then 42 | mkdir ~/.ssh/cm_socket 43 | fi 44 | 45 | # 46 | # Append stuff to ~/.ssh/config for ssh control master to cooley 47 | # 48 | if [[ -z "$(grep cooley-nph ~/.ssh/config)" ]]; then 49 | cat >> ~/.ssh/config << EOF 50 | #added by NumericalPackagesHandsOn 51 | Host cooley-nph 52 | Hostname cooley.alcf.anl.gov 53 | Compression yes 54 | ControlMaster auto 55 | ControlPersist 12h 56 | ControlPath ~/.ssh/cm_socket/%r@cooley.alcf.anl.gov:%p 57 | EOF 58 | fi 59 | 60 | # 61 | # open login to cooley (will prompt) and put in bg and keep open all day 62 | # This is the login that all others will use shared authentication with 63 | # 64 | ssh -N -f ${cooley_username}@cooley-nph 65 | 66 | # 67 | # copy vnc dot files to cooley prompt for desired vnc password 68 | # 69 | ssh ${cooley_username}@cooley-nph "mkdir -p ~/.vnc; cat > ~/.vnc/xstartup" << EOF 70 | #!/bin/bash 71 | #created by NumericalPackagesHandsOn 72 | export DISPLAY=:0.0 73 | export HANDSON=/projects/ATPESC2017/NumericalPackages/handson/ 74 | xterm -fn 10x20 & 75 | twm 76 | EOF 77 | ssh ${cooley_username}@cooley-nph "chmod u+x ~/.vnc/xstartup" 78 | # 79 | # Update users .soft.cooley file for basic setup 80 | # 81 | #ssh ${cooley_username}@cooley-nph "cat >> ~/.soft.cooley" << EOF 82 | ##added by NumericalPackagesHandsOn 83 | #+gcc-4.8.1 84 | #@visit 85 | #EOF 86 | # 87 | # Get a temporary password from user and confirm its intended 88 | # 89 | while true; do 90 | read -p "Create temporary VNC Password: " pw 91 | echo "You have entered \"$pw\", is this correct?" 
92 | select yn in "Yes" "No"; do 93 | case $yn in 94 | Yes ) break 2;; 95 | esac 96 | done 97 | done 98 | # Push the password to cooley and vncpasswd encode it 99 | ssh ${cooley_username}@cooley-nph "rm -f ~/.vnc/passwd; echo $pw | vncpasswd -f > ~/.vnc/passwd; chmod 600 ~/.vnc/passwd" 100 | 101 | # 102 | # Reserve 2 nodes for interactive use all day 103 | # 104 | ssh -t -t -f ${cooley_username}@cooley-nph "qsub -I -n $nnodes -t $tl -A $acct" -q training > ./qsub-interactive.out 2>&1 & 105 | 106 | # 107 | # Loop watching output from above to get allocation node name 108 | # 109 | nodid="" 110 | while [[ -z "$nodid" ]] ; do 111 | echo "Checking for allocation completion" 112 | nodid=$(cat ./qsub-interactive.out | tr ' ' '\n' | grep cc[0-9][0-9][0-9].cooley | cut -d'.' -f1) 113 | sleep 5 114 | done 115 | echo "Got allocation at $nodid" 116 | 117 | # 118 | # Startup xvncserver on the allocation 119 | # 120 | ssh ${cooley_username}@cooley-nph "nohup ssh $nodid x0vncserver --display=:0.0 --NeverShared=1 --geometry=1200x900+0+0 --PasswordFile=/home/$cooley_username/.vnc/passwd --MaxProcessorUsage=100 >& /dev/null &" 121 | sleep 5 122 | 123 | # 124 | # Set up 2-hop ssh tunnel to allocation, (above) through login and run xstartup there 125 | # 126 | ssh -f -L 22590:$nodid:5900 ${cooley_username}@cooley-nph "nohup ssh $nodid ~/.vnc/xstartup >& /dev/null &" 127 | sleep 5 128 | 129 | # 130 | # finally, start the vnc client on local machine 131 | # 132 | echo "Attempting to connect VNC to localhost:22590 - If this fails you can reattempt this manually" 133 | if [[ "$localos" == Darwin ]]; then 134 | open vnc://localhost:22590 135 | elif [[ "$localos" == Linux ]]; then 136 | $linuxvnc localhost::22590 137 | elif [[ "$localos" == windows ]]; then 138 | echo "not implemented" 139 | fi 140 | --------------------------------------------------------------------------------