├── Gemfile
├── LICENSE
├── README.md
├── _config.yml
├── contribution_guide.md
├── how_to_preview_locally.md
├── lessons
    ├── AMG
    │   ├── lesson.md
    │   ├── rhs.jpg
    │   └── solution.jpg
    ├── AMReX
    │   ├── VisIt_2D.pdf
    │   ├── lesson.md
    │   ├── phi.gif
    │   ├── phi_adv_noref.0.jpg
    │   ├── phi_adv_noref.60.jpg
    │   ├── phi_adv_ref.0.jpg
    │   ├── phi_adv_ref.60.jpg
    │   ├── phi_diff_0.jpg
    │   └── phi_diff_10000.jpg
    ├── adjoint
    │   ├── chkpt.png
    │   ├── ex1.png
    │   ├── ex1adj.c
    │   ├── ex1adj.png
    │   ├── ex3opt.c
    │   ├── ex5adj.c
    │   └── lesson.md
    ├── atpesc-instructions.md
    ├── hand_coded_heat
    │   ├── 1d_heat_equation.xlsx
    │   ├── animated_basic_heat.gif
    │   ├── basic0000.png
    │   ├── basic0001.png
    │   ├── basic0002.png
    │   ├── basic0003.png
    │   ├── heat.c
    │   ├── heat.c.numbered.txt
    │   ├── highres0000.png
    │   ├── highres0001.png
    │   ├── hr_crankn0000.png
    │   ├── hr_crankn0001.png
    │   ├── hr_crankn0002.png
    │   ├── hr_smalldt_long0000.png
    │   ├── hr_smalldt_long0001.png
    │   ├── hr_smalldt_long0002.png
    │   ├── hr_smalldt_long0003.png
    │   ├── hr_smalldt_long0004.png
    │   ├── hr_spikes0000.png
    │   ├── hr_spikes0001.png
    │   ├── hr_spikes0002.png
    │   ├── hr_spikes_crankn0000.png
    │   ├── hr_spikes_crankn0001.png
    │   ├── hr_spikes_smalldt0000.png
    │   ├── hr_spikes_smalldt0001.png
    │   ├── hr_spikes_smalldt0002.png
    │   ├── hr_spikes_smalldt0003.png
    │   ├── lesson.md
    │   ├── makefile
    │   ├── makefile.txt
    │   ├── plot_heat.py
    │   ├── problem_setup.png
    │   ├── simple_1d_heat.png
    │   ├── spikes0000.png
    │   ├── spikes0001.png
    │   ├── spikes0002.png
    │   ├── spikes0003.png
    │   ├── spikes0004.png
    │   ├── spikes0005.png
    │   └── spikes_animated.gif
    ├── iterativesolvers
    │   └── lesson.md
    ├── lesson_template
    │   ├── animated_basic_heat.gif
    │   ├── basic0000.png
    │   └── lesson.md
    ├── lessons.md
    ├── mfem_convergence
    │   ├── diffusion.png
    │   ├── ex8.png
    │   ├── lesson.md
    │   └── mesh.png
    ├── superlu-mfem
    │   ├── gmres.mpg
    │   ├── gmres_residual.png
    │   ├── gmres_time.png
    │   ├── lesson.md
    │   ├── mfem-superlu0000.png
    │   ├── mfem-superlu0001.png
    │   ├── mfem-superlu0002.png
    │   ├── mfem-superlu0003.png
    │   ├── mfem-superlu0004.png
    │   ├── mfem-superlu0005.png
    │   ├── slu_metis.mpg
    │   ├── slu_metis_residual.png
    │   └── slu_metis_time.png
    └── time_integrators
    │   ├── lesson.md
    │   ├── mfem_sundials_dtt0000.png
    │   ├── mfem_sundials_dtt0001.png
    │   ├── mfem_sundials_dtt0002.png
    │   ├── mfem_sundials_dtt0003.png
    │   ├── mfem_sundials_explicit0000.png
    │   ├── mfem_sundials_explicit0001.png
    │   ├── mfem_sundials_explicit0002.png
    │   ├── mfem_sundials_explicit20000.png
    │   ├── mfem_sundials_explicit20001.png
    │   ├── mfem_sundials_explicit20002.png
    │   ├── nonlinear_heat.png
    │   ├── pyramid_animated.gif
    │   └── transient-heat.cpp.numbered.txt
└── tools
    └── atpesc2017_cooley_vnc_setup.sh


/Gemfile:
--------------------------------------------------------------------------------
1 | gem 'github-pages', group: :jekyll_plugins
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Extreme-scale Scientific Software Development Kit (xSDK)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Welcome to HandsOnLessons
 2 | 
 3 | 
 4 | Hosted here are a series of increasingly sophisticated _hands-on_ lessons aimed at helping
 5 | users of all experience levels learn to use a variety of scientific software packages for
 6 | solving complex numerical problems. We begin with custom, hand-coded solutions to the
 7 | homogeneous, one-dimensional heat equation to demonstrate basic numerical and performance
 8 | issues such as accuracy, stability, time to solution, memory, and flops required, along
 9 | with motivation for the use of numerical software packages to help achieve more robust,
10 | efficient, scalable, extensible, and portable software.
11 | 
12 | [Go to the Lessons](lessons/lessons.md)
13 | 
14 | We slowly build upon
15 | these simple, early examples introducing additional complexities such as inhomogeneities,
16 | higher-order solutions, time-variabilities, nonlinearities, and complex geometries in higher
17 | dimensions. We demonstrate the use of a variety of numerical software packages to address these
18 | issues and the advantages they offer over hand-coded software.
19 | 
20 | Throughout the currently designed lesson plans, we use the [MFEM](http://mfem.org) (unstructured)
21 | and [AMReX](https://github.com/AMReX-Codes/AMReX-Codes.github.io) (structured, adaptive)
22 | packages as _demonstration vehicles_. Both of these packages include the basic functional pieces
23 | necessary to start from a continuous description of a physical problem to solve, through
24 | PDE specification, numerical analysis, discretization, algorithm development and then implementation.
25 | In addition, they include essential abstractions to support scalable, parallel expression
26 | of the algorithms and to orchestrate the application of various numerical packages in the
27 | solution.
28 | 
29 | In addition, we use [PAPI](http://icl.utk.edu/papi) (performance counters) to enable users to
30 | observe variations in performance (time and space) as algorithmic choices are varied and
31 | [VisIt](http://visit.llnl.gov) to visualize results.
32 | 
33 | These initial lessons are a starting point for a growing set of hands-on examples to demonstrate
34 | a broad range of numerical software packages.
35 | 
36 | The packages demonstrated here benefit from many person-years of software development
37 | aimed at addressing such issues as extreme scalability and multi-modal parallelism such as message-passing,
38 | many-threads and/or GPUs.  Numerical results computed with the packages demonstrated here have been vetted over
39 | many years of use in a variety of application settings. Nonetheless, because the main focus of these
40 | lessons is on **introducing** how to use these packages, there are likely few specific hands-on lessons
41 | here in which there are opportunities to observe these important capabilities of numerical software packages.
42 | 
43 | See the [Contributing Guide](contribution_guide.md) for instructions on contributing lessons.
44 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
2 | exclude: tools
3 | 


--------------------------------------------------------------------------------
/contribution_guide.md:
--------------------------------------------------------------------------------
  1 | # Contributing to HandsOnLessons
  2 | 
  3 | ## About GitHub Pages and Jekyll
  4 | 
  5 | [Jekyll](https://jekyllrb.com) allows site content developers to build beautiful
  6 | mostly static sites by composing ASCII files involving a combination of three
  7 | technologies...
  8 | 
  9 | * [Markdown](https://guides.github.com/features/mastering-markdown/)
 10 | (or [Textile](https://www.promptworks.com/textile)),
 11 | * YAML [Front Matter](http://jekyllrb.com/docs/frontmatter/) page configuration code
 12 | * [Liquid](https://shopify.github.io/liquid/) content filtering and page construction code
 13 | 
 14 | The Jekyll engine reads source `.md` files and, optionally, a number of
 15 | other CSS, HTML (and other Web technology code snippets and accouterments) stored in
 16 | supporting files and directories in the repository (when necessary) and builds the
 17 | site HTML files.
 18 | 
 19 | On a GitHub Pages site, this process happens automatically upon each new commit of
 20 | files to the site's repository. GitHub uses Jekyll to re-generate the site and the
 21 | changes go live shortly thereafter. See below about how to preview changes to the
 22 | site before committing them.
 23 | 
 24 | Within the repository, a single source `.md` file will contain not only Markdown
 25 | content, but may also contain, optionally, a YAML code block at the beginning of
 26 | the file (called its _front matter_) which holds information on how the page is
 27 | to be configured when the site is built and, optionally, Liquid instructions
 28 | which can appear anywhere in the file and which program the Jekyll engine on how
 29 | to filter, merge and combine content snippets into a generated page.
 30 | 
 31 | So far, we are not using either YAML Front Matter or Liquid here to build our
 32 | site. We may eventually decide we might need to do that and it would be fine
 33 | but there isn't anything yet that has indicated we have a need for that. I
 34 | mention this because we can easily get much more sophisticated than we are
 35 | currently in managing and hosting this content.
 36 | 
 37 | ## Previewing your changes locally
 38 | 
 39 | If you are new to GitHub and Jekyll themed GitHub pages, read this section to
 40 | learn how to preview your work locally before committing it to GitHub. On the
 41 | other hand, if it's easier for you, you can just commit changes to GitHub, see
 42 | how they turn out there and modify if you are not satisfied. That workflow
 43 | just takes a bit more time because GitHub/Jekyll backend may be delayed a
 44 | minute or two in re-generating the site. And, it also means your changes
 45 | are always going live.
 46 | 
 47 | This site is a GitHub pages site using a GitHub [supported](https://pages.github.com/themes/)
 48 | Jekyll theme backend. This means you compose content in GitHub Markdown and when you commit
 49 | changes to GitHub, Jekyll generates the HTML pages for the site automatically. But, it also
 50 | means it's a bit harder for you to preview, locally, your changes before committing.
 51 | 
 52 | But, if we don't like the current theme we're using,
 53 | [Cayman](https://pages-themes.github.io/cayman/), we can easily change
 54 | it by going to the repository's settings page on github, scrolling down to the
 55 | GitHub Pages section and hitting the `Change Theme` button and then selecting
 56 | another theme. Note, there are only a handful of themes supported by GitHub this
 57 | way (e.g. easily switchable via the `Change Theme` button). However, that does not
 58 | mean we cannot choose from another UNsupported theme. There are hundreds of
 59 | [Jekyll themes](http://jekyllthemes.org) available and we should probably spend
 60 | some more time to find the best one. Using an UNsupported theme simply means that we'd have
 61 | to buy into committing all the Jekyll accouterments to our repo and, as a result,
 62 | it is not as easy to switch the theme later if we don't like it.
 63 | 
 64 | To permit the site to be easily switched to another GitHub supported Jekyll theme,
 65 | I have chosen **not** to commit to the repository all the Jekyll accouterments.
 66 | In any event, even if they were present, you would still wind up having to run
 67 | Jekyll to re-build and serve the site, each time you want to preview it.
 68 | To preview changes locally, you will need to run these commands
 69 | 
 70 | ```
 71 | $ gem install github-pages
 72 | $ bundle exec jekyll serve
 73 | ```
 74 | 
 75 | To get all the tools on my Mac (gem, bundle, jekyll), I used Homebrew.
 76 | 
 77 | Detailed instructions on setting up GitHub pages locally can be found [here](how_to_preview_locally.md).
 78 | 
 79 | ## Including Math Equations
 80 | 
 81 | I am not sure I have found the easiest solution here. I have read that MathJax may be
 82 | better but it seemed to require a bit more configuration than I had time to play with and
 83 | what I tried here seemed to work. The code snippet below demonstrates an example...
 84 | 
 85 | ```
 86 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20Q%7D%7B%5Cpartial%20t%7D%20%3D%20%5Cfrac%7B%5Cpartial%20s%7D%7B%5Cpartial%20t%7D)
 87 | ```
 88 | 
 89 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20Q%7D%7B%5Cpartial%20t%7D%20%3D%20%5Cfrac%7B%5Cpartial%20s%7D%7B%5Cpartial%20t%7D)
 90 | 
 91 | Everything after the question-mark character at the end of `latex?` and before the closing right-parenthesis
 92 | is the URL-encoded form of the latex commands to generate the equation. Here are the steps...
 93 | 
 94 | 1. Create the latex for the equation
 95 | 2. URL-encode it using something like, [url-encode-decode](http://www.url-encode-decode.com)
 96 | 3. Replace all instances of `+` that step 2 inserted to represent spaces with `%20`
 97 | 4. Paste the resulting, possibly very long, string between `?` and enclosing `)` in the above.
 98 | 
 99 | ## Adding a Hands On Example
100 | 
101 | 1. Be sure to start from the [lesson template](lessons/lesson_template/lesson.md)
102 | 1. Add a new directory to `./lessons/<new-example-name>`
103 | 1. Add a new line to `./lessons/lessons.md` for the new example
104 | 1. Copy the [lesson template](lessons/lesson_template/lesson.md), `./lessons/lesson_template/lesson.md` to `./lessons/<new-example-name>`.
105 | 1. Edit/revise the copied `lesson.md` file to create the new lesson
106 | 1. You can link to images, source code files that you place in `./lessons/<new-example-name>`
107 |    using standard Markdown links.
108 | 
109 | I think it would be best to put all content related to each hands-on lesson we develop here
110 | into its own separate directory. That means all images, example codes, markdown pages, etc.
111 | Then, we can have a separate page (or page hierarchy) that indexes the examples.
112 | 
113 | I know Jekyll has a built-in concept of a `posts` object. That is because Jekyll is designed
114 | around the notion of supporting blogging. It may make sense to handle each hands-on kinda
115 | sorta like a `post` in Jekyll. But, I think that also means that content related to each
116 | lesson gets scattered across multiple directories (at least given the **default** way that
117 | Jekyll seems to handle `posts`). We need to investigate proper use of Jekyll further **after**
118 | we've completed ATPESC.
119 | 
120 | ## GitHub Style Primer
121 | 
122 | This section is just a copy of boilerplate content from GitHub Pages template
123 | about how to use Markdown, etc. I have kept it here for convenience.
124 | 
125 | You can use the [editor on GitHub](https://github.com/xsdk-project/HandsOnLessons/edit/master/README.md) to maintain and preview the content for your website in Markdown files.
126 | 
127 | Whenever you commit to this repository, GitHub Pages will run [Jekyll](https://jekyllrb.com/) to rebuild the pages in your site, from the content in your Markdown files.
128 | 
129 | ### Markdown
130 | 
131 | Markdown is a lightweight and easy-to-use syntax for styling your writing. It includes conventions for
132 | 
133 | ```markdown
134 | Syntax highlighted code block
135 | 
136 | # Header 1
137 | ## Header 2
138 | ### Header 3
139 | 
140 | - Bulleted
141 | - List
142 | 
143 | 1. Numbered
144 | 2. List
145 | 
146 | **Bold** and _Italic_ and `Code` text
147 | 
148 | [Link](url) and ![Image](src)
149 | ```
150 | 
151 | For more details see [GitHub Flavored Markdown](https://guides.github.com/features/mastering-markdown/).
152 | 
153 | ### Jekyll Themes
154 | 
155 | Your Pages site will use the layout and styles from the Jekyll theme you have selected in your [repository settings](https://github.com/xsdk-project/HandsOnLessons/settings). The name of this theme is saved in the Jekyll `_config.yml` configuration file.
156 | 
157 | ### Support or Contact
158 | 
159 | Having trouble with Pages? Check out our [documentation](https://help.github.com/categories/github-pages-basics/) or [contact support](https://github.com/contact) and we’ll help you sort it out.
160 | 


--------------------------------------------------------------------------------
/how_to_preview_locally.md:
--------------------------------------------------------------------------------
 1 | # Preview the site locally
 2 | 
 3 | ## Requirements
 4 | 
 5 | * github-pages
 6 | * bundler
 7 | * jekyll
 8 | 
 9 | The recommended way of installing these is to use **gem**, the [Ruby](https://www.ruby-lang.org/en/) package manager.
10 | 
11 | ```
12 | $ gem install github-pages
13 | $ gem install bundler
14 | $ gem install jekyll
15 | ```
16 | 
17 | But Ruby version 2.1.0 or higher is required. The version can be checked with
18 | ```
19 | $ ruby -v
20 | ```
21 | 
22 | The native Ruby on my Mac (Sierra 10.12.6) is 2.0. So I took the following steps to upgrade it.
23 | 
24 | 1. Open your terminal and run
25 | ```
26 | $ \curl -sSL https://get.rvm.io | bash -s stable
27 | ```
28 | When this is complete, you need to restart your terminal for the rvm to work.
29 | 1. Run
30 | ```
31 | $ rvm install ruby-2.4
32 | ```
33 | Type `ruby -v` in the terminal; if it reports version 2.4, you are done.
34 | 1. If it still shows ruby 2.0, run
35 | ```
36 | $ rvm use ruby-2.4
37 | ```
38 | To set this as the default version, run
39 | ```
40 | $ rvm use ruby-2.4 --default
41 | ```
42 | 
43 | ## Preview your work using a browser
44 | 
45 | 1. Navigate into the **root** directory of the project
46 | 1. Run Jekyll locally
47 | ```
48 | $ bundle exec jekyll serve
49 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml
50 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml
51 |             Source: /Users/hongzhang/Projects/HandsOnLessons
52 |        Destination: /Users/hongzhang/Projects/HandsOnLessons/_site
53 |  Incremental build: disabled. Enable with --incremental
54 |       Generating...
55 |    GitHub Metadata: No GitHub API authentication could be found. Some fields may be missing or have incorrect data.
56 |                     done in 0.582 seconds.
57 |  Auto-regeneration: enabled for '/Users/hongzhang/Projects/HandsOnLessons'
58 | Configuration file: /Users/hongzhang/Projects/HandsOnLessons/_config.yml
59 |     Server address: http://127.0.0.1:4000
60 |   Server running... press ctrl-c to stop.
61 | ```
62 | 1. Open your browser, preview the local site at **http://127.0.0.1:4000**
63 | 


--------------------------------------------------------------------------------
/lessons/AMG/lesson.md:
--------------------------------------------------------------------------------
  1 | # Algebraic Multigrid
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | 
  7 | ```
  8 | Questions                 |Objectives                           |Key Points
  9 | --------------------------|-------------------------------------|--------------------------
 10 | Why multigrid over CG for |Understand multigrid concept         |Faster convergence,
 11 | large problems?           |                                     |better scalability
 12 | Why use more aggressive   |Understand need for low complexities |Lower memory use, faster
 13 | coarsening for AMG?       |                                     |times, but more iterations
 14 | Why a structured solver   |Understand importance of suitable    |Higher efficiency,
 15 | for a structured problem? |data structures                      |faster solve times
 16 | ```
 17 | 
 18 | **Note:** To begin this lesson...
 19 | ```
 20 | cd handson/amrex/amg
 21 | ```
 22 | 
 23 | ## The Problem
 24 | 
 25 | The linear system to be solved is generated by AMReX from the following differential equation:
 26 | 
 27 | ![equation](http://latex.codecogs.com/gif.latex?%5Cvarphi-%5Cnabla%5Ccdot%5Cbeta%5Cnabla%5Cvarphi%3DRHS)
 28 | 
 29 | with Dirichlet boundary conditions.
 30 | 
 31 | The grid is a cube of 128 x 128 x 128 cells, divided into (at least) 8 subgrids.
 32 | We also consider a larger grid with 256 x 256 x 256 cells.
 33 | 
 34 | The right hand side (left image) and solution (right image) are plotted below:
 35 | 
 36 | <img src = "rhs.jpg" width ="300">       <img src = "solution.jpg" width ="300">
 37 | 
 38 | 
 39 | ## The Example Input File
 40 | 
 41 | To run AMReX with hypre, an input file is required. It specifies the hypre solver to be used and also lets you define problem options (e.g., grid size) as well as solver options for some of the solvers. The content of the file 'inputs' is given below, although some specific input files are also provided for the hands-on exercises.
 42 | 
 43 | ```
 44 | n_cell = 128
 45 | max_grid_size = 64
 46 | tol_rel = 1.e-6
 47 | 
 48 | bc_type = Dirichlet  # Dirichlet, Neumann, or periodic
 49 | 
 50 | hypre.solver_flag = PFMG-PCG # SMG, or PFMG, SMG-PCG, PFMG-PCG, PCG, BoomerAMG, AMG-PCG, or DS-PCG
 51 | hypre.print_level = 1
 52 | #hypre.agg_num_levels = 1  # uses aggressive coarsening in BoomerAMG  and AMG-PCG
 53 | 
 54 | ## Below are some more BoomerAMG options which change
 55 | #hypre.relax_type = 6  #uses symmetric Gauss-Seidel smoothing
 56 | #hypre.coarsen_type = 8  #uses PMIS coarsening instead of HMIS
 57 | #hypre.Pmx_elmts = 6 # changes max nnz per row from 4 to 6 in interpolation
 58 | #hypre.interp_type = 0  #uses classified interpolation instead of distance-two interpolation
 59 | #hypre.strong_threshold = 0.5 # changes strength threshold
 60 | #hypre.max_row_sum = 1.0 # changes treatment of diagonal dominant portions
 61 | 
 62 | ##Below are some more PFMG options which affect convergence and times
 63 | #hypre.pfmg_rap_type = 1 # uses nonGalerkin version for PFMG and PFMG-CG
 64 | #hypre.skip_relax = 0 # skips some relaxations in PFMG and PFMG-CG
 65 | 
 66 | ```
 67 | 
 68 | ## Running the Example
 69 | 
 70 | ### Exercise 1: Compare a generic iterative solver (CG) with multigrid
 71 | 
 72 | Use the following command to solve our problem using conjugate gradient (CG):
 73 | ```
 74 | /usr/bin/time -p mpiexec -n 8 ./amrex pcg
 75 | ```
 76 | 
 77 | You should get some output that looks like this
 78 | ```
 79 | MPI initialized with 8 MPI processes
 80 | 
 81 | 213 Hypre Solver Iterations, Relative Residual 9.6515871445080283e-07
 82 | 
 83 | Max-norm of the error is 0.0002812723371
 84 | Maximum absolute value of the solution is 0.9991262625
 85 | Maximum absolute value of the rhs is 1661.007274
 86 | real 3.46
 87 | user 21.75
 88 | sys 1.07
 89 | ```
 90 | 
 91 | Now we solve the same problem using PFMG, the structured multigrid solver from hypre:
 92 | ```
 93 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmg
 94 | ```
 95 | 
 96 | You should get some output that looks like this
 97 | ```
 98 | MPI initialized with 8 MPI processes
 99 | 
100 | 22 Hypre Solver Iterations, Relative Residual 8.2557612429588765e-07
101 | 
102 | Max-norm of the error is 0.0002812747961
103 | Maximum absolute value of the solution is 0.9991262625
104 | Maximum absolute value of the rhs is 1661.007274
105 | real 1.47
106 | user 8.72
107 | sys 0.99
108 | ```
109 | #### Examining Results
110 | 
111 | Examine the number of iterations and the time listed in the line starting with 'real' for both runs.
112 | 
113 | #### Questions
114 | 
115 | > **How do the numbers of iterations compare?**
116 | 
117 | |<font color="white">PFMG converges much faster, almost 10 times as fast</font>|
118 | 
119 | > **How do the times compare?**
120 | 
121 | |<font color="white">PFMG is more than twice as fast</font>|
122 | 
123 | > **What does this say about the cost of an iteration for CG compared to PFMG?**
124 | 
125 | |<font color="white">One iteration of PFMG is more costly than one CG iteration.</font>|
126 | 
127 | 
128 | ### Example 2 (Use PFMG as a preconditioner for CG)
129 | 
130 | Now use the following command:
131 | ```
132 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmgpcg
133 | ```
134 | 
135 | You should get some output that looks like this
136 | ```
137 | MPI initialized with 8 MPI processes
138 | 
139 | 10 Hypre Solver Iterations, Relative Residual 4.7155525002784425e-07
140 | 
141 | Max-norm of the error is 0.0002813010027
142 | Maximum absolute value of the solution is 0.9991262625
143 | Maximum absolute value of the rhs is 1661.007274
144 | real 1.23
145 | user 6.73
146 | sys 0.98
147 | ```
148 | 
149 | #### Questions
150 | 
151 | > **How does the number of iterations compare to that of PFMG without CG?**
152 | 
153 | |<font color="white">PFMG with PCG converges about twice as fast as PFMG, 22 times as fast as CG.</font>|
154 | 
155 | > **How do the times compare?**
156 | 
157 | |<font color="white">PFMG-PCG is faster than PFMG alone. It is almost 3 times as fast as CG.</font>|
158 | 
159 | > **What does this say about the cost of an iteration for PFMG-PCG compared to PFMG?**
160 | 
161 | |<font color="white">One iteration of PFMG-PCG is more costly than one PFMG iteration.</font>|
162 | 
163 | Since using multigrid in combination with CG is faster than multigrid alone for the considered problem, we now only consider multigrid solvers in combination with CG for the sake of time in the hands-on exercises.
164 | 
165 | ### Example 3 (Examine scalability of CG compared with PFMG-CG)
166 | 
167 | We now solve the larger problem using first CG and then PFMG-PCG.
168 | 
169 | Now use the following command:
170 | ```
171 | /usr/bin/time -p mpiexec -n 8 ./amrex pcg.large
172 | ```
173 | 
174 | You should get some output that looks like this
175 | ```
176 | MPI initialized with 8 MPI processes
177 | 
178 | 440 Hypre Solver Iterations, Relative Residual 9.9740013439759751e-07
179 | 
180 | Max-norm of the error is 7.039397221e-05
181 | Maximum absolute value of the solution is 0.9997778176
182 | Maximum absolute value of the rhs is 1663.462965
183 | real 42.25
184 | user 333.29
185 | sys 2.47
186 | ```
187 | 
188 | Now use the following command:
189 | ```
190 | /usr/bin/time -p mpiexec -n 8 ./amrex pfmgpcg.large
191 | ```
192 | 
193 | You should get some output that looks like this
194 | ```
195 | MPI initialized with 8 MPI processes
196 | 
197 | 11 Hypre Solver Iterations, Relative Residual 2.5598385447572329e-07
198 | 
199 | Max-norm of the error is 7.041558516e-05
200 | Maximum absolute value of the solution is 0.9997778176
201 | Maximum absolute value of the rhs is 1663.462965
202 | real 7.15
203 | user 52.28
204 | sys 2.93
205 | ```
206 | 
207 | #### Examining Results
208 | 
209 | > **How do the numbers of iterations now compare?**
210 | 
211 | |<font color="white">Iterations for PCG doubled, whereas PFMG-PCG only increased by 1. PFMG-PCG converges 40 times as fast as PCG.</font>|
212 | 
213 | > **How do the times compare?**
214 | 
215 | |<font color="white">PFMG-PCG is almost 6 times as fast as PCG.</font>|
216 | 
217 | > **If you compare these numbers to the numbers for the smaller system, what do you observe?**
218 | 
219 | |<font color="white">Times and iterations for PCG grow much faster than for PFMG-PCG with increasing problem size. PFMG-PCG is more scalable than PCG.</font>|
220 | 
221 | 
222 | ### Example 4 (Examine complexities in AMG-PCG)
223 | 
224 | We now go back to the smaller problem using AMG-PCG.
225 | 
226 | Now use the following command:
227 | ```
228 | /usr/bin/time -p mpiexec -n 8 ./amrex amgpcg
229 | ```
230 | 
231 | You should get some output that looks like this
232 | ```
233 | MPI initialized with 8 MPI processes
234 | 
235 |  Num MPI tasks = 8
236 | 
237 |  Num OpenMP threads = 1
238 | 
239 | 
240 | BoomerAMG SETUP PARAMETERS:
241 | 
242 |  Max levels = 25
243 |  Num levels = 8
244 | 
245 |  Strength Threshold = 0.250000
246 |  Interpolation Truncation Factor = 0.000000
247 |  Maximum Row Sum Threshold for Dependency Weakening = 0.900000
248 | 
249 |  Coarsening Type = HMIS
250 |  measures are determined locally
251 | 
252 | 
253 |  No global partition option chosen.
254 | 
255 |  Interpolation = extended+i interpolation
256 | 
257 | Operator Matrix Information:
258 | 
259 |             nonzero         entries per row        row sums
260 | lev   rows  entries  sparse  min  max   avg       min         max
261 | ===================================================================
262 |  0 2097152 14581760  0.000     4    7   7.0   1.000e-03   9.830e+05
263 |  1 1048122 19632610  0.000     7   42  18.7   1.998e-03   1.229e+06
264 |  2  199271  9681535  0.000    15   89  48.6   4.627e-03   1.397e+06
265 |  3   27167  2149919  0.003    17  140  79.1   2.503e-02   1.491e+06
266 |  4    3504   306430  0.025    13  185  87.5   3.300e-01   2.597e+06
267 |  5     458    32358  0.154    11  175  70.7   1.164e+00   7.021e+06
268 |  6      61     2375  0.638    10   60  38.9  -1.998e+09   7.281e+09
269 |  7       6       36  1.000     6    6   6.0   9.485e+06   7.651e+07
270 | 
271 | Interpolation Matrix Information:
272 |                  entries/row    min     max         row sums
273 | lev  rows cols    min max     weight   weight     min       max
274 | =================================================================
275 |  0 2097152 x 1048122   1   4   1.111e-01 4.631e-01 3.333e-01 1.000e+00
276 |  1 1048122 x 199271   1   4   3.236e-03 5.927e-01 1.070e-01 1.000e+00
277 |  2 199271 x 27167   0   4  -1.101e-01 7.178e-01 0.000e+00 1.000e+00
278 |  3 27167 x 3504    0   4  -5.812e-01 6.900e-01 0.000e+00 1.000e+00
279 |  4  3504 x 458     0   4  -3.235e+01 6.382e+01 0.000e+00 1.000e+00
280 |  5   458 x 61      0   4  -3.563e+01 1.590e+01 -3.338e+01 1.000e+00
281 |  6    61 x 6       0   3   2.779e-03 5.764e-01 0.000e+00 1.012e+00
282 | 
283 | 
284 |      Complexity:    grid = 1.609679
285 |                 operator = 3.181168
286 |                 memory = 3.843933
287 | 
288 | 
289 | 
290 | 
291 | BoomerAMG SOLVER PARAMETERS:
292 | 
293 |   Maximum number of cycles:         1
294 |   Stopping Tolerance:               0.000000e+00
295 |   Cycle type (1 = V, 2 = W, etc.):  1
296 | 
297 |   Relaxation Parameters:
298 |    Visiting Grid:                     down   up  coarse
299 |             Number of sweeps:            1    1     1
300 |    Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:     13   14     9
301 |    Point types, partial sweeps (1=C, -1=F):
302 |                   Pre-CG relaxation (down):   0
303 |                    Post-CG relaxation (up):   0
304 |                              Coarsest grid:   0
305 | 
306 | 
307 | 10 Hypre Solver Iterations, Relative Residual 6.9077383873163803e-07
308 | 
309 | Max-norm of the error is 0.0002813125533
310 | Maximum absolute value of the solution is 0.9991262625
311 | Maximum absolute value of the rhs is 1661.007274
312 | real 4.63
313 | user 33.54
314 | sys 1.47
315 | ```
316 | 
317 | This output gives the statistics for the AMG preconditioner that was set up. It shows the number of levels, the total number of nonzeros, and the average number of nonzeros per row for each matrix
318 | ![](http://latex.codecogs.com/gif.latex?A_i)
319 | as well as each interpolation operator.
320 | It also shows the operator complexity, which is defined as the sum of the number of nonzeroes of all operators
321 | ![](http://latex.codecogs.com/gif.latex?A_i)
322 | divided by the number of nonzeroes of the original matrix _A_ =
323 | ![](http://latex.codecogs.com/gif.latex?A_0) :
324 | 
325 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Csum_%7Bi%3D0%7D%5EL%20nnz%28A_i%29%7D%7Bnnz%28A%29%7D).
326 | 
327 | The memory complexity also includes the number of nonzeroes of all interpolation operators in the sum:
328 | 
329 | ![equation](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Csum_%7Bi%3D0%7D%5E%7BL-1%7D%20%28nnz%28A_i%29%2Bnnz%28P_i%29%29%2Bnnz%28A_L%29%7D%7Bnnz%28A%29%7D)
330 | 
331 | #### Questions
332 | 
333 | > **Is the operator complexity acceptable?**
334 | 
335 | |<font color="white">No, it is too large, above 2!</font>|
336 | 
337 | > **How does the complexity affect performance?**
338 | 
339 | |<font color="white">The method is slower than PFMG-PCG and even PCG, in spite of a low number of iterations.</font>|
340 | 
341 | Now, let us use AMG-PCG with aggressive coarsening turned on for the first level.
342 | 
343 | ```
344 | /usr/bin/time -p mpiexec -n 8 ./amrex amgpcg2
345 | ```
346 | 
347 | You should get some output that looks like this
348 | ```
349 | MPI initialized with 8 MPI processes
350 |  Num OpenMP threads = 1
351 | 
352 | 
353 | BoomerAMG SETUP PARAMETERS:
354 | 
355 |  Max levels = 25
356 |  Num levels = 7
357 | 
358 |  Strength Threshold = 0.250000
359 |  Interpolation Truncation Factor = 0.000000
360 |  Maximum Row Sum Threshold for Dependency Weakening = 0.900000
361 | 
362 |  Coarsening Type = HMIS
363 | 
364 |  No. of levels of aggressive coarsening: 1
365 | 
366 |  Interpolation on agg. levels= multipass interpolation
367 |  measures are determined locally
368 | 
369 | 
370 |  No global partition option chosen.
371 | 
372 |  Interpolation = extended+i interpolation
373 | 
374 | Operator Matrix Information:
375 | 
376 |             nonzero         entries per row        row sums
377 | lev   rows  entries  sparse  min  max   avg       min         max
378 | ===================================================================
379 |  0 2097152 14581760  0.000     4    7   7.0   1.000e-03   9.830e+05
380 |  1  168473  3001117  0.000     9   36  17.8   1.196e-02   1.835e+06
381 |  2   36380  1786702  0.001    15   93  49.1   4.442e-02   3.245e+06
382 |  3    4862   345260  0.015    15  146  71.0   2.634e-01   5.022e+06
383 |  4     674    46930  0.103    14  184  69.6   1.035e+00   1.199e+07
384 |  5      84     3542  0.502    13   74  42.2   2.600e+06   5.014e+07
385 |  6       7       49  1.000     7    7   7.0   6.754e+06   2.370e+07
386 | 
387 | 
388 | Interpolation Matrix Information:
389 |                  entries/row    min     max         row sums
390 | lev  rows cols    min max     weight   weight     min       max
391 | =================================================================
392 |  0 2097152 x 168473   1   9   1.055e-02 1.000e+00 1.220e-01 1.000e+00
393 |  1 168473 x 36380   1   4   3.841e-03 1.000e+00 1.630e-01 1.000e+00
394 |  2 36380 x 4862    0   4  -4.129e-03 1.000e+00 0.000e+00 1.000e+00
395 |  3  4862 x 674     0   4  -1.383e-01 6.712e-01 0.000e+00 1.000e+00
396 |  4   674 x 84      0   4  -6.354e-01 6.935e-01 0.000e+00 1.000e+00
397 |  5    84 x 7       0   4  -2.982e-02 1.394e-01 0.000e+00 1.000e+00
398 | 
399 | 
400 |      Complexity:    grid = 1.100365
401 |                 operator = 1.355485
402 |                 memory = 1.707254
403 | 
404 | 
405 | 
406 | 
407 | BoomerAMG SOLVER PARAMETERS:
408 |   Maximum number of cycles:         1
409 |   Stopping Tolerance:               0.000000e+00
410 |   Cycle type (1 = V, 2 = W, etc.):  1
411 | 
412 |   Relaxation Parameters:
413 |    Visiting Grid:                     down   up  coarse
414 |             Number of sweeps:            1    1     1
415 |    Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:     13   14     9
416 |    Point types, partial sweeps (1=C, -1=F):
417 |                   Pre-CG relaxation (down):   0
418 |                    Post-CG relaxation (up):   0
419 |                              Coarsest grid:   0
420 | 
421 | 
422 | 13 Hypre Solver Iterations, Relative Residual 2.6981728821542126e-07
423 | 
424 | Max-norm of the error is 0.000281305921
425 | Maximum absolute value of the solution is 0.9991262625
426 | Maximum absolute value of the rhs is 1661.007274
427 | real 2.17
428 | user 14.03
429 | sys 1.34
430 | ```
431 | 
432 | #### Questions
433 | 
434 | > **How does the number of levels change? The complexity?**
435 | 
436 | |<font color="white">There is one level less. The complexity is much improved, almost 3 times as small, clearly below 2, closer to 1.</font>|
437 | 
438 | > **How does this affect the performance?**
439 | 
440 | |<font color="white">The run is more than twice as fast; however, convergence is worse.</font>|
441 | 
442 | > **How does this compare to PFMG-PCG when applied to the same problem? Why?**
443 | 
444 | |<font color="white">PFMG-PCG is almost twice as fast, even converges slightly faster. PFMG-PCG takes advantage of the structure in the problem, which AMG-PCG cannot do.</font>|
445 | 
446 | 
447 | ## Out-Brief
448 | 
449 | We investigated why multigrid methods are preferable over generic solvers like conjugate gradient for large suitable PDE problems.
450 | Additional improvements can be achieved when using them as preconditioners for Krylov solvers like conjugate gradient.
451 | For unstructured multigrid solvers, it is important to keep complexities low, since large complexities lead to slow solve times and require much memory.
452 | For structured problems, solvers that take advantage of the structure of the problem are more efficient than unstructured solvers.
453 | 
454 | 
455 | ### Further Reading
456 | 
457 | To learn more about algebraic multigrid, see
458 | [An Introduction to Algebraic Multigrid](https://computation.llnl.gov/projects/hypre-scalable-linear-solvers-multigrid-methods/CiSE_2006_amg_220851.pdf)
459 | 
460 | More information on hypre, including documentation and further publications, can be found [here](http://www.llnl.gov/CASC/hypre)
461 | 
462 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
463 | 
464 | &nbsp;
465 | 
466 | ---
467 | 
468 | [Back to all HandsOnLessons](../lessons.md)
469 | 


--------------------------------------------------------------------------------
/lessons/AMG/rhs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMG/rhs.jpg


--------------------------------------------------------------------------------
/lessons/AMG/solution.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMG/solution.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/VisIt_2D.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/VisIt_2D.pdf


--------------------------------------------------------------------------------
/lessons/AMReX/lesson.md:
--------------------------------------------------------------------------------
  1 | # AMReX -- a block-structured Adaptive Mesh Refinement (AMR) framework
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | 
  7 | ```
  8 | Questions                 |Objectives                           |Key Points
  9 | --------------------------|----------- -------------------------|--------------------------
 10 | How do I start to use     | Understand easy set-up              | It's not hard to get started
 11 | AMReX?                    |                                     |
 12 |                           |                                     |
 13 | How do I 'turn on' AMR?   | Understand minimum specs for AMR    | When the algorithm is correctly designed
 14 |                           |                                     | and implemented, AMR 'just works'
 15 |                           |                                     |
 16 | How do I visualize AMR    | Use Visit for AMR results           | Visualization tools exist for AMR data.
 17 | results?
 18 | ```
 19 | 
 20 | ## Example: Single-Level Heat Equation
 21 | 
 22 | ### The Equation and the Discretization
 23 | 
 24 | First let's revisit the heat equation problem.
 25 | 
 26 | This algorithm should look familiar to you -- in each time step we call the following two Fortran routines:
 27 | ```fortran
 28 |   ! x-fluxes
 29 |   do    j = lo(2), hi(2)
 30 |      do i = lo(1), hi(1)+1
 31 |         fluxx(i,j) = ( phi(i,j) - phi(i-1,j) ) / dx(1)
 32 |      end do
 33 |   end do
 34 | 
 35 |   ! y-fluxes
 36 |   do    j = lo(2), hi(2)+1
 37 |      do i = lo(1), hi(1)
 38 |         fluxy(i,j) = ( phi(i,j) - phi(i,j-1) ) / dx(2)
 39 |      end do
 40 |   end do
 41 | ```
 42 | 
 43 | and
 44 | ```fortran
 45 |   do    j = lo(2), hi(2)
 46 |      do i = lo(1), hi(1)
 47 | 
 48 |         phinew(i,j) = phiold(i,j) &
 49 |              + dtdx(1) * (fluxx(i+1,j  ) - fluxx(i,j)) &
 50 |              + dtdx(2) * (fluxy(i  ,j+1) - fluxy(i,j))
 51 | 
 52 |      end do
 53 |   end do
 54 | 
 55 | ```
 56 | 
 57 | The other parts of the algorithm -- that, in particular, involve MPI communication, are handled in the C++:
 58 | 
 59 | ```C++
 60 |         MultiFab::Copy(phi_old, phi_new, 0, 0, 1, 0);
 61 | ```
 62 | 
 63 | and
 64 | 
 65 | ```C++
 66 |             old_phi.FillBoundary(geom.periodicity());
 67 | ```
 68 | 
 69 | See if the order in which all of these are called makes sense to you.
 70 | 
 71 | ### Running the Problem
 72 | 
 73 | **Note:** To run this part of the lesson
 74 | ```
 75 | cd handson/amrex/AMReX_diffusion
 76 | ```
 77 | 
 78 | In this directory you'll see
 79 | 
 80 | ```
 81 | main2d.gnu.MPI.ex -- the executable
 82 | inputs_2d -- the inputs file
 83 | fextract -- an executable that extracts a 1-d slice from 2-d or 3-d data
 84 | extract_slice -- a simple script that calls fextract on each of our plotfiles
 85 | plot_phi -- a simple gnuplot script to read and animate the 1-d slices
 86 | ```
 87 | 
 88 | The inputs file currently has
 89 | 
 90 | ```
 91 | nsteps = 20000
 92 | n_cell = 256 256
 93 | max_grid_size = 128
 94 | plot_int = 1000
 95 | is_periodic = 1 0
 96 | 
 97 | ```
 98 | 
 99 | The grid is a square consisting of 256 x 256 cells, divided into 4 subgrids each
100 | of size 128x128 cells.  The problem is periodic in the x-direction and not in the y-direction.
101 | This problem happens to be set-up to have homogeneous Neumann boundary conditions when not periodic.
102 | 
103 | Let's try running this 2-d problem 
104 | 
105 | ```
106 | ./main2d.gnu.MPI.ex inputs_2d  
107 | ```
108 | 
109 | Then let's extract 1-d slices from the plotfiles and animate them
110 | 
111 | ```
112 | source extract_slice
113 | gnuplot plot_phi
114 | ```
115 | 
116 | This should make an animated gif like the one you see here.
117 | 
118 | |[<img src="phi.gif" width="400">](phi.gif)
119 | 
120 | If you'd like to see the 2-d solution, use Visit to open up a plotfile.
121 | 
122 | ```
123 | Select ``File'' then ``Open file ...'',
124 | then select the Header file associated with the plotfile of interest (e.g., _plt00000/Header_).
125 | Here are instructions (from the Users Guide) for making a simple plot:
126 | 
127 | To view the data, select ``Add'' then ``Pseudocolor'' then ``phi'' then ``Draw''.
128 | 
129 | To view the grid structure (not particularly interesting yet, but when we add AMR it will be), select
130 | ``subset'' then ``levels''.  Then double-click the text ``Subset - levels'',
131 | enable the ``Wireframe'' option, select ``Apply'', select ``Dismiss'', and then select ``Draw''.
132 | 
133 | To save the image, select ``File'' then ``Set save options'', then customize the image format
134 | to your liking, then click ``Save''.
135 | ```
136 | 
137 | Your images should look similar to those below.
138 | 
139 | |Time Step 0|Time Step 10000|
140 | |:---:|:---:|
141 | |[<img src = "phi_diff_0.jpg" width ="300">](phi_diff_0.jpg)|[<img src = "phi_diff_10000.jpg" width ="300">](phi_diff_10000.jpg)
142 | 
143 | 
144 | ## What does this do in parallel?
145 | 
146 | Let's now try
147 | ```
148 | mpiexec -n 1 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000  | grep "Run time"
149 | mpiexec -n 2 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000  | grep "Run time"
150 | mpiexec -n 4 ./main2d.gnu.MPI.ex inputs_2d plot_int=-1 max_step= 1000  | grep "Run time"
151 | ```
152 | and see how the timings compare.
153 | 
154 | Questions to think about:
155 | 
156 | Why did we set plot_int = -1 in the command line?
157 | 
158 | If this didn't scale perfectly, why not?
159 | 
160 | ## Example: Multi-Level Advection
161 | 
162 | ### The Equation and the Discretization
163 | 
164 | Now let's consider scalar advection with a specified time-dependent velocity field.  In this
165 | example we'll be using AMR.
166 | 
167 | This algorithm should also look familiar to you -- in each time step we construct fluxes and use them to update the solution.
168 | ```fortran
169 |   ! Do a conservative update
170 |   do    j = lo(2),hi(2)
171 |      do i = lo(1),hi(1)
172 |         uout(i,j) = uin(i,j) + &
173 |              ( (flxx(i,j) - flxx(i+1,j)) * dtdx(1) &
174 |              + (flxy(i,j) - flxy(i,j+1)) * dtdx(2) )
175 |      enddo
176 |   enddo
177 | ```
178 | 
179 | Here the construction of the fluxes is a little more complicated, and because we are going to use AMR, we
180 | must save the fluxes at each level so that we can use them in a refluxing operation. The subcycling in time
181 | algorithm, which we haven't really had time to talk about, looks like
182 | ```C++
183 |     if (lev < finest_level)
184 |     {
185 |         // recursive call for next-finer level
186 |         for (int i = 1; i <= nsubsteps[lev+1]; ++i)
187 |         {
188 |             timeStep(lev+1, time+(i-1)*dt[lev+1], i);
189 |         }
190 | 
191 |         if (do_reflux)
192 |         {
193 |             // update lev based on coarse-fine flux mismatch
194 |             flux_reg[lev+1]->Reflux(*phi_new[lev], 1.0, 0, 0, phi_new[lev]->nComp(), geom[lev]);
195 |         }
196 | 
197 |         AverageDownTo(lev); // average lev+1 down to lev
198 |     }
199 | ```
200 | 
201 | ### Running the Problem
202 | 
203 | **Note:** To run this part of the lesson
204 | ```
205 | cd handson/amrex/AMReX_advection
206 | ```
207 | 
208 | In this directory you'll see
209 | 
210 | ```
211 | main2d.gnu.MPI.ex -- the executable
212 | inputs -- the inputs file
213 | ```
214 | 
215 | The inputs file currently has
216 | 
217 | ```
218 | max_step = 120
219 | amr.n_cell = 64 64
220 | amr.max_grid_size = 32
221 | amr.plot_int = 10
222 | 
223 | ```
224 | 
225 | The grid here is a square consisting of 64 x 64 cells, divided into 4 subgrids each
226 | of size 32x32 cells.  The problem is periodic in the x-direction and not in the y-direction.
227 | This problem happens to be set-up to have homogeneous Neumann boundary conditions when not periodic.
228 | 
229 | Let's try running this 2-d problem with no refinement
230 | 
231 | ```
232 | ./main2d.gnu.MPI.ex inputs amr.max_level=0
233 | ```
234 | 
235 | To see the 2-d solution, use Visit to look at plt00000 and plt00060, for example.
236 | You should see something like this (though these pictures are
237 | made using a different visualization program.)
238 | 
239 | |Time Step 0|Time Step 60|
240 | |:---:|:---:|
241 | |[<img src = "phi_adv_noref.0.jpg" width ="300">](phi_adv_noref.0.jpg)|[<img src = "phi_adv_noref.60.jpg" width ="300">](phi_adv_noref.60.jpg)
242 | 
243 | ## Now let's turn on AMR.
244 | 
245 | Let's now run with
246 | ```
247 | ./main2d.gnu.MPI.ex inputs amr.max_level=2
248 | ```
249 | 
250 | and again visualize the results.  
251 | 
252 | |Time Step 0|Time Step 60|
253 | |:---:|:---:|
254 | |[<img src = "phi_adv_ref.0.jpg" width ="300">](phi_adv_ref.0.jpg)|[<img src = "phi_adv_ref.60.jpg" width ="300">](phi_adv_ref.60.jpg)
255 | 
256 | ### Further Reading
257 | 
258 | Learn more about AMReX [here](https://www.github.com/AMReX-codes/amrex) and take a look at the Users Guide in Docs.
259 | 
260 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
261 | 
262 | &nbsp;
263 | 
264 | ---
265 | 
266 | [Back to all HandsOnLessons](../lessons.md)
267 | 


--------------------------------------------------------------------------------
/lessons/AMReX/phi.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi.gif


--------------------------------------------------------------------------------
/lessons/AMReX/phi_adv_noref.0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_noref.0.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/phi_adv_noref.60.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_noref.60.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/phi_adv_ref.0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_ref.0.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/phi_adv_ref.60.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_adv_ref.60.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/phi_diff_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_diff_0.jpg


--------------------------------------------------------------------------------
/lessons/AMReX/phi_diff_10000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/AMReX/phi_diff_10000.jpg


--------------------------------------------------------------------------------
/lessons/adjoint/chkpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/chkpt.png


--------------------------------------------------------------------------------
/lessons/adjoint/ex1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/ex1.png


--------------------------------------------------------------------------------
/lessons/adjoint/ex1adj.c:
--------------------------------------------------------------------------------
  1 | static char help[] = "Adjoint sensitivity of a hybrid system with state-dependent switchings.\n";
  2 | 
  3 | /*
  4 |   The dynamics is described by the ODE
  5 |                   u_t = A_i u
  6 | 
  7 |   where A_1 = [ 1  -100
  8 |                 10  1  ],
  9 |         A_2 = [ 1    10
 10 |                -100  1 ].
 11 |   The index i changes from 1 to 2 when u[1]=2.75u[0] and from 2 to 1 when u[1]=0.36u[0].
 12 |   Initially u=[0 1]^T and i=1.
 13 | 
 14 |   References:
 15 |   H. Zhang, S. Abhyankar, E. Constantinescu, M. Mihai, Discrete Adjoint Sensitivity Analysis of Hybrid Dynamical Systems With Switching, IEEE Transactions on Circuits and Systems I: Regular Papers, 64(5), May 2017
 16 |   I. A. Hiskens, M.A. Pai, Trajectory Sensitivity Analysis of Hybrid Systems, IEEE Transactions on Circuits and Systems, Vol 47, No 2, February 2000
 17 | */
 18 | 
 19 | #include <petscts.h>
 20 | 
 21 | typedef struct { /* application state shared by the callbacks below through their ctx argument */
 22 |   PetscScalar lambda1; /* slope used while mode == 1: the event function is u[1]-lambda1*u[0] */
 23 |   PetscScalar lambda2; /* slope used while mode == 2: the event function is u[1]-lambda2*u[0] */
 24 |   PetscInt    mode;  /* mode flag: 1 or 2, selects which set of equations is active; toggled in PostEventFunction */
 25 | } AppCtx;
 26 | 
 27 | PetscErrorCode EventFunction(TS ts,PetscReal t,Vec U,PetscScalar *fvalue,void *ctx) /* event indicator: writes fvalue[0] for the active mode; ts and t are not used */
 28 | {
 29 |   AppCtx            *actx=(AppCtx*)ctx; /* ctx is expected to point at an AppCtx */
 30 |   PetscErrorCode    ierr;
 31 |   const PetscScalar *u; /* read-only view of the entries of U */
 32 | 
 33 |   PetscFunctionBegin;
 34 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
 35 |   if (actx->mode == 1) {
 36 |     fvalue[0] = u[1]-actx->lambda1*u[0]; /* zero exactly when u[1] = lambda1*u[0] */
 37 |   }else if (actx->mode == 2) {
 38 |     fvalue[0] = u[1]-actx->lambda2*u[0]; /* zero exactly when u[1] = lambda2*u[0] */
 39 |   } /* NOTE(review): fvalue[0] is left unwritten if mode is neither 1 nor 2 */
 40 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
 41 |   PetscFunctionReturn(0);
 42 | }
 43 | 
 44 | PetscErrorCode ShiftGradients(TS ts,Vec U,AppCtx *actx) /* updates in place the cost gradients lambda and mu held by ts, using the state U and the current mode */
 45 | {
 46 |   Vec               *lambda,*mu; /* gradient vectors fetched from ts below */
 47 |   PetscScalar       *x,*y; /* writable views of one lambda vector (x) and one mu vector (y) */
 48 |   const PetscScalar *u; /* read-only view of the state U */
 49 |   PetscErrorCode    ierr;
 50 |   PetscScalar       tmp[2],A1[2][2],A2[2],denorm; /* A1: 2x2 matrix applied to each lambda; A2: weights for the mu update; denorm: denominator shared by all entries */
 51 |   PetscInt          numcost; /* filled by TSGetCostGradients but not read afterwards */
 52 | 
 53 |   PetscFunctionBegin;
 54 |   ierr = TSGetCostGradients(ts,&numcost,&lambda,&mu);CHKERRQ(ierr);
 55 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
 56 | 
 57 |   if (actx->mode==2) { /* coefficients built from lambda1; NOTE(review): presumably the jump for the switch 1 -> 2, since this runs before PostEventFunction toggles mode -- confirm */
 58 |     denorm = -actx->lambda1*(u[0]-100.*u[1])+1.*(10.*u[0]+u[1]);
 59 |     A1[0][0] = 110.*u[1]*(-actx->lambda1)/denorm+1.;
 60 |     A1[0][1] = -110.*u[0]*(-actx->lambda1)/denorm;
 61 |     A1[1][0] = 110.*u[1]*1./denorm;
 62 |     A1[1][1] = -110.*u[0]*1./denorm+1.;
 63 | 
 64 |     A2[0] = 110.*u[1]*(-u[0])/denorm; /* only this branch contributes to mu */
 65 |     A2[1] = -110.*u[0]*(-u[0])/denorm;
 66 |   } else { /* any other mode value: coefficients built from lambda2 */
 67 |     denorm = -actx->lambda2*(u[0]+10.*u[1])+1.*(-100.*u[0]+u[1]);
 68 |     A1[0][0] = 110.*u[1]*(actx->lambda2)/denorm+1;
 69 |     A1[0][1] = -110.*u[0]*(actx->lambda2)/denorm;
 70 |     A1[1][0] = -110.*u[1]*1./denorm;
 71 |     A1[1][1] = 110.*u[0]*1./denorm+1.;
 72 | 
 73 |     A2[0] = 0; /* zero weights: mu is left unchanged in this branch */
 74 |     A2[1] = 0;
 75 |   }
 76 | 
 77 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
 78 | 
 79 |   ierr   = VecGetArray(lambda[0],&x);CHKERRQ(ierr); /* first gradient pair: lambda[0] and mu[0] */
 80 |   ierr   = VecGetArray(mu[0],&y);CHKERRQ(ierr);
 81 |   tmp[0] = A1[0][0]*x[0]+A1[0][1]*x[1]; /* tmp = A1*x, kept aside so x is still unmodified for the mu update */
 82 |   tmp[1] = A1[1][0]*x[0]+A1[1][1]*x[1];
 83 |   y[0]   = y[0] + A2[0]*x[0]+A2[1]*x[1]; /* mu entry accumulates A2 . x using the old x */
 84 |   x[0]   = tmp[0]; /* now overwrite lambda with A1*x */
 85 |   x[1]   = tmp[1];
 86 |   ierr   = VecRestoreArray(mu[0],&y);CHKERRQ(ierr);
 87 |   ierr   = VecRestoreArray(lambda[0],&x);CHKERRQ(ierr);
 88 | 
 89 |   ierr   = VecGetArray(lambda[1],&x);CHKERRQ(ierr); /* second gradient pair: same update; the indices 0 and 1 are hard-coded, so exactly two cost gradients are assumed */
 90 |   ierr   = VecGetArray(mu[1],&y);CHKERRQ(ierr);
 91 |   tmp[0] = A1[0][0]*x[0]+A1[0][1]*x[1];
 92 |   tmp[1] = A1[1][0]*x[0]+A1[1][1]*x[1];
 93 |   y[0]   = y[0] + A2[0]*x[0]+A2[1]*x[1];
 94 |   x[0]   = tmp[0];
 95 |   x[1]   = tmp[1];
 96 |   ierr   = VecRestoreArray(mu[1],&y);CHKERRQ(ierr);
 97 |   ierr   = VecRestoreArray(lambda[1],&x);CHKERRQ(ierr);
 98 |   PetscFunctionReturn(0);
 99 | }
100 | 
101 | PetscErrorCode PostEventFunction(TS ts,PetscInt nevents,PetscInt event_list[],PetscReal t,Vec U,PetscBool forwardsolve,void* ctx) /* post-event callback: toggles the mode, and first shifts the gradients when not in the forward solve */
102 | {
103 |   AppCtx         *actx=(AppCtx*)ctx; /* ctx is expected to point at an AppCtx */
104 |   PetscErrorCode ierr;
105 | 
106 |   PetscFunctionBegin;
107 |   /* ierr = VecView(U,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
108 |   if (!forwardsolve) { /* not the forward solve: update lambda/mu before the mode is toggled below */
109 |     ierr = ShiftGradients(ts,U,actx);CHKERRQ(ierr);
110 |   }
111 |   if (actx->mode == 1) { /* toggle between the two modes; any other value is left untouched */
112 |     actx->mode = 2;
113 |     /* ierr = PetscPrintf(PETSC_COMM_SELF,"Change from mode 1 to 2 at t = %f \n",t);CHKERRQ(ierr); */
114 |   } else if (actx->mode == 2) {
115 |     actx->mode = 1;
116 |     /* ierr = PetscPrintf(PETSC_COMM_SELF,"Change from mode 2 to 1 at t = %f \n",t);CHKERRQ(ierr); */
117 |   }
118 |   PetscFunctionReturn(0);
119 | }
120 | 
121 | /*
122 |      Defines the ODE passed to the ODE solver
123 | */
124 | static PetscErrorCode IFunction(TS ts,PetscReal t,Vec U,Vec Udot,Vec F,void *ctx) /* implicit residual: F = Udot - A_i*U for the active mode i; ts and t are not used */
125 | {
126 |   AppCtx            *actx=(AppCtx*)ctx; /* ctx is expected to point at an AppCtx */
127 |   PetscErrorCode    ierr;
128 |   PetscScalar       *f; /* writable view of the residual F */
129 |   const PetscScalar *u,*udot; /* read-only views of U and Udot */
130 | 
131 |   PetscFunctionBegin;
132 |   /*  The next three lines allow us to access the entries of the vectors directly */
133 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
134 |   ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr);
135 |   ierr = VecGetArray(F,&f);CHKERRQ(ierr);
136 | 
137 |   if (actx->mode == 1) { /* A_1 = [1 -100; 10 1] */
138 |     f[0] = udot[0]-u[0]+100*u[1];
139 |     f[1] = udot[1]-10*u[0]-u[1];
140 |   } else if (actx->mode == 2) { /* A_2 = [1 10; -100 1] */
141 |     f[0] = udot[0]-u[0]-10*u[1];
142 |     f[1] = udot[1]+100*u[0]-u[1];
143 |   } /* NOTE(review): F is not written if mode is neither 1 nor 2 */
144 | 
145 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
146 |   ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr);
147 |   ierr = VecRestoreArray(F,&f);CHKERRQ(ierr);
148 |   PetscFunctionReturn(0);
149 | }
150 | 
151 | /*
152 |      Defines the Jacobian of the ODE passed to the ODE solver. See TSSetIJacobian() for the meaning of a and the Jacobian.
153 | */
154 | static PetscErrorCode IJacobian(TS ts,PetscReal t,Vec U,Vec Udot,PetscReal a,Mat A,Mat B,void *ctx) /* fills B with the 2x2 matrix a*I - A_i for the active mode i, then assembles A and B */
155 | {
156 |   AppCtx            *actx=(AppCtx*)ctx; /* ctx is expected to point at an AppCtx */
157 |   PetscErrorCode    ierr;
158 |   PetscInt          rowcol[] = {0,1}; /* used as both the row and the column index list */
159 |   PetscScalar       J[2][2]; /* entries passed to MatSetValues as &J[0][0] */
160 |   const PetscScalar *u,*udot; /* obtained and restored below, but their entries are never read */
161 | 
162 |   PetscFunctionBegin;
163 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
164 |   ierr = VecGetArrayRead(Udot,&udot);CHKERRQ(ierr);
165 | 
166 |   if (actx->mode == 1) { /* a*I - A_1 */
167 |     J[0][0] = a-1;                       J[0][1] = 100;
168 |     J[1][0] = -10;                       J[1][1] = a-1;
169 |   } else if (actx->mode == 2) { /* a*I - A_2 */
170 |     J[0][0] = a-1;                       J[0][1] = -10;
171 |     J[1][0] = 100;                       J[1][1] = a-1;
172 |   } /* NOTE(review): J stays uninitialized if mode is neither 1 nor 2, yet is still inserted below */
173 |   ierr = MatSetValues(B,2,rowcol,2,rowcol,&J[0][0],INSERT_VALUES);CHKERRQ(ierr); /* values go into B only; A receives them only when A == B */
174 | 
175 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
176 |   ierr = VecRestoreArrayRead(Udot,&udot);CHKERRQ(ierr);
177 | 
178 |   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
179 |   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
180 |   if (A != B) { /* B is assembled separately only when it is a distinct matrix */
181 |     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182 |     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
183 |   }
184 |   PetscFunctionReturn(0);
185 | }
186 | 
187 | /* Matrix JacobianP is constant so that it only needs to be evaluated once */
188 | static PetscErrorCode RHSJacobianP(TS ts,PetscReal t,Vec X,Mat A, void *ctx) /* intentionally empty callback: none of the arguments are read and A is not modified */
189 | {
190 |   PetscFunctionBeginUser;
191 |   PetscFunctionReturn(0); /* always reports success */
192 | }
193 | 
194 | int main(int argc,char **argv)
195 | {
196 |   TS             ts;            /* ODE integrator */
197 |   Vec            U;             /* solution will be stored here */
198 |   Mat            A;             /* Jacobian matrix */
199 |   Mat            Ap;            /* dfdp */
200 |   PetscErrorCode ierr;
201 |   PetscMPIInt    size;
202 |   PetscInt       n = 2;
203 |   PetscScalar    *u,*v;
204 |   AppCtx         app;
205 |   PetscInt       direction[1];
206 |   PetscBool      terminate[1];
207 |   Vec            lambda[2],mu[2];
208 |   PetscReal      tend;
209 | 
210 |   FILE           *f;
211 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
212 |      Initialize program
213 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
214 |   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
215 |   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
216 |   if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Only for sequential runs");
217 |   app.mode = 1;
218 |   app.lambda1 = 2.75;
219 |   app.lambda2 = 0.36;
220 |   tend = 0.125;
221 |   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex1adj options","");CHKERRQ(ierr);
222 |   {
223 |     ierr = PetscOptionsReal("-lambda1","","",app.lambda1,&app.lambda1,NULL);CHKERRQ(ierr);
224 |     ierr = PetscOptionsReal("-lambda2","","",app.lambda2,&app.lambda2,NULL);CHKERRQ(ierr);
225 |     ierr = PetscOptionsReal("-tend","","",tend,&tend,NULL);CHKERRQ(ierr);
226 |   }
227 |   ierr = PetscOptionsEnd();CHKERRQ(ierr);
228 | 
229 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
230 |     Create necessary matrix and vectors
231 |     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
232 |   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
233 |   ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
234 |   ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
235 |   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
236 |   ierr = MatSetUp(A);CHKERRQ(ierr);
237 | 
238 |   ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr);
239 | 
240 |   ierr = MatCreate(PETSC_COMM_WORLD,&Ap);CHKERRQ(ierr);
241 |   ierr = MatSetSizes(Ap,n,1,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
242 |   ierr = MatSetType(Ap,MATDENSE);CHKERRQ(ierr);
243 |   ierr = MatSetFromOptions(Ap);CHKERRQ(ierr);
244 |   ierr = MatSetUp(Ap);CHKERRQ(ierr);
245 |   ierr = MatZeroEntries(Ap);CHKERRQ(ierr); /* initialize to zeros */
246 | 
247 |   ierr = VecGetArray(U,&u);CHKERRQ(ierr);
248 |   u[0] = 0;
249 |   u[1] = 1;
250 |   ierr = VecRestoreArray(U,&u);CHKERRQ(ierr);
251 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
252 |      Create timestepping solver context
253 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
254 |   ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr);
255 |   ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);
256 |   ierr = TSSetType(ts,TSCN);CHKERRQ(ierr);
257 |   ierr = TSSetIFunction(ts,NULL,(TSIFunction)IFunction,&app);CHKERRQ(ierr);
258 |   ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,&app);CHKERRQ(ierr);
259 | 
260 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
261 |      Set initial conditions
262 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
263 |   ierr = TSSetSolution(ts,U);CHKERRQ(ierr);
264 | 
265 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
266 |     Save trajectory of solution so that TSAdjointSolve() may be used
267 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
268 |   ierr = TSSetSaveTrajectory(ts);CHKERRQ(ierr);
269 | 
270 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
271 |      Set solver options
272 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
273 |   ierr = TSSetMaxTime(ts,tend);CHKERRQ(ierr);
274 |   ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_MATCHSTEP);CHKERRQ(ierr);
275 |   ierr = TSSetTimeStep(ts,1./256.);CHKERRQ(ierr);
276 |   ierr = TSSetFromOptions(ts);CHKERRQ(ierr);
277 | 
278 |   /* Set directions and terminate flags for the two events */
279 |   direction[0] = 0;
280 |   terminate[0] = PETSC_FALSE;
281 |   ierr = TSSetEventHandler(ts,1,direction,terminate,EventFunction,PostEventFunction,(void*)&app);CHKERRQ(ierr);
282 | 
283 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
284 |      Run timestepping solver
285 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
286 |   ierr = TSSolve(ts,U);CHKERRQ(ierr);
287 | 
288 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
289 |      Adjoint model starts here
290 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
291 |   ierr = MatCreateVecs(A,&lambda[0],NULL);CHKERRQ(ierr);
292 |   ierr = MatCreateVecs(A,&lambda[1],NULL);CHKERRQ(ierr);
293 |   /*   Set initial conditions for the adjoint integration */
294 |   ierr = VecZeroEntries(lambda[0]);CHKERRQ(ierr);
295 |   ierr = VecZeroEntries(lambda[1]);CHKERRQ(ierr);
296 |   ierr = VecGetArray(lambda[0],&u);CHKERRQ(ierr);
297 |   u[0] = 1.;
298 |   ierr = VecRestoreArray(lambda[0],&u);CHKERRQ(ierr);
299 |   ierr = VecGetArray(lambda[1],&u);CHKERRQ(ierr);
300 |   u[1] = 1.;
301 |   ierr = VecRestoreArray(lambda[1],&u);CHKERRQ(ierr);
302 | 
303 |   ierr = MatCreateVecs(Ap,&mu[0],NULL);CHKERRQ(ierr);
304 |   ierr = MatCreateVecs(Ap,&mu[1],NULL);CHKERRQ(ierr);
305 |   ierr = VecZeroEntries(mu[0]);CHKERRQ(ierr);
306 |   ierr = VecZeroEntries(mu[1]);CHKERRQ(ierr);
307 |   ierr = TSSetCostGradients(ts,2,lambda,mu);CHKERRQ(ierr);
308 | 
309 |   /*   Set RHS JacobianP */
310 |   ierr = TSAdjointSetRHSJacobian(ts,Ap,RHSJacobianP,&app);CHKERRQ(ierr);
311 | 
312 |   ierr = TSAdjointSolve(ts);CHKERRQ(ierr);
313 | 
314 |   /*
315 |   ierr = VecView(lambda[0],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
316 |   ierr = VecView(lambda[1],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
317 |   ierr = VecView(mu[0],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
318 |   ierr = VecView(mu[1],PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
319 |   */
320 |   ierr = VecGetArray(mu[0],&u);CHKERRQ(ierr);
321 |   ierr = VecGetArray(mu[1],&v);CHKERRQ(ierr);
322 |   f = fopen("adj_mu.out", "a");
323 |   ierr = PetscFPrintf(PETSC_COMM_WORLD,f,"%20.15lf %20.15lf %20.15lf\n",tend,u[0],v[0]);CHKERRQ(ierr);
324 |   ierr = VecRestoreArray(mu[0],&u);CHKERRQ(ierr);
325 |   ierr = VecRestoreArray(mu[1],&v);CHKERRQ(ierr);
326 |   fclose(f);
327 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
328 |      Free work space.  All PETSc objects should be destroyed when they are no longer needed.
329 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
330 |   ierr = MatDestroy(&A);CHKERRQ(ierr);
331 |   ierr = VecDestroy(&U);CHKERRQ(ierr);
332 |   ierr = TSDestroy(&ts);CHKERRQ(ierr);
333 | 
334 |   ierr = MatDestroy(&Ap);CHKERRQ(ierr);
335 |   ierr = VecDestroy(&lambda[0]);CHKERRQ(ierr);
336 |   ierr = VecDestroy(&lambda[1]);CHKERRQ(ierr);
337 |   ierr = VecDestroy(&mu[0]);CHKERRQ(ierr);
338 |   ierr = VecDestroy(&mu[1]);CHKERRQ(ierr);
339 |   ierr = PetscFinalize();
340 |   return ierr;
341 | }
342 | 


--------------------------------------------------------------------------------
/lessons/adjoint/ex1adj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/adjoint/ex1adj.png


--------------------------------------------------------------------------------
/lessons/adjoint/ex3opt.c:
--------------------------------------------------------------------------------
  1 | 
  2 | static char help[] = "Finds optimal parameter P_m for the generator system while maintaining generator stability.\n";
  3 | 
  4 | /*F
  5 | 
  6 | \begin{eqnarray}
  7 |                  \frac{d \theta}{dt} = \omega_b (\omega - \omega_s)
  8 |                  \frac{2 H}{\omega_s}\frac{d \omega}{dt} & = & P_m - P_max \sin(\theta) -D(\omega - \omega_s)\\
  9 | \end{eqnarray}
 10 | 
 11 | F*/
 12 | 
 13 | /*
 14 |   This code demonstrates how to solve a ODE-constrained optimization problem with TAO, TSEvent, TSAdjoint and TS.
 15 |   The problem features discontinuities and a cost function in integral form.
 16 |   The gradient is computed with the discrete adjoint of an implicit theta method, see ex3adj.c for details.
 17 | */
 18 | #include <petsctao.h>
 19 | #include <petscts.h>
 20 | 
 21 | typedef struct {
 22 |   PetscScalar H,D,omega_b,omega_s,Pmax,Pmax_ini,Pm,E,V,X,u_s,c;
 23 |   PetscInt    beta;
 24 |   PetscReal   tf,tcl;
 25 | } AppCtx;
 26 | 
 27 | PetscErrorCode FormFunctionGradient(Tao,Vec,PetscReal*,Vec,void*);
 28 | 
 29 | /* Event check: each indicator changes sign at its switching time */
 30 | PetscErrorCode EventFunction(TS ts,PetscReal t,Vec X,PetscScalar *fvalue,void *ctx)
 31 | {
 32 |   const AppCtx *app = (const AppCtx*)ctx;
 33 | 
 34 |   PetscFunctionBegin;
 35 |   /* Indicator 0: current time relative to the fault-on time tf */
 36 |   fvalue[0] = t - app->tf;
 37 |   /* Indicator 1: current time relative to the fault-off time tcl */
 38 |   fvalue[1] = t - app->tcl;
 39 | 
 40 |   PetscFunctionReturn(0);
 41 | }
 42 | 
 43 | PetscErrorCode PostEventFunction(TS ts,PetscInt nevents,PetscInt event_list[],PetscReal t,Vec X,PetscBool forwardsolve,void* ctx)
 44 | {
 45 |   AppCtx         *app = (AppCtx*)ctx;
 46 |   const PetscInt which = event_list[0]; /* only the first triggered event is inspected */
 47 | 
 48 |   PetscFunctionBegin;
 49 |   /* Event 0 applies the fault (Pmax = 0), event 1 removes it (Pmax = Pmax_ini); a backward pass reverses each change */
 50 |   if (which == 0) {
 51 |     app->Pmax = forwardsolve ? 0.0 : app->Pmax_ini;
 52 |   } else if (which == 1) {
 53 |     app->Pmax = forwardsolve ? app->Pmax_ini : 0.0;
 54 |   }
 55 | 
 56 |   PetscFunctionReturn(0);
 57 | }
 58 | 
 59 | /*
 60 |      Residual F(t,U,Udot) of the ODE in implicit form, as handed to the ODE solver
 61 | */
 62 | static PetscErrorCode IFunction(TS ts,PetscReal t,Vec U,Vec Udot,Vec F,AppCtx *ctx)
 63 | {
 64 |   PetscErrorCode    ierr;
 65 |   PetscScalar       *res;
 66 |   const PetscScalar *state,*rate;
 67 | 
 68 |   PetscFunctionBegin;
 69 |   /*  Get raw access to the entries: U and Udot read-only, F writable */
 70 |   ierr = VecGetArrayRead(U,&state);CHKERRQ(ierr);
 71 |   ierr = VecGetArrayRead(Udot,&rate);CHKERRQ(ierr);
 72 |   ierr = VecGetArray(F,&res);CHKERRQ(ierr);
 73 |   /* ctx->Pmax is read at call time, so the value switched by PostEventFunction takes effect here */
 74 |   res[0] = rate[0] - ctx->omega_b*(state[1] - ctx->omega_s);
 75 |   res[1] = 2.0*ctx->H/ctx->omega_s*rate[1] +  ctx->Pmax*PetscSinScalar(state[0]) + ctx->D*(state[1] - ctx->omega_s)- ctx->Pm;
 76 | 
 77 |   ierr = VecRestoreArrayRead(U,&state);CHKERRQ(ierr);
 78 |   ierr = VecRestoreArrayRead(Udot,&rate);CHKERRQ(ierr);
 79 |   ierr = VecRestoreArray(F,&res);CHKERRQ(ierr);
 80 |   PetscFunctionReturn(0);
 81 | }
 82 | 
 83 | /*
 84 |      Jacobian a*dF/dUdot + dF/dU of the residual in IFunction. See TSSetIJacobian() for the meaning of the shift a.
 85 | */
 86 | static PetscErrorCode IJacobian(TS ts,PetscReal t,Vec U,Vec Udot,PetscReal a,Mat A,Mat B,AppCtx *ctx)
 87 | {
 88 |   PetscErrorCode    ierr;
 89 |   PetscInt          rowcol[] = {0,1};
 90 |   PetscScalar       J[2][2];
 91 |   const PetscScalar *u;
 92 | 
 93 |   PetscFunctionBegin;
 94 |   /* Only U is read: no entry of J depends on the values in Udot, so that array is not accessed */
 95 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
 96 |   /* Row 0 differentiates f[0] of IFunction, row 1 differentiates f[1]; ctx->Pmax is the current (possibly faulted) value */
 97 |   J[0][0] = a;                               J[0][1] = -ctx->omega_b;
 98 |   J[1][0] = ctx->Pmax*PetscCosScalar(u[0]);  J[1][1] = 2.0*ctx->H/ctx->omega_s*a + ctx->D;
 99 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
100 | 
101 |   /* The 2x2 block is inserted into B */
102 |   ierr = MatSetValues(B,2,rowcol,2,rowcol,&J[0][0],INSERT_VALUES);CHKERRQ(ierr);
103 | 
104 |   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
105 |   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
106 |   if (A != B) { /* assemble B separately only when it is a distinct matrix */
107 |     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
108 |     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
109 |   }
110 |   PetscFunctionReturn(0);
111 | }
112 | 
113 | static PetscErrorCode RHSJacobianP(TS ts,PetscReal t,Vec X,Mat A,void *ctx0)
114 | {
115 |   PetscErrorCode ierr;
116 |   PetscInt       rows[2] = {0,1},pcol = 0;
117 |   PetscScalar    dfdp[2] = {0,1.};
118 | 
119 |   PetscFunctionBeginUser;
120 |   /* Constant 2x1 block written into A: entry (0,0) is 0 and entry (1,0) is 1 */
121 |   ierr = MatSetValues(A,2,rows,1,&pcol,dfdp,INSERT_VALUES);CHKERRQ(ierr);
122 | 
123 |   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 |   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
125 |   PetscFunctionReturn(0);
126 | }
127 | 
128 | static PetscErrorCode CostIntegrand(TS ts,PetscReal t,Vec U,Vec R,AppCtx *ctx)
129 | {
130 |   PetscErrorCode    ierr;
131 |   PetscScalar       *r;
132 |   const PetscScalar *u;
133 |   /* Writes the integrand value r[0] = c*max(0,u[0]-u_s)^beta into R */
134 |   PetscFunctionBegin;
135 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
136 |   ierr = VecGetArray(R,&r);CHKERRQ(ierr);
137 |   r[0] = ctx->c*PetscPowScalarInt(PetscMax(0., u[0]-ctx->u_s),ctx->beta); /* plain arithmetic: no error code to check */
138 |   ierr = VecRestoreArray(R,&r);CHKERRQ(ierr);
139 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
140 |   PetscFunctionReturn(0);
141 | }
142 | 
143 | static PetscErrorCode DRDYFunction(TS ts,PetscReal t,Vec U,Vec *drdy,AppCtx *ctx)
144 | {
145 |   PetscErrorCode     ierr;
146 |   PetscScalar        *ry;
147 |   const PetscScalar  *u;
148 |   /* Writes ry[0] = c*beta*max(0,u[0]-u_s)^(beta-1), the derivative of the CostIntegrand value w.r.t. u[0], into drdy[0] */
149 |   PetscFunctionBegin;
150 |   ierr = VecGetArrayRead(U,&u);CHKERRQ(ierr);
151 |   ierr = VecGetArray(drdy[0],&ry);CHKERRQ(ierr);
152 |   ry[0] = ctx->c*ctx->beta*PetscPowScalarInt(PetscMax(0., u[0]-ctx->u_s),ctx->beta-1); /* plain arithmetic: no error code to check */
153 |   ierr = VecRestoreArray(drdy[0],&ry);CHKERRQ(ierr);
154 |   ierr = VecRestoreArrayRead(U,&u);CHKERRQ(ierr);
155 |   PetscFunctionReturn(0);
156 | }
157 | 
158 | static PetscErrorCode DRDPFunction(TS ts,PetscReal t,Vec U,Vec *drdp,AppCtx *ctx)
159 | {
160 |   PetscErrorCode ierr;
161 |   PetscScalar    *grad;
162 |   /* CostIntegrand does not reference ctx->Pm, so the first entry of drdp[0] is set to zero */
163 |   PetscFunctionBegin;
164 |   ierr    = VecGetArray(drdp[0],&grad);CHKERRQ(ierr);
165 |   grad[0] = 0.;
166 |   ierr    = VecRestoreArray(drdp[0],&grad);CHKERRQ(ierr);
167 |   PetscFunctionReturn(0);
168 | }
169 | 
170 | PetscErrorCode ComputeSensiP(Vec lambda,Vec mu,AppCtx *ctx)
171 | {
172 |   PetscErrorCode    ierr;
173 |   PetscScalar       *grad,ratio;
174 |   const PetscScalar *adj;
175 | 
176 |   PetscFunctionBegin;
177 |   ierr = VecGetArrayRead(lambda,&adj);CHKERRQ(ierr);
178 |   ierr = VecGetArray(mu,&grad);CHKERRQ(ierr);
179 |   ratio = ctx->Pm/ctx->Pmax;
180 |   /* In place: mu[0] <- lambda[0]/(Pmax*sqrt(1-(Pm/Pmax)^2)) + mu[0] */
181 |   grad[0] = 1./PetscSqrtScalar(1.-ratio*ratio)/ctx->Pmax*adj[0]+grad[0];
182 |   ierr = VecRestoreArray(mu,&grad);CHKERRQ(ierr);
183 |   ierr = VecRestoreArrayRead(lambda,&adj);CHKERRQ(ierr);
184 |   PetscFunctionReturn(0);
185 | }
186 | 
187 | PetscErrorCode monitor(Tao tao,AppCtx *ctx)
188 | {
189 |   FILE               *fp;
190 |   PetscInt           iterate;
191 |   PetscReal          f,gnorm,cnorm,xdiff;
192 |   TaoConvergedReason reason;
193 |   PetscErrorCode     ierr;
194 |   /* TAO monitor callback: appends one "<iteration> <gradient norm>" line to ex3opt_conv.out on every call */
195 |   PetscFunctionBeginUser;
196 |   ierr = TaoGetSolutionStatus(tao,&iterate,&f,&gnorm,&cnorm,&xdiff,&reason);CHKERRQ(ierr);
197 |   fp = fopen("ex3opt_conv.out","a");
198 |   if (!fp) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open file ex3opt_conv.out for appending"); /* never print through a NULL stream */
199 |   ierr = PetscFPrintf(PETSC_COMM_WORLD,fp,"%D %g\n",iterate,(double)gnorm); /* checked after fclose so the file is closed even on failure */
200 |   fclose(fp);CHKERRQ(ierr);
201 |   PetscFunctionReturn(0);
202 | }
203 | 
204 | int main(int argc,char **argv)
205 | {
206 |   Vec                p;
207 |   PetscScalar        *x_ptr;
208 |   PetscErrorCode     ierr;
209 |   PetscMPIInt        size;
210 |   AppCtx             ctx;
211 |   Tao                tao;
212 |   KSP                ksp;
213 |   PC                 pc;
214 |   Vec                lowerb,upperb;
215 |   PetscBool          printtofile;
216 | 
217 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
218 |      Initialize program (the example refuses to run on more than one MPI process)
219 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
220 |   ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr;
221 |   PetscFunctionBeginUser;
222 |   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
223 |   if (size != 1) SETERRQ(PETSC_COMM_SELF,1,"This is a uniprocessor example only!");
224 | 
225 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
226 |     Set runtime options (defaults are assigned first, then overridden from the options database)
227 |     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
228 |   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Swing equation options","");CHKERRQ(ierr);
229 |   {
230 |     ctx.beta    = 2;
231 |     ctx.c       = 10000.0;
232 |     ctx.u_s     = 1.0;
233 |     ctx.omega_s = 1.0;
234 |     ctx.omega_b = 120.0*PETSC_PI;
235 |     ctx.H       = 5.0;
236 |     ierr        = PetscOptionsScalar("-Inertia","","",ctx.H,&ctx.H,NULL);CHKERRQ(ierr);
237 |     ctx.D       = 5.0;
238 |     ierr        = PetscOptionsScalar("-D","","",ctx.D,&ctx.D,NULL);CHKERRQ(ierr);
239 |     ctx.E       = 1.1378;
240 |     ctx.V       = 1.0;
241 |     ctx.X       = 0.545;
242 |     ctx.Pmax    = ctx.E*ctx.V/ctx.X;
243 |     ierr        = PetscOptionsScalar("-Pmax","","",ctx.Pmax,&ctx.Pmax,NULL);CHKERRQ(ierr);
244 |     ctx.Pmax_ini = ctx.Pmax; /* captured after -Pmax is read, so the value restored by PostEventFunction equals the pre-fault Pmax */
245 |     ctx.Pm      = 1.06;
246 |     ierr        = PetscOptionsScalar("-Pm","","",ctx.Pm,&ctx.Pm,NULL);CHKERRQ(ierr);
247 |     ctx.tf      = 0.1;
248 |     ctx.tcl     = 0.2;
249 |     ierr        = PetscOptionsReal("-tf","Time to start fault","",ctx.tf,&ctx.tf,NULL);CHKERRQ(ierr);
250 |     ierr        = PetscOptionsReal("-tcl","Time to end fault","",ctx.tcl,&ctx.tcl,NULL);CHKERRQ(ierr);
251 |     printtofile = PETSC_FALSE;
252 |     ierr        = PetscOptionsBool("-printtofile","Print convergence results to file","",printtofile,&printtofile,NULL);CHKERRQ(ierr);
253 |   }
254 |   ierr = PetscOptionsEnd();CHKERRQ(ierr);
255 | 
256 |   /* Create TAO solver and set desired solution method */
257 |   ierr = TaoCreate(PETSC_COMM_WORLD,&tao);CHKERRQ(ierr);
258 |   ierr = TaoSetType(tao,TAOBLMVM);CHKERRQ(ierr);
259 |   if(printtofile) { /* monitor() appends the iteration number and gradient norm to ex3opt_conv.out */
260 |     ierr = TaoSetMonitor(tao,(PetscErrorCode (*)(Tao, void*))monitor,(void *)&ctx,PETSC_NULL);CHKERRQ(ierr);
261 |   }
262 |   /*
263 |      Optimization starts
264 |   */
265 |   /* Set initial solution guess: a one-entry vector holding Pm */
266 |   ierr = VecCreateSeq(PETSC_COMM_WORLD,1,&p);CHKERRQ(ierr);
267 |   ierr = VecGetArray(p,&x_ptr);CHKERRQ(ierr);
268 |   x_ptr[0] = ctx.Pm;
269 |   ierr = VecRestoreArray(p,&x_ptr);CHKERRQ(ierr);
270 | 
271 |   ierr = TaoSetInitialVector(tao,p);CHKERRQ(ierr);
272 |   /* Set routine for function and gradient evaluation */
273 |   ierr = TaoSetObjectiveAndGradientRoutine(tao,FormFunctionGradient,(void *)&ctx);CHKERRQ(ierr);
274 | 
275 |   /* Set bounds for the optimization: 0 <= Pm <= 1.1 */
276 |   ierr = VecDuplicate(p,&lowerb);CHKERRQ(ierr);
277 |   ierr = VecDuplicate(p,&upperb);CHKERRQ(ierr);
278 |   ierr = VecGetArray(lowerb,&x_ptr);CHKERRQ(ierr);
279 |   x_ptr[0] = 0.;
280 |   ierr = VecRestoreArray(lowerb,&x_ptr);CHKERRQ(ierr);
281 |   ierr = VecGetArray(upperb,&x_ptr);CHKERRQ(ierr);
282 |   x_ptr[0] = 1.1;
283 |   ierr = VecRestoreArray(upperb,&x_ptr);CHKERRQ(ierr);
284 |   ierr = TaoSetVariableBounds(tao,lowerb,upperb);CHKERRQ(ierr);
285 | 
286 |   /* Check for any TAO command line options */
287 |   ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);
288 |   ierr = TaoGetKSP(tao,&ksp);CHKERRQ(ierr);
289 |   if (ksp) { /* if the selected TAO solver exposes a KSP, disable its preconditioner */
290 |     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
291 |     ierr = PCSetType(pc,PCNONE);CHKERRQ(ierr);
292 |   }
293 | 
294 |   /* SOLVE THE APPLICATION */
295 |   ierr = TaoSolve(tao);CHKERRQ(ierr);
296 | 
297 |   ierr = VecView(p,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
298 |   ierr = VecDestroy(&p);CHKERRQ(ierr);
299 |   ierr = VecDestroy(&lowerb);CHKERRQ(ierr);
300 |   ierr = VecDestroy(&upperb);CHKERRQ(ierr);
301 |   ierr = TaoDestroy(&tao);CHKERRQ(ierr);
302 |   ierr = PetscFinalize();
303 |   return ierr;
304 | }
305 | 
306 | /* ------------------------------------------------------------------ */
307 | /*
308 |    FormFunctionGradient - Evaluates the function and corresponding gradient.
309 | 
310 |    Input Parameters:
311 |    tao  - the Tao context
312 |    P    - the input vector; its first entry is used as the parameter Pm
313 |    ctx0 - user-defined context (an AppCtx), as set by TaoSetObjectiveAndGradientRoutine()
314 | 
315 |    Output Parameters:
316 |    f   - the newly evaluated function: -Pm plus the cost integral
317 |    G   - the newly evaluated gradient
318 | */
319 | PetscErrorCode FormFunctionGradient(Tao tao,Vec P,PetscReal *f,Vec G,void *ctx0)
320 | {
321 |   AppCtx         *ctx = (AppCtx*)ctx0;
322 |   TS             ts;
323 |   Vec            U;             /* solution will be stored here */
324 |   Mat            A;             /* Jacobian matrix */
325 |   Mat            Jacp;          /* matrix passed to TSAdjointSetRHSJacobian() and filled by RHSJacobianP */
326 |   PetscErrorCode ierr;
327 |   PetscInt       n = 2;
328 |   PetscReal      ftime;
329 |   PetscInt       steps;
330 |   PetscScalar       *u,*x_ptr,*y_ptr;
331 |   const PetscScalar *p_ptr,*q_ptr; /* read-only views of the input P and of the cost integral */
332 |   Vec            lambda[1],q,mu[1];
333 |   PetscInt       direction[2];
334 |   PetscBool      terminate[2];
335 |   PetscFunctionBeginUser;
336 |   ierr = VecGetArrayRead(P,&p_ptr);CHKERRQ(ierr); /* P is an input: read access only */
337 |   ctx->Pm = p_ptr[0];
338 |   ierr = VecRestoreArrayRead(P,&p_ptr);CHKERRQ(ierr);
339 | 
340 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
341 |     Create necessary matrix and vectors
342 |     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
343 |   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
344 |   ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
345 |   ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
346 |   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
347 |   ierr = MatSetUp(A);CHKERRQ(ierr);
348 | 
349 |   ierr = MatCreateVecs(A,&U,NULL);CHKERRQ(ierr);
350 | 
351 |   ierr = MatCreate(PETSC_COMM_WORLD,&Jacp);CHKERRQ(ierr);
352 |   ierr = MatSetSizes(Jacp,PETSC_DECIDE,PETSC_DECIDE,2,1);CHKERRQ(ierr);
353 |   ierr = MatSetFromOptions(Jacp);CHKERRQ(ierr);
354 |   ierr = MatSetUp(Jacp);CHKERRQ(ierr);
355 | 
356 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357 |      Create timestepping solver context
358 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
359 |   ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr);
360 |   ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);
361 |   ierr = TSSetType(ts,TSCN);CHKERRQ(ierr);
362 |   ierr = TSSetIFunction(ts,NULL,(TSIFunction) IFunction,ctx);CHKERRQ(ierr);
363 |   ierr = TSSetIJacobian(ts,A,A,(TSIJacobian)IJacobian,ctx);CHKERRQ(ierr);
364 | 
365 |   ierr = TSSetCostIntegrand(ts,1,NULL,(PetscErrorCode (*)(TS,PetscReal,Vec,Vec,void*))CostIntegrand,
366 |                                         (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDYFunction,
367 |                                         (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDPFunction,PETSC_TRUE,ctx);CHKERRQ(ierr);
368 | 
369 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
370 |      Set initial conditions
371 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
372 |   ierr = VecGetArray(U,&u);CHKERRQ(ierr);
373 |   u[0] = PetscAsinScalar(ctx->Pm/ctx->Pmax);
374 |   u[1] = 1.0;
375 |   ierr = VecRestoreArray(U,&u);CHKERRQ(ierr);
376 |   ierr = TSSetSolution(ts,U);CHKERRQ(ierr);
377 | 
378 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
379 |     Save trajectory of solution so that TSAdjointSolve() may be used
380 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
381 |   ierr = TSSetSaveTrajectory(ts);CHKERRQ(ierr);
382 | 
383 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
384 |      Set solver options
385 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
386 |   ierr = TSSetMaxTime(ts,1.0);CHKERRQ(ierr);
387 |   ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_MATCHSTEP);CHKERRQ(ierr);
388 |   ierr = TSSetTimeStep(ts,0.03125);CHKERRQ(ierr);
389 |   ierr = TSSetFromOptions(ts);CHKERRQ(ierr);
390 | 
391 |   direction[0] = direction[1] = 1;
392 |   terminate[0] = terminate[1] = PETSC_FALSE;
393 | 
394 |   ierr = TSSetEventHandler(ts,2,direction,terminate,EventFunction,PostEventFunction,(void*)ctx);CHKERRQ(ierr);
395 | 
396 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
397 |      Solve nonlinear system
398 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
399 |   ierr = TSSolve(ts,U);CHKERRQ(ierr);
400 | 
401 |   ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr);
402 |   ierr = TSGetStepNumber(ts,&steps);CHKERRQ(ierr);
403 |   /* ierr = VecView(U,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
404 | 
405 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
406 |      Adjoint model starts here
407 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
408 |   ierr = MatCreateVecs(A,&lambda[0],NULL);CHKERRQ(ierr);
409 |   /*   Set initial conditions for the adjoint integration */
410 |   ierr = VecGetArray(lambda[0],&y_ptr);CHKERRQ(ierr);
411 |   y_ptr[0] = 0.0; y_ptr[1] = 0.0;
412 |   ierr = VecRestoreArray(lambda[0],&y_ptr);CHKERRQ(ierr);
413 | 
414 |   ierr = MatCreateVecs(Jacp,&mu[0],NULL);CHKERRQ(ierr);
415 |   ierr = VecGetArray(mu[0],&x_ptr);CHKERRQ(ierr);
416 |   x_ptr[0] = -1.0; /* derivative of the explicit -Pm term of the objective w.r.t. Pm */
417 |   ierr = VecRestoreArray(mu[0],&x_ptr);CHKERRQ(ierr);
418 |   ierr = TSSetCostGradients(ts,1,lambda,mu);CHKERRQ(ierr);
419 | 
420 |   /*   Set RHS JacobianP */
421 |   ierr = TSAdjointSetRHSJacobian(ts,Jacp,RHSJacobianP,ctx);CHKERRQ(ierr);
422 | 
423 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
424 |      PETSC_TRUE as the second-to-last argument (as in the call before TSSolve) asks for the integral
425 |      to be evaluated during the forward run. NOTE(review): this second registration passes PETSC_FALSE;
426 |      confirm that both registrations are intended.
427 |      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
428 |   ierr = TSSetCostIntegrand(ts,1,NULL,(PetscErrorCode (*)(TS,PetscReal,Vec,Vec,void*))CostIntegrand,
429 |                                         (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDYFunction,
430 |                                         (PetscErrorCode (*)(TS,PetscReal,Vec,Vec*,void*))DRDPFunction,PETSC_FALSE,ctx);CHKERRQ(ierr);
431 | 
432 |   ierr = TSAdjointSolve(ts);CHKERRQ(ierr);
433 |   /* Combine lambda and mu into the gradient w.r.t. Pm; ComputeSensiP stores the result in mu[0] */
434 |   ierr = ComputeSensiP(lambda[0],mu[0],ctx);CHKERRQ(ierr);
435 |   ierr = VecCopy(mu[0],G);CHKERRQ(ierr);
436 | 
437 |   ierr = TSGetCostIntegral(ts,&q);CHKERRQ(ierr); /* q is obtained from ts and is not destroyed here */
438 |   ierr = VecGetArrayRead(q,&q_ptr);CHKERRQ(ierr);
439 |   *f   = -ctx->Pm + q_ptr[0];
440 |   ierr = VecRestoreArrayRead(q,&q_ptr);CHKERRQ(ierr);
441 | 
442 |   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
443 |      Free work space.  All PETSc objects should be destroyed when they are no longer needed.
444 |    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
445 |   ierr = MatDestroy(&A);CHKERRQ(ierr);
446 |   ierr = MatDestroy(&Jacp);CHKERRQ(ierr);
447 |   ierr = VecDestroy(&U);CHKERRQ(ierr);
448 |   ierr = VecDestroy(&lambda[0]);CHKERRQ(ierr);
449 |   ierr = VecDestroy(&mu[0]);CHKERRQ(ierr);
450 |   ierr = TSDestroy(&ts);CHKERRQ(ierr);
451 | 
452 |   PetscFunctionReturn(0);
453 | }
454 | 


--------------------------------------------------------------------------------
/lessons/adjoint/lesson.md:
--------------------------------------------------------------------------------
  1 | # Using adjoint for optimization
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | ```
  7 | Questions                 |Objectives                     |Key Points
  8 | --------------------------|-------------------------------|-------------------------------------
  9 | How can gradients be      |Know PETSc/TAO's capability for|Adjoint enables dynamic
 10 | computed for simulations? |adjoint and optimization       |constrained optimization.
 11 |                           |                               |
 12 | How difficult is it to    |Understand ingredients needed  |Jacobian is imperative.
 13 | use the adjoint method?   |for adjoint calculation        |
 14 |                           |                               |
 15 |                           |Understand the concern of      |Performance may depend on
 16 |                           |checkpointing                  |checkpointing at large scale.
 17 | ```
 18 | 
 19 | **Note:** To begin this lesson...
 20 | ```
 21 | cd handson/adjoint
 22 | ```
 23 | 
 24 | ## Example 1: Generator Stability Analysis:
 25 | 
 26 | This code uses [PETSc/TAO](https://www.mcs.anl.gov/petsc/) to demonstrate how to solve an ODE-constrained optimization problem with the Toolkit for Advanced Optimization (TAO), TSEvent, TSAdjoint and TS.
 27 | The objective is to maximize the mechanical power input subject to the generator swing equations and a constraint on the maximum rotor angle deviation, which is reformulated as a minimization problem:
 28 | 
 29 | ![equation](http://latex.codecogs.com/gif.latex?%5Cbegin%7Balign%2A%7D%0D%0A%20%20%5Cmin%20%26%20%5C%7B-P_m%20%2B%20%5Csigma%5Cdisplaystyle%20%5Cint_%7Bt_0%7D%5E%7Bt_F%7D%20%5Cmax%5Cleft%280%2C%20%5Ctheta%20-%20%5Ctheta_%7Bmax%7D%5Cright%29%5E%5Ceta%20%5C%20%5Cmathrm%7Bd%7Dt%20%5C%7D%5C%5C%0D%0A%20%20%5Cnonumber%20%7E%7E%20%5Ctext%7Bs.t.%7D%20%26%20%5Cqquad%20%5Cfrac%7Bd%20%5Ctheta%7D%7Bdt%7D%20%3D%20%5Comega_B%5Cleft%28%5Comega%20-%20%5Comega_s%5Cright%29%20%5C%5C%0D%0A%20%20%26%20%5Cqquad%20%5Cfrac%7Bd%20%5Comega%7D%7Bdt%7D%20%3D%20%5Cfrac%7B%5Comega_s%7D%7B2H%7D%5Cleft%28P_m%20-%20P_%7Bmax%7D%5Csin%28%5Ctheta%29%20-%20D%28%5Comega%20-%20%5Comega_s%29%5Cright%29%0D%0A%5Cend%7Balign%2A%7D)
 30 | 
 31 | Disturbance (a fault) is applied to the generator at time 0.1 and cleared at time 0.2.
 32 | The objective function contains an integral function.
 33 | The gradient is computed with the discrete adjoint of an implicit time stepping method ([Crank-Nicolson](https://en.wikipedia.org/wiki/Crank–Nicolson_method)).
 34 | 
 35 | ### Compile the code
 36 | During ATPESC, participants do not need to compile code because binaries are available in the ATPESC project folder on Cooley. In case you are using your own copy of PETSc, this example is located in `src/ts/examples/power_grid/`. To compile, run the following in the source folder
 37 | ```
 38 | make ex3opt
 39 | ```
 40 | The source code is included in [ex3opt.c](./ex3opt.c)
 41 | 
 42 | All the example codes need to be compiled only once. Different tasks can be accomplished using command line options.
 43 | 
 44 | ### Command line options
 45 | You can determine the command line options available for this particular example by doing
 46 | ```
 47 | ./ex3opt -help
 48 | ```
 49 | and show the options related to TAO only by doing
 50 | ```
 51 | ./ex3opt -help | grep tao
 52 | ```
 53 | 
 54 | ### Run 1: Monitor the optimization progress
 55 | 
 56 | ```
 57 | ./ex3opt -tao_monitor -tao_view
 58 | iter =   0, Function value: 2.03778,  Residual: 144.125
 59 | iter =   1, Function value: -0.552947,  Residual: 43.1456
 60 | iter =   2, Function value: -0.911654,  Residual: 18.3028
 61 | iter =   3, Function value: -1.00401,  Residual: 2.48745
 62 | iter =   4, Function value: -1.00649,  Residual: 1.17916
 63 | iter =   5, Function value: -1.00732,  Residual: 0.125532
 64 | iter =   6, Function value: -1.00733,  Residual: 0.00012392
 65 | iter =   7, Function value: -1.00733,  Residual: 1.3024e-08
 66 | iter =   8, Function value: -1.00733,  Residual: 3.46501e-12
 67 | Tao Object: 1 MPI processes
 68 | type: blmvm
 69 | Gradient steps: 0
 70 | TaoLineSearch Object: 1 MPI processes
 71 | type: more-thuente
 72 | Active Set subset type: subvec
 73 | convergence tolerances: gatol=1e-08,   steptol=0.,   gttol=0.
 74 | Residual in Function/Gradient:=3.46501e-12
 75 | Objective value=-1.00733
 76 | total number of iterations=8,                          (max: 2000)
 77 | total number of function/gradient evaluations=9,      (max: 4000)
 78 | Solution converged:    ||g(X)|| <= gatol
 79 | Vec Object: 1 MPI processes
 80 | type: seq
 81 | 1.00793
 82 | ```
 83 | #### Questions
 84 | > **Examine the source code and find the user-provided functions for TAO, TS, and TSAdjoint respectively.**
 85 | 
 86 | |<font color="white">Essential functions we have provided are FormFunctionGradient for TAO, TSIFunction and TSIJacobian for TS,  RHSJacobianP for TSAdjoint. Because of the integral in the objective function, extra functions including CostIntegrand, DRDYFunction and DRDPFunction are given to TSAdjoint.</font>|
 87 | 
 88 | ### Further information
 89 | 
 90 | A more complicated example for power grid application is in `src/ts/examples/power_grid/stability_9bus/ex9busopt.c`.
 91 | 
 92 | 
 93 | ## Example 2: Hybrid Dynamical System:
 94 | 
 95 | This code demonstrates how to compute the adjoint sensitivity for a complex dynamical system involving discontinuities with TSEvent, TSAdjoint and TS. The dynamics are described by the ODE
 96 | 
 97 | ![equation](http://latex.codecogs.com/gif.latex?%5Cdot%7Bx%7D%20%3D%20A_i%20x)
 98 | 
 99 | where ![equation](http://latex.codecogs.com/gif.latex?x%20%3D%20%5Bx_1%2C%20x_2%5D%5ET) and the matrix A changes from
100 | 
101 | ![equation](http://latex.codecogs.com/gif.latex?A_1%20%3D%20%5Cleft%5B%20%5Cbegin%7Barray%7D%7Bc%20c%7D1%20%26-100%5C%5C%2010%20%261%20%5Cend%7Barray%7D%0D%0A%5Cright%5D%0D%0A%5Cquad%20%5Ctext%7Bto%7D%20%5Cquad%0D%0AA_2%20%3D%20%5Cleft%5B%20%5Cbegin%7Barray%7D%7Bc%20c%7D1%20%2610%5C%5C%20-100%20%261%20%5Cend%7Barray%7D%0D%0A%5Cright%5D)
102 | 
103 |  when ![equation](http://latex.codecogs.com/gif.latex?%24x_2%3D2.75%20x_1%24) and switches back when ![equation](http://latex.codecogs.com/gif.latex?%24x_2%3D0.365%20x_1%24).
104 | 
105 | Thus the ODE system alternates its right-hand side whenever a switching surface is encountered. The switching surfaces are given by algebraic constraints on the state variables, as shown below (left)
106 | 
107 | <img src="ex1.png" width="400"><img src="ex1adj.png" width="400">
108 | 
109 | * The parameter with respect to which the sensitivities are computed is marked in red.
110 | * It represents the slope of the switching surface.
111 | * Intuitively the trajectory cannot be affected before it hits the surface.
112 | * The influence of the perturbation in the slope diminishes as the trajectory is approaching the equilibrium point.
113 | 
114 | ### Compile the code
115 | This example is in `src/ts/examples/hybrid`. The source code is included in [ex1adj.c](./ex1adj.c)
116 | 
117 | ```
118 | make ex1adj
119 | ```
120 | 
121 | ### Make the graphics work via interactive mode on Cooley
122 | Graphics are tricky. HPC users often do it offline. In order to make it work with Cooley, your computer must have X11 (Mac users can install XQuartz). If you do not have it now, just skip the graphics parts since they are not essential.
123 | 
124 | Apply for an interactive allocation (skip this if you already got one)
125 | ```
126 | $ qsub -I -t 60 -n 1 -A <project_name>
127 | ```
128 | For example, if your interactive allocation gives you node cc115, open a new terminal and do the following:
129 | ```
130 | $ ssh -C -X -Y cooley.alcf.anl.gov
131 | ```
132 | ```
133 | $ ssh -X cc115
134 | ```
135 | Then continue to run the applications in this new terminal.
136 | 
137 | ### Run 1: Monitor solution graphically with phase diagram
138 | 
139 | ```
140 | ./ex1adj -ts_monitor_draw_solution_phase -4,-2,2,2 -draw_pause -2
141 | ```
142 | 
143 | ### Run 2: Monitor the timestepping process
144 | 
145 | ```
146 | ./ex1adj -ts_monitor
147 | ```
148 | Trailing (r) in some lines of the output indicates that a rollback happens. In this example, it is triggered by `TSEvent`. To check  details about the event, we can use the event monitor
149 | ```
150 | ./ex1adj -ts_monitor -ts_event_monitor
151 | ```
152 | We can also monitor the timestepping for the adjoint calculation by doing
153 | ```
154 | ./ex1adj -ts_monitor -ts_adjoint_monitor
155 | ```
156 | 
157 | ### Further information
158 | 
159 | The example `ex1fwd.c` in the same folder illustrates the forward sensitivity approach for the same problem.
160 | 
161 | 
162 | ## Example 3: Diffusion-Reaction Problem
163 | 
164 | This code demonstrates parallel adjoint calculation for a system of time-dependent PDEs on a 2D rectangular grid.
165 | The adjoint solution corresponds to the sensitivities of one component in the final solution w.r.t. the initial conditions.
166 | We will use this example to illustrate the performance considerations for realistic large-scale applications. In particular, we will show how to play with checkpointing and how to profile/tune the performance.
167 | 
168 | ### Compile the code
169 | This example is in `src/ts/examples/advection-diffusion-reaction`. The source code is included in [ex5adj.c](./ex5adj.c)
170 | 
171 | ```
172 | make ex5adj
173 | ```
174 | 
175 | ### Run 1: Monitor solution graphically
176 | 
177 | ```
178 | mpiexec -n 4 ./ex5adj -forwardonly -implicitform 0 -ts_type rk \
179 |                      -ts_monitor -ts_monitor_draw_solution
180 | ```
181 | 
182 | * `-forwardonly` performs the forward simulation without the adjoint computation
183 | * `-implicitform 0 -ts_type rk` changes the time stepping algorithm to a Runge-Kutta method
184 | * `-ts_monitor_draw_solution` monitors the progress for the solution at each time step
185 | * Add `-draw_pause -2` if you want to pause at the end of simulation to see the plot
186 | 
187 | ### Run 2: Optimal checkpointing schedule
188 | By default, the checkpoints are stored in binary files on disk. Of course, this may not be a good choice for large-scale applications running on high-performance machines where I/O cost is significant. We can make the solver use RAM for checkpointing and specify the maximum allowable checkpoints so that an optimal adjoint checkpointing schedule that minimizes the number of recomputations will be generated.
189 | 
190 | ```
191 | mpiexec -n 4 ./ex5adj -implicitform 0 -ts_type rk -ts_adapt_type none \
192 |                      -ts_max_steps 10 -ts_monitor -ts_adjoint_monitor \
193 |                      -ts_trajectory_type memory -ts_trajectory_max_cps_ram 3 \
194 |                      -ts_trajectory_monitor -ts_trajectory_view
195 | ```
196 | The output corresponds to the schedule depicted by the following diagram:
197 | 
198 | <img src="chkpt.png" width="800">
199 | 
200 | #### Questions
201 | > **What will happen if we add the option `-ts_trajectory_max_cps_disk 2` to specify there are two available slots for disk checkpoints?**
202 | 
203 | |<font color="white">Looking at the output, we will find that the new schedule uses both RAM and disk for checkpointing and takes two fewer recomputations.</font>|
204 | 
205 | ### Run 3: Implicit time integration method
206 | Now we switch to an implicit method ([Crank-Nicolson](https://en.wikipedia.org/wiki/Crank–Nicolson_method)) using fixed stepsize, which is the default setting in the code. At each time step, a nonlinear system is solved by the PETSc nonlinear solver `SNES`.
207 | ```
208 | mpiexec -n 12 ./ex5adj -da_grid_x 1024 -da_grid_y 1024 -ts_max_steps 10 -snes_monitor -log_view -ts_monitor
209 | ```
210 | * `-snes_monitor` shows the progress of `SNES`
211 | * `-log_view` prints a summary of the logging
212 | 
213 | A snippet of the summary:
214 | ```
215 | ...
216 | Phase summary info:
217 |    Count: number of times phase was executed
218 |    Time and Flop: Max - maximum over all processors
219 |                    Ratio - ratio of maximum to minimum over all processors
220 |    Mess: number of messages sent
221 |    Avg. len: average message length (bytes)
222 |    Reduct: number of global reductions
223 |    Global: entire computation
224 |    Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().
225 |       %T - percent time in this phase         %F - percent flop in this phase
226 |       %M - percent messages in this phase     %L - percent message lengths in this phase
227 |       %R - percent reductions in this phase
228 |    Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)
229 | ------------------------------------------------------------------------------------------------------------------------
230 | Event                Count      Time (sec)     Flop                             --- Global ---  --- Stage ---   Total
231 |                    Max Ratio  Max     Ratio   Max  Ratio  Mess   Avg len Reduct  %T %F %M %L %R  %T %F %M %L %R Mflop/s
232 | ------------------------------------------------------------------------------------------------------------------------
233 | 
234 | --- Event Stage 0: Main Stage
235 | 
236 | VecDot                20 1.0 2.7505e-02 1.7 7.00e+06 1.0 0.0e+00 0.0e+00 2.0e+01  0  0  0  0  2   0  0  0  0  2  3050
237 | VecMDot              321 1.0 2.6292e+00 1.4 6.62e+08 1.0 0.0e+00 0.0e+00 3.2e+02 25 15  0  0 34  25 15  0  0 34  3017
238 | VecNorm              401 1.0 7.1590e-01 1.9 1.40e+08 1.0 0.0e+00 0.0e+00 4.0e+02  7  3  0  0 42   7  3  0  0 42  2349
239 | ...
240 | ```
241 | 
242 | #### Questions
243 | > **Where is the majority of CPU time spent?**
244 | 
245 | |<font color="white">Of course, the answer may vary depending on settings such as the number of procs, problem size, and solver options. Typically most of the time should be spent on [VecMDot](http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Vec/VecMDot.html) or [MatMult](http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatMult.html). </font>|
246 | 
247 | > **How expensive is it to do an adjoint step?**
248 | 
249 | |<font color="white">For this particular run, an adjoint step takes about 60-70% of the running time of a forward step (compare the time between TSAdjointStep and TSStep). </font>|
250 | 
251 | > **How can we improve performance?**
252 | 
253 | |<font color="white">1. Use memory instead of disk for checkpointing (`-ts_trajectory_type memory -ts_trajectory_solution_only 0`); 2. Tune the time stepping solver, nonlinear solver, linear solver, preconditioner and so forth. </font>|
254 | 
255 | ### Further information
256 | Because this example uses `DMDA`, the Jacobian can be efficiently approximated using finite differences with coloring. You can use the option `-snes_fd_color` to enable this feature.
257 | 
258 | ## Out-Brief
259 | 
260 | We have used [PETSc](https://www.mcs.anl.gov/petsc/) to demonstrate the adjoint capability as an enabling technology for dynamic-constrained optimization. In particular, we focused on time-dependent problems including complex dynamical systems with discontinuities and a large-scale hyperbolic PDE.
261 | 
262 | We have shown the basic usage of the adjoint solver as well as functionalities that can facilitate rapid development, diagnosis and performance profiling.
263 | 
264 | ## Further Reading
265 | 
266 | [PETSc Documentation](http://www.mcs.anl.gov/petsc/documentation/)
267 | 
268 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
269 | 
270 | &nbsp;
271 | 
272 | ---
273 | 
274 | [Back to all HandsOnLessons](../lessons.md)
275 | 


--------------------------------------------------------------------------------
/lessons/atpesc-instructions.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Instructions for Numerical Package Hands-on Setup (ATPESC 2017)
  3 | 
  4 | Participants in [ATPESC 2017](https://extremecomputingtraining.anl.gov) will work in groups of 2 for hands-on exercises.  Forming groups of 2 helps us to cut in half the number of IT issues that may arise during hands-on exercises.  Moreover, [pair programming](https://en.wikipedia.org/wiki/Pair_programming) (the practice of having two people work together on one machine, each taking turns between typing and commenting) results in real-time code review, which [research has shown](http://www.sciencedirect.com/science/article/pii/S0950584909000123) results in higher productivity. If you are not using an OSX or Linux laptop, it would be best to try to pair with someone who is.  
  5 | 
  6 | ## Basic Instructions
  7 | 
  8 | Hands-on exercises are primarily run on the [Cooley](https://www.alcf.anl.gov/user-guides/cooley) cluster at ALCF. Please do the following initial setup.
  9 | 
 10 |  - Log in to Cooley
 11 | ```
 12 | ssh -C -X -Y username@cooley.alcf.anl.gov
 13 | ```
 14 |    - `-C` means to use compression
 15 |    - `-X` means to forward X11, `-Y` means _trusted_ X11 forwarding
 16 |  - Set up the software environment on Cooley by adding the following to  _~/.soft.cooley_ (preferably before the `@default` line)
 17 | ```
 18 | +mvapich2
 19 | +gcc-4.8.1
 20 | @visit
 21 | PATH+=/projects/ATPESC2017/NumericalPackages/spack/bin
 22 | MPIEXEC_OMPI=/projects/ATPESC2017/NumericalPackages/spack/opt/spack/linux-rhel6-x86_64/gcc-4.8.1/openmpi-2.1.1-5b4k4f3vzgwz5qmektcqja2av4c4bjrg/bin/mpiexec
 23 | MPIEXEC=/soft/libraries/mpi/mvapich2/gcc/bin/mpiexec
 24 | ```
 25 |   - Run the following command to make the above change to _~/.soft.cooley_ effective.
 26 | ```
 27 | resoft
 28 | ```
 29 |   - **WARNING: Do not attempt next step until after 9:30 am, when our Cooley reservation begins.  Please be sure to work in pairs for the node reservations so that we have sufficient nodes for each pair to use 2 compute nodes for the exercises throughout the day.**
 30 |   - Obtain 2 compute nodes in _interactive_ (`-I`) mode to run the hands on exercises by running the following `qsub` command...
 31 | ```
 32 | qsub -I -n 2 -t 600 -A ATPESC2017 -q training
 33 | ```
 34 |      - **Note**: Once the allocation has started, you will be logged into
 35 |        the reserved nodes and see a new prompt with a different hostname such as
 36 |        `cc122`.
 37 |      - The allocation should remain _reserved_ for the whole day. However,
 38 |        you can delete it simply by logging out of the interactive shell it put you in.
 39 |    - Now, copy over precompiled binaries and data files to your home dir.
 40 | ```
 41 | cp -r /projects/ATPESC2017/NumericalPackages/handson ~/
 42 | ```
 43 |    - Now proceed to run the [Hands-On exercises](lessons.md) as instructed. Each lesson will
 44 |      indicate the path in the `handson` directory you copied above you should
 45 |      `cd` to in order to begin the lesson.
 46 | 
 47 | ## Optional Visualization Instructions
 48 | 
 49 | Some of the hands-on exercises have optional visualization instructions
 50 | and use a variety of visualization tools.
 51 | 
 52 | Getting a room of more than 70 people with different laptops working with
 53 | remote visualization from Cooley is not something either our tight agenda or
 54 | our staff are prepared to support. In addition, where necessary, the hands-on
 55 | leader(s) will demonstrate the use of some tools and learners can follow 
 56 | along at those points.
 57 | 
 58 | There is a whole day of activity devoted to visualization resources and
 59 | tools in the ATPESC agenda on August 9th.
 60 | 
 61 | Nonetheless, we are providing here some instructions and tools for those
 62 | who would like to pursue getting remote visualization working for these
 63 | exercises.
 64 | 
 65 | ### Note
 66 |   - For basic graphics exercises - you should be able to use X11 over ssh.
 67 |     i.e., `ssh -C -X -Y cooley.alcf.anl.gov`, then `ssh -X compute_node_allocated`,
 68 |     and run the graphics part of the exercise. That said, many post-2014
 69 |     X servers have _INdirect GLX_ disabled in them breaking tools like
 70 |     paraview, VisIt and glvis.
 71 | 
 72 | We have developed a script (to be run on your laptop) that can help with
 73 | setting up a VNC connection to Cooley. It can work on MacOS, and with some
 74 | Linux VNC clients (vinagre, vncviewer). However, the script is fragile and
 75 | might not work with everyone's setup - hence this is optional. This script
 76 | will do several things:
 77 | 
 78 |  - Log you into cooley once with your token.
 79 |  - Set up SSH Control Master so you have to log into Cooley with your token only once for the whole day; password-less login will work thereafter.
 80 |  - Reserve 3 nodes for 12h on Cooley.
 81 |  - Set up a VNC connection to those nodes.
 82 | 
 83 |    Mac and Linux users are welcome to download and run this script to
 84 |    setup a VNC connection. Doing so will permit quick use of tools
 85 |    like VisIt, paraview and/or glvis. 
 86 | 
 87 | To try this setup script...
 88 | 
 89 | On Linux,
 90 | ```
 91 | wget https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/master/tools/atpesc2017_cooley_vnc_setup.sh
 92 | ```
 93 | 
 94 | On Mac,
 95 | 
 96 | ```
 97 | curl -O https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/master/tools/atpesc2017_cooley_vnc_setup.sh
 98 | ```
 99 | 
100 | Ensure the script has execute permissions...
101 | ```
102 | chmod 755 atpesc2017_cooley_vnc_setup.sh
103 | ```
104 | 
105 | Now, try running the script
106 | 
107 | ```
108 | ./atpesc2017_cooley_vnc_setup.sh <your_cooley_username>
109 | ```
110 | 
111 | ### Notes
112 |   - If allocating nodes via this script, please deallocate nodes that you might have previously allocated
113 |     by simply logging out of the interactive allocation.
114 | 
115 | ### Troubleshooting
116 |   - If you have a different preferred Linux VNC client, you should be able to use it to connect to the VNC connection that is already setup by this script. For example,
117 | ```
118 | krdc vnc://localhost:22590
119 | ```
120 |   - When rerunning the script - if the ssh command to setup VNC tunnel fails - you
121 |     might have to kill the _ssh control master_ process and restart again. It's easiest
122 |     to simply find all ssh logins to cooley and kill them
123 | ```
124 | $ ps -ef | grep cooley
125 | 3640  7348   694   0  4:58PM ttys003    0:00.01 grep cooley
126 | 3640  7345 62009   0  4:58PM ttys004    0:00.03 ssh -C -X -Y cooley.alcf.anl.gov
127 | 3640  7347 62009   0  4:58PM ttys004    0:00.03 ssh -L 22590:cc122:5900 cooley.alcf.anl.gov
128 | $ kill -9 7345 7347
129 | ```
130 | 
131 | ---
132 | 
133 | [Back to all HandsOnLessons](lessons.md)
134 | 


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/1d_heat_equation.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/1d_heat_equation.xlsx


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/animated_basic_heat.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/animated_basic_heat.gif


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/basic0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/basic0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/basic0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/basic0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/basic0003.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/heat.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <float.h>
  3 | #include <math.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | #include <unistd.h>
  8 | #ifdef HAVE_FEENABLEEXCEPT
  9 | #define _GNU_SOURCE
 10 | #include <fenv.h>
 11 | #if 0
 12 | #include "fe-handling-example.c"
 13 | #endif
 14 | #endif
 15 | 
 16 | int const Nt_max = 50000; /* declared limit on time steps; not referenced below */
 17 | int const Nx_max = 10000; /* declared limit on grid points; not referenced below */
 18 | 
 19 | int noout = 0;                /* nonzero: write_array() returns without writing */
 20 | int savi = 0;                 /* if nonzero, the solution is written every savi-th step */
 21 | int outi = 100;               /* if nonzero, progress is printed every outi-th step */
 22 | int save = 0;                 /* nonzero: keep exact solution, change and error histories */
 23 | char const *alg = "ftcs";     /* update scheme: ftcs, upwind15 or crankn */
 24 | char const *prec = "double";  /* precision name; parsed and echoed, not used to pick arithmetic here */
 25 | char const *ic = "const(1)";  /* initial-condition spec read by set_initial_condition() */
 26 | double alpha = 0.2;           /* material thermal diffusivity */
 27 | double dt = 0.004;            /* time increment */
 28 | double dx = 0.1;              /* space increment; main() resets it to 1/(Nx-1) */
 29 | double bc0 = 0;               /* boundary value u(0,t) */
 30 | double bc1 = 1;               /* boundary value u(1,t) */
 31 | double maxt = 2.0;            /* time to run the simulation to */
 32 | 
 33 | double *curr=0, *last=0, *change_history=0, *exact=0, *error_history=0; /* allocated in initialize() */
 34 | double *cn_Amat = 0;          /* tridiagonal matrix built and factored for the crankn scheme */
 35 | 
 36 | int Nx = (int) (1/0.1+1.5);   /* grid point count; recomputed from dx in main() */
 37 | int Nt = (int) (1 / 0.004);   /* time step count; recomputed from maxt/dt in main() */
 38 | 
 39 | /*
 40 |  * Utilities 
 41 |  */
 42 | /* Sum of squared differences of a[0..n-1] and b[0..n-1]; no sqrt is taken. */
 43 | static double
 44 | l2_norm(int n, double const *a, double const *b)
 45 | {
 46 |     double acc = 0;
 47 |     double const *const stop = a + (n > 0 ? n : 0);
 48 |     for (; a != stop; a++, b++) {
 49 |         double const d = *a - *b;
 50 |         acc += d * d;
 51 |     }
 52 |     return acc;
 53 | }
 54 | 
 55 | /* Copy the first n doubles of src into dst, front to back. */
 56 | static void
 57 | copy(int n, double *dst, double const *src)
 58 | {
 59 |     double *const end = dst + (n > 0 ? n : 0);
 60 |     while (dst != end) *dst++ = *src++;
 61 | }
 62 | 
 63 | #define TSTART -1    /* pseudo step codes: each selects a fixed file name */
 64 | #define TFINAL -2
 65 | #define RESIDUAL -3
 66 | #define ERROR -4
 67 | /*
 68 |  * Write a[0..n-1] as "x value" text lines, x = i*dx, to a .curve file in
 69 |  * the working directory.  t is one of the codes above or a step number; a
 70 |  * step number gives heat_exact_<t>.curve when a is the global 'exact'
 71 |  * array and heat_soln_<t>.curve otherwise.  Nothing is written when the
 72 |  * global 'noout' is set.  If the file cannot be opened the error is
 73 |  * reported on stderr and the write is skipped.
 74 |  */
 75 | static void
 76 | write_array(int t, int n, double dx, double const *a)
 77 | {
 78 |     int i;
 79 |     char fname[32];
 80 |     FILE *outf;
 81 | 
 82 |     if (noout) return;
 83 | 
 84 |     if (t == TSTART)        snprintf(fname, sizeof(fname), "heat_soln_00000.curve");
 85 |     else if (t == TFINAL)   snprintf(fname, sizeof(fname), "heat_soln_final.curve");
 86 |     else if (t == RESIDUAL) snprintf(fname, sizeof(fname), "change.curve");
 87 |     else if (t == ERROR)    snprintf(fname, sizeof(fname), "error.curve");
 88 |     else if (a == exact)    snprintf(fname, sizeof(fname), "heat_exact_%05d.curve", t);
 89 |     else                    snprintf(fname, sizeof(fname), "heat_soln_%05d.curve", t);
 90 | 
 91 |     /* fopen() returns NULL on failure; it must not reach fprintf/fclose */
 92 |     if (!(outf = fopen(fname, "w"))) { perror(fname); return; }
 93 |     for (i = 0; i < n; i++)
 94 |         fprintf(outf, "%8.4g %8.4g\n", i*dx, a[i]);
 95 |     fclose(outf);
 96 | }
 97 | 
 98 | 
 99 | /*
100 |   Factor, in place and without pivoting, an n-column tridiagonal matrix
101 |   stored three entries per column j: a[0+j*3] is combined with the previous
102 |   column's multiplier, a[1+j*3] is the diagonal (pivot) and a[2+j*3]
103 |   receives the multiplier.  Asserts that every pivot is nonzero.
104 |   Licensing: This code is distributed under the GNU LGPL license.
105 |   Modified: 30 May 2009 Author: John Burkardt
106 |   Modified by Mark C. Miller, July 23, 2017
107 | */
108 | static void
109 | r83_np_fa(int n, double *a)
110 | {
111 |     int col;
112 |     for (col = 0; col < n-1; col++)
113 |     {
114 |         double *const here = a + 3*col;  /* entries of this column */
115 |         double *const next = here + 3;   /* entries of the following column */
116 | 
117 |         assert(here[1] != 0.0);
118 |         here[2] = here[2] / here[1];            /* store the multiplier in L */
119 |         next[1] = next[1] - here[2] * next[0];  /* update the next diagonal */
120 |     }
121 | 
122 |     assert(a[1+(n-1)*3] != 0.0);
123 | }
124 | 
125 | /*
126 |  * Allocate the global work arrays (Nx points; with 'save' also the exact
127 |  * buffer and the change/error histories), check that 'alg' names a known
128 |  * scheme and, for "crankn", build and factor the constant tridiagonal
129 |  * matrix cn_Amat.  Prints a message and exits if an allocation fails.
130 |  */
131 | static void
132 | initialize(void)
133 | {
134 |     /* the time loop in main() can take Nt+1 steps when maxt/dt is not whole */
135 |     size_t const nhist = Nt > 0 ? (size_t) Nt + 1 : 1;
136 |     curr = (double *) calloc(Nx, sizeof(double));
137 |     last = (double *) calloc(Nx, sizeof(double));
138 |     if (save) {
139 |         exact = (double *) calloc(Nx, sizeof(double));
140 |         change_history = (double *) calloc(nhist, sizeof(double));
141 |         error_history = (double *) calloc(nhist, sizeof(double));
142 |     }
143 |     if (!curr || !last || (save && (!exact || !change_history || !error_history))) {
144 |         fprintf(stderr, "heat: out of memory (Nx=%d, Nt=%d)\n", Nx, Nt);
145 |         exit(1);
146 |     }
147 | 
148 |     assert(strncmp(alg, "ftcs", 4)==0 ||
149 |            strncmp(alg, "upwind15", 8)==0 ||
150 |            strncmp(alg, "crankn", 6)==0);
151 | 
152 | #ifdef HAVE_FEENABLEEXCEPT
153 |     feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
154 | #endif
155 | 
156 |     if (!strncmp(alg, "crankn", 6)) {
157 |         /* The matrix A does not change with time: set once, factor once. */
158 |         int i;
159 |         double w = alpha * dt / dx / dx;
160 | 
161 |         cn_Amat = ( double * ) malloc ( 3 * Nx * sizeof ( double ) );
162 |         if (!cn_Amat) { fprintf(stderr, "heat: out of memory\n"); exit(1); }
163 | 
164 |         cn_Amat[0+0*3] = 0.0;
165 |         cn_Amat[1+0*3] = 1.0;
166 |         cn_Amat[0+1*3] = 0.0;
167 |         for ( i = 1; i < Nx - 1; i++ ) {
168 |             cn_Amat[2+(i-1)*3] =           - w;
169 |             cn_Amat[1+ i   *3] = 1.0 + 2.0 * w;
170 |             cn_Amat[0+(i+1)*3] =           - w;
171 |         }
172 |         cn_Amat[2+(Nx-2)*3] = 0.0;
173 |         cn_Amat[1+(Nx-1)*3] = 1.0;
174 |         cn_Amat[2+(Nx-1)*3] = 0.0;
175 |         r83_np_fa(Nx, cn_Amat);   /* factor the matrix */
176 |     }
177 | }
178 | 
179 | #define HANDLE_ARG(VAR, TYPE, STYLE, HELP) /* needs 'i', 'argc', 'argv', 'help' in scope */ \
180 | { \
181 |     void *valp = (void*) &VAR; \
182 |     int const len = strlen(#VAR)+1; /* length of "VAR=" */ \
183 |     for (i = 1; i < argc; i++) \
184 |     {\
185 |         char const *style = #STYLE; \
186 |         int valid_style = style[1]=='d'||style[1]=='g'||style[1]=='s'; \
187 |         if (strncmp(argv[i], #VAR"=", len)) /* not a "VAR=..." argument */ \
188 |             continue; \
189 |         assert(valid_style); \
190 | 	if (strlen(argv[i]+len)) /* an empty value leaves VAR unchanged */ \
191 |         {\
192 |             if      (style[1] == 'd') /* int */ \
193 |                 *((int*) valp) = (int) strtol(argv[i]+len,0,10); \
194 |             else if (style[1] == 'g') /* double */ \
195 |                 *((double*) valp) = (double) strtod(argv[i]+len,0); \
196 |             else if (style[1] == 's') /* char* */ \
197 |                 *((char**) valp) = (char*) strdup(argv[i]+len); /* heap copy of the value */ \
198 |         }\
199 |     }\
200 |     if (help) /* usage mode: VAR=value on the left, "HELP (TYPE)" on the right */ \
201 |     {\
202 |         char tmp[256]; \
203 |         int len = snprintf(tmp, sizeof(tmp), "        %s=" #STYLE, /* width of the left part */ \
204 |             #VAR, VAR);\
205 |         snprintf(tmp, sizeof(tmp), "%s (%s)", #HELP, #TYPE); /* tmp reused for the right part */ \
206 |         fprintf(stderr, "        %s=" #STYLE "%*s\n", \
207 |             #VAR, VAR, 80-len, tmp);\
208 |     }\
209 |     else /* normal run: echo the setting to stderr */ \
210 |         fprintf(stderr, "    %s="#STYLE"\n", \
211 |             #VAR, VAR);\
212 | }
213 | 
214 | /*
215 |  * Apply "name=value" command-line settings and echo each one to stderr.
216 |  * An argument containing "help" (any case) instead lists every setting and
217 |  * exits with status 1; argv[0] is skipped so the program path cannot do so.
218 |  */
219 | static void
220 | process_args(int argc, char **argv)
221 | {
222 |     int i, help = 0;
223 | 
224 |     for (i = 1; i < argc && !help; i++)
225 |         help = 0!=strcasestr(argv[i], "help");
226 | 
227 |     if (help) {
228 |         fprintf(stderr, "Usage:\n");
229 |         fprintf(stderr, "    ./heat <arg>=<value> <arg>=<value>...\n");
230 |     }
231 | 
232 |     HANDLE_ARG(prec, char*, %s, precision half|float|double|quad);
233 |     HANDLE_ARG(alpha, double, %g, material thermal diffusivity);
234 |     HANDLE_ARG(dx, double, %g, x-incriment (1/dx->int));
235 |     HANDLE_ARG(dt, double, %g, t-incriment);
236 |     HANDLE_ARG(maxt, double, %g, max. time to run simulation to);
237 |     HANDLE_ARG(bc0, double, %g, bc @ x=0: u(0,t));
238 |     HANDLE_ARG(bc1, double, %g, bc @ x=1: u(1,t));
239 |     HANDLE_ARG(ic, char*, %s, ic @ t=0: u(x,0));
240 |     HANDLE_ARG(alg, char*, %s, algorithm ftcs|upwind15|crankn);
241 |     HANDLE_ARG(savi, int, %d, save every i-th solution step);
242 |     HANDLE_ARG(save, int, %d, save error in every saved solution);
243 |     HANDLE_ARG(outi, int, %d, output progress every i-th solution step);
244 |     HANDLE_ARG(noout, int, %d, disable all file outputs);
245 | 
246 |     if (help) {
247 |         fprintf(stderr, "Examples...\n");
248 |         fprintf(stderr, "    ./heat dx=0.01 dt=0.0002 alg=ftcs\n");
249 |         fprintf(stderr, "    ./heat dx=0.1 bc0=5 bc1=10 ic=\"spikes(5,5)\"\n");
250 |         exit(1);
251 |     }
252 | }
253 | 
254 | /*
255 |  * Fill a[0..n-1] from the initial-condition spec 'ic', then write it out
256 |  * as the start curve.  x below is accumulated in steps of dx.
257 |  *   const(val)             every point set to val
258 |  *   step(left,xmid,right)  left where x < xmid, right elsewhere
259 |  *   ramp(left,right)       evenly spaced values from left to right
260 |  *   rand(seed,amp)         amp*random()/(2^31-1) after srandom(seed)
261 |  *   sin(Pi*x)              sin(pi*x)
262 |  *   spikes(amp,idx,...)    zero, except a[idx] = amp for each pair
263 |  * Any other spec leaves a[] as it was.  A spikes list that is cut short or
264 |  * unparsable ends the scan instead of running past the string.
265 |  */
266 | static void
267 | set_initial_condition(int n, double *a, double dx, char const *ic)
268 | {
269 |     int i;
270 |     double x;
271 | 
272 |     if (!strncmp(ic, "const(", 6)) {
273 |         double cval = strtod(ic+6, 0);
274 |         for (i = 0; i < n; i++)
275 |             a[i] = cval;
276 |     } else if (!strncmp(ic, "step(", 5)) {
277 |         char *p;
278 |         double left = strtod(ic+5, &p);
279 |         double xmid = strtod(p+1, &p);
280 |         double right = strtod(p+1, 0);
281 |         for (i = 0, x = 0; i < n; i++, x+=dx)
282 |             a[i] = (x < xmid) ? left : right;
283 |     } else if (!strncmp(ic, "ramp(", 5)) {
284 |         char *p;
285 |         double left = strtod(ic+5, &p);
286 |         double right = strtod(p+1, 0);
287 |         double dv = (right-left)/(n-1);
288 |         for (i = 0, x = left; i < n; i++, x+=dv)
289 |             a[i] = x;
290 |     } else if (!strncmp(ic, "rand(", 5)) {
291 |         char *p;
292 |         int seed = (int) strtol(ic+5,&p,10);
293 |         double amp = strtod(p+1, 0);
294 |         const double maxr = ((long long)1<<31)-1;
295 |         srandom(seed);
296 |         for (i = 0; i < n; i++)
297 |             a[i] = amp * random()/maxr;
298 |     } else if (!strncmp(ic, "sin(Pi*x)", 9)) {
299 |         for (i = 0, x = 0; i < n; i++, x+=dx)
300 |             a[i] = sin(M_PI*x);
301 |     } else if (!strncmp(ic, "spikes(", 7)) {
302 |         char const *p = &ic[6];   /* at '(' first, then at the ',' before a pair */
303 |         for (i = 0; i < n; i++)
304 |             a[i] = 0;
305 |         /* stop at ')', and also at end of string or on text that is no number */
306 |         while (*p && *p != ')') {
307 |             char *ep_amp, *ep_idx;
308 |             double amp = strtod(p+1, &ep_amp);
309 |             int idx;
310 |             if (ep_amp == p+1 || !*ep_amp) break;   /* no amplitude, or no index follows */
311 |             idx = (int) strtod(ep_amp+1, &ep_idx);
312 |             if (ep_idx == ep_amp+1) break;          /* index did not parse */
313 |             assert(0 <= idx && idx < n);
314 |             a[idx] = amp;
315 |             p = ep_idx;
316 |         }
317 |     }
318 | 
319 |     write_array(TSTART, n, dx, a);   /* n, not the global Nx: a[] holds n values */
320 | }
321 | 
322 | /*
323 |  * Fill a[0..n-1] with a reference solution at time t on the grid x = i*dx
324 |  * (x is built by repeated addition of dx).  With both boundary values zero,
325 |  * ic "sin(Pi*x)" gives sin(pi x) exp(-alpha pi^2 t) and ic "const(c)" gives
326 |  * a sine series summed over modes 1..199.  Any other combination yields
327 |  * the time-independent line bc0 + (bc1-bc0)*x.
328 |  */
329 | static void
330 | compute_exact_solution(int n, double *a, double dx, char const *ic,
331 |     double alpha, double t, double bc0, double bc1)
332 | {
333 |     int const zero_bcs = (bc0 == 0 && bc1 == 0);
334 |     double x = 0;
335 |     int i;
336 | 
337 |     if (zero_bcs && strncmp(ic, "sin(Pi*x)", 9) == 0) {
338 |         for (i = 0; i < n; i++, x += dx)
339 |             a[i] = sin(M_PI*x)*exp(-alpha*M_PI*M_PI*t);
340 |     } else if (zero_bcs && strncmp(ic, "const(", 6) == 0) {
341 |         double const cval = strtod(ic+6, 0);
342 |         for (i = 0; i < n; i++, x += dx) {
343 |             double fsum = 0;
344 |             int m;
345 |             /* modes m = 1..199 of the Fourier sine series */
346 |             for (m = 1; m < 200; m++) {
347 |                 double coeff = 2*cval*(1-pow(-1.0,(double)m))/(m*M_PI);
348 |                 double func = sin(m*M_PI*x)*exp(-alpha*m*m*M_PI*M_PI*t);
349 |                 fsum += coeff * func;
350 |             }
351 |             a[i] = fsum;
352 |         }
353 |     } else {
354 |         for (i = 0; i < n; i++, x += dx)
355 |             a[i] = bc0 + (bc1-bc0)*x;
356 |     }
357 | }
358 | 
359 | /*
360 |  * One forward-time, centered-space step: curr is computed from last.
361 |  * The two end points are set to bc_0 and bc_1; each interior point i gets
362 |  *     r*last[i+1] + (1-2r)*last[i] + r*last[i-1],   r = alpha*dt/dx^2.
363 |  */
364 | static void
365 | solution_update_ftcs(int n, double *curr, double const *last,
366 |     double alpha, double dx, double dt,
367 |     double bc_0, double bc_1)
368 | {
369 |     double const r = alpha * dt / (dx * dx);
370 |     double const mid = 1-2*r;
371 |     int i = 1;
372 | 
373 |     /* end points carry the boundary values */
374 |     curr[0  ] = bc_0;
375 |     curr[n-1] = bc_1;
376 | 
377 |     while (i < n-1) {
378 |         curr[i] = r*last[i+1] + mid*last[i] + r*last[i-1];
379 |         i++;
380 |     }
381 | }
382 | 
383 | /* One step of the five-point "upwind15" scheme: the ends take bc_0/bc_1,
384 |    their neighbours a three-point update and the rest the five-point
385 |    stencil.  k is the diffusion number alpha*dt/dx^2, matching
386 |    solution_update_ftcs(); the old alpha*alpha factor mis-scaled it. */
387 | static void
388 | solution_update_upwind15(int n, double *curr, double const *last,
389 |     double alpha, double dx, double dt,
390 |     double bc_0, double bc_1)
391 | {
392 |     double const f2 = 1.0/24, f1 = 1.0/6, f0 = 1.0/4;
393 |     double const k = alpha * dt / (dx * dx);
394 |     double const k2 = k*k;
395 |     curr[0  ] = bc_0;
396 |     curr[1  ] = last[1  ] + k * (last[0  ] - 2 * last[1  ] + last[2  ]);
397 |     curr[n-2] = last[n-2] + k * (last[n-3] - 2 * last[n-2] + last[n-1]);
398 |     curr[n-1] = bc_1;
399 |     for (int i = 2; i < n-2; i++)
400 |         curr[i] =  f2*(12*k2  -2*k    )*last[i-2]
401 |                   +f2*(12*k2  -2*k    )*last[i+2]
402 |                   -f1*(12*k2  -8*k    )*last[i-1]
403 |                   -f1*(12*k2  -8*k    )*last[i+1]
404 |                   +f0*(12*k2 -10*k  +4)*last[i  ];
405 | }
406 | 
407 | /* Solve A*x = b for a tridiagonal A previously factored by r83_np_fa()
408 |    into a_lu; b is left untouched and the answer is returned in x.
409 |    Licensing: This code is distributed under the GNU LGPL license.
410 |    Modified: 30 May 2009 Author: John Burkardt
411 |    Modified by Mark C. Miller, miller86@llnl.gov, July 23, 2017 */
412 | static void
413 | r83_np_sl ( int n, double const *a_lu, double const *b, double *x)
414 | {
415 |     int row;
416 |     for (row = 0; row < n; row++)
417 |         x[row] = b[row];
418 | 
419 |     /* forward sweep: apply the stored multipliers (L * Y = B) */
420 |     for (row = 1; row < n; row++)
421 |         x[row] -= a_lu[2+(row-1)*3] * x[row-1];
422 | 
423 |     /* backward sweep from the last row up (U * X = Y) */
424 |     for (row = n-1; row >= 0; row--)
425 |     {
426 |         x[row] /= a_lu[1+row*3];
427 |         if (row > 0)
428 |             x[row-1] -= a_lu[0+row*3] * x[row];
429 |     }
430 | }
431 | 
432 | /* Implicit step: solve cn_Amat*curr = last (matrix factored in initialize()),
433 |    then pin the ends to bc_0/bc_1 -- the arguments, not the bc0/bc1 globals. */
434 | static void
435 | solution_update_crankn(int n, double *curr, double const *last,
436 |     double alpha, double dx, double dt, double bc_0, double bc_1)
437 | {
438 |     r83_np_sl (n, cn_Amat, last, curr);
439 |     curr[0] = bc_0;
440 |     curr[n-1] = bc_1;
441 | }
442 | 
443 | /* Print the final progress line (if outi is set) and release every global
444 |    buffer plus any option string that no longer starts with its default
445 |    text.  maxt is unused; the return value is always 0. */
446 | int finalize(int ti, double maxt, double change)
447 | {
448 |     if (outi != 0)
449 |         printf("Iteration %04d: last change l2=%g\n", ti, change);
450 |     /* free(NULL) is a no-op, so buffers that were never allocated need no test */
451 |     free(curr);
452 |     free(last);
453 |     free(exact);
454 |     free(change_history);
455 |     free(error_history);
456 |     free(cn_Amat);
457 |     if (strncmp(alg, "ftcs", 4) != 0)      free((void*)alg);
458 |     if (strncmp(prec, "double", 6) != 0)   free((void*)prec);
459 |     if (strncmp(ic, "const(1)", 8) != 0)   free((void*)ic);
460 |     return 0;
461 | }
462 | 
463 | /*
464 |  * Driver: parse "name=value" arguments, derive Nx, Nt and the adjusted dx,
465 |  * then advance the chosen scheme while ti*dt < maxt, writing curves and
466 |  * progress as requested by savi, save and outi.
467 |  */
468 | int main(int argc, char **argv)
469 | {
470 |     int ti;
471 |     double change = 0;   /* defined even if maxt <= 0 and no step is taken */
472 | 
473 |     process_args(argc, argv);
474 | 
475 |     Nx = (int) (1/dx+1.5);
476 |     Nt = (int) (maxt / dt);
477 |     dx = 1.0/(Nx-1);
478 | 
479 |     initialize();
480 | 
481 |     /* Initial condition */
482 |     set_initial_condition(Nx, last, dx, ic);
483 | 
484 |     /* Step until the simulated time ti*dt reaches maxt */
485 |     for (ti = 0; ti*dt < maxt; ti++)
486 |     {
487 |         if (!strcmp(alg, "ftcs"))
488 |             solution_update_ftcs(Nx, curr, last, alpha, dx, dt, bc0, bc1);
489 |         else if (!strcmp(alg, "upwind15"))
490 |             solution_update_upwind15(Nx, curr, last, alpha, dx, dt, bc0, bc1);
491 |         else if (!strcmp(alg, "crankn"))
492 |             solution_update_crankn(Nx, curr, last, alpha, dx, dt, bc0, bc1);
493 | 
494 |         if (ti>0 && save)
495 |         {
496 |             compute_exact_solution(Nx, exact, dx, ic, alpha, ti*dt, bc0, bc1);
497 |             if (savi && ti%savi==0)
498 |                 write_array(ti, Nx, dx, exact);
499 |         }
500 | 
501 |         if (ti>0 && savi && ti%savi==0)
502 |             write_array(ti, Nx, dx, curr);
503 | 
504 |         change = l2_norm(Nx, curr, last);
505 |         if (save)
506 |         {
507 |             change_history[ti] = change;
508 |             error_history[ti] = l2_norm(Nx, curr, exact);
509 |         }
510 | 
511 |         copy(Nx, last, curr);
512 | 
513 |         if (outi && ti%outi==0)
514 |             printf("Iteration %04d: last change l2=%g\n", ti, change);
515 |     }
516 | 
517 |     write_array(TFINAL, Nx, dx, curr);
518 |     if (save)
519 |     {
520 |         write_array(RESIDUAL, ti, dt, change_history);
521 |         write_array(ERROR, ti, dt, error_history);
522 |     }
523 |     return finalize(ti, maxt, change);
524 | }
525 | 


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/heat.c.numbered.txt:
--------------------------------------------------------------------------------
  1 |      1	#include <assert.h>
  2 |      2	#include <float.h>
  3 |      3	#include <math.h>
  4 |      4	#include <stdio.h>
  5 |      5	#include <stdlib.h>
  6 |      6	#include <string.h>
  7 |      7	#include <unistd.h>
  8 |      8	#ifdef HAVE_FEENABLEEXCEPT
  9 |      9	#define _GNU_SOURCE
 10 |     10	#include <fenv.h>
 11 |     11	#if 0
 12 |     12	#include "fe-handling-example.c"
 13 |     13	#endif
 14 |     14	#endif
 15 |     15	
 16 |     16	int const Nt_max = 50000;
 17 |     17	int const Nx_max = 10000;
 18 |     18	
 19 |     19	int noout = 0;
 20 |     20	int savi = 0;
 21 |     21	int outi = 100;
 22 |     22	int save = 0;
 23 |     23	char const *alg = "ftcs";
 24 |     24	char const *prec = "double";
 25 |     25	char const *ic = "const(1)";
 26 |     26	double alpha = 0.2;
 27 |     27	double dt = 0.004;
 28 |     28	double dx = 0.1;
 29 |     29	double bc0 = 0;
 30 |     30	double bc1 = 1;
 31 |     31	double maxt = 2.0;
 32 |     32	
 33 |     33	double *curr=0, *last=0, *change_history=0, *exact=0, *error_history=0;
 34 |     34	double *cn_Amat = 0;
 35 |     35	
 36 |     36	int Nx = (int) (1/0.1+1.5);
 37 |     37	int Nt = (int) (1 / 0.004);
 38 |     38	
 39 |     39	/*
 40 |     40	 * Utilities 
 41 |     41	 */
 42 |     42	static double
 43 |     43	l2_norm(int n, double const *a, double const *b)
 44 |     44	{
 45 |     45	    int i;
 46 |     46	    double sum = 0;
 47 |     47	    for (i = 0; i < n; i++)
 48 |     48	    {
 49 |     49	        double diff = a[i] - b[i];
 50 |     50	        sum += diff * diff;
 51 |     51	    }
 52 |     52	    return sum;
 53 |     53	}
 54 |     54	
 55 |     55	static void
 56 |     56	copy(int n, double *dst, double const *src)
 57 |     57	{
 58 |     58	    int i;
 59 |     59	    for (i = 0; i < n; i++)
 60 |     60	        dst[i] = src[i];
 61 |     61	}
 62 |     62	
 63 |     63	#define TSTART -1
 64 |     64	#define TFINAL -2
 65 |     65	#define RESIDUAL -3
 66 |     66	#define ERROR -4
 67 |     67	static void
 68 |     68	write_array(int t, int n, double dx, double const *a)
 69 |     69	{
 70 |     70	    int i;
 71 |     71	    char fname[32];
 72 |     72	    FILE *outf;
 73 |     73	
 74 |     74	    if (noout) return;
 75 |     75	
 76 |     76	    if (t == TSTART)
 77 |     77	        snprintf(fname, sizeof(fname), "heat_soln_00000.curve");
 78 |     78	    else if (t == TFINAL)
 79 |     79	        snprintf(fname, sizeof(fname), "heat_soln_final.curve");
 80 |     80	    else if (t == RESIDUAL)
 81 |     81	        snprintf(fname, sizeof(fname), "change.curve");
 82 |     82	    else if (t == ERROR)
 83 |     83	        snprintf(fname, sizeof(fname), "error.curve");
 84 |     84	    else
 85 |     85	    {
 86 |     86	        if (a == exact)
 87 |     87	            snprintf(fname, sizeof(fname), "heat_exact_%05d.curve", t);
 88 |     88	        else
 89 |     89	            snprintf(fname, sizeof(fname), "heat_soln_%05d.curve", t);
 90 |     90	    }
 91 |     91	    
 92 |     92	    outf = fopen(fname,"w");
 93 |     93	    for (i = 0; i < n; i++)
 94 |     94	        fprintf(outf, "%8.4g %8.4g\n", i*dx, a[i]);
 95 |     95	    fclose(outf);
 96 |     96	}
 97 |     97	
 98 |     98	
 99 |     99	static void
100 |    100	r83_np_fa(int n, double *a)
101 |    101	/*
102 |    102	  Licensing: This code is distributed under the GNU LGPL license. 
103 |    103	  Modified: 30 May 2009 Author: John Burkardt
104 |    104	  Modified by Mark C. Miller, July 23, 2017
105 |    105	*/
106 |    106	{
107 |    107	    int i;
108 |    108	
109 |    109	    for ( i = 1; i <= n-1; i++ )
110 |    110	    {
111 |    111	        assert ( a[1+(i-1)*3] != 0.0 );
112 |    112	        /*
113 |    113	          Store the multiplier in L.
114 |    114	        */
115 |    115	        a[2+(i-1)*3] = a[2+(i-1)*3] / a[1+(i-1)*3];
116 |    116	        /*
117 |    117	          Modify the diagonal entry in the next column.
118 |    118	        */
119 |    119	        a[1+i*3] = a[1+i*3] - a[2+(i-1)*3] * a[0+i*3];
120 |    120	    }
121 |    121	
122 |    122	    assert( a[1+(n-1)*3] != 0.0 );
123 |    123	}
124 |    124	
125 |    125	static void
126 |    126	initialize(void)
127 |    127	{
128 |    128	    curr = (double *) calloc(Nx, sizeof(double));
129 |    129	    last = (double *) calloc(Nx, sizeof(double));
130 |    130	    if (save)
131 |    131	    {
132 |    132	        exact = (double *) calloc(Nx, sizeof(double));
133 |    133	        change_history = (double *) calloc(Nt, sizeof(double));
134 |    134	        error_history = (double *) calloc(Nt, sizeof(double));
135 |    135	    }
136 |    136	
137 |    137	    assert(strncmp(alg, "ftcs", 4)==0 ||
138 |    138	           strncmp(alg, "upwind15", 8)==0 ||
139 |    139	           strncmp(alg, "crankn", 6)==0);
140 |    140	
141 |    141	#ifdef HAVE_FEENABLEEXCEPT
142 |    142	    feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
143 |    143	#endif
144 |    144	
145 |    145	    if (!strncmp(alg, "crankn", 6))
146 |    146	    {
147 |    147	        /*
148 |    148	          We do some additional initialization work for Crank-Nicolson.
149 |    149	          The matrix A does not change with time.  We can set it once,
150 |    150	          factor it once, and solve repeatedly.
151 |    151	        */
152 |    152	        int i;
153 |    153	        double w = alpha * dt / dx / dx;
154 |    154	
155 |    155	        cn_Amat = ( double * ) malloc ( 3 * Nx * sizeof ( double ) );
156 |    156	
157 |    157	        cn_Amat[0+0*3] = 0.0;
158 |    158	        cn_Amat[1+0*3] = 1.0;
159 |    159	        cn_Amat[0+1*3] = 0.0;
160 |    160	
161 |    161	        for ( i = 1; i < Nx - 1; i++ )
162 |    162	        {
163 |    163	            cn_Amat[2+(i-1)*3] =           - w;
164 |    164	            cn_Amat[1+ i   *3] = 1.0 + 2.0 * w;
165 |    165	            cn_Amat[0+(i+1)*3] =           - w;
166 |    166	        }
167 |    167	        
168 |    168	        cn_Amat[2+(Nx-2)*3] = 0.0;
169 |    169	        cn_Amat[1+(Nx-1)*3] = 1.0;
170 |    170	        cn_Amat[2+(Nx-1)*3] = 0.0;
171 |    171	
172 |    172	        /*
173 |    173	          Factor the matrix.
174 |    174	        */
175 |    175	        r83_np_fa(Nx, cn_Amat);
176 |    176	    }
177 |    177	}
178 |    178	
179 |    179	#define HANDLE_ARG(VAR, TYPE, STYLE, HELP) \
180 |    180	{ \
181 |    181	    void *valp = (void*) &VAR; \
182 |    182	    int const len = strlen(#VAR)+1; \
183 |    183	    for (i = 1; i < argc; i++) \
184 |    184	    {\
185 |    185	        char const *style = #STYLE; \
186 |    186	        int valid_style = style[1]=='d'||style[1]=='g'||style[1]=='s'; \
187 |    187	        if (strncmp(argv[i], #VAR"=", len)) \
188 |    188	            continue; \
189 |    189	        assert(valid_style); \
190 |    190		if (strlen(argv[i]+len)) \
191 |    191	        {\
192 |    192	            if      (style[1] == 'd') /* int */ \
193 |    193	                *((int*) valp) = (int) strtol(argv[i]+len,0,10); \
194 |    194	            else if (style[1] == 'g') /* double */ \
195 |    195	                *((double*) valp) = (double) strtod(argv[i]+len,0); \
196 |    196	            else if (style[1] == 's') /* char* */ \
197 |    197	                *((char**) valp) = (char*) strdup(argv[i]+len); \
198 |    198	        }\
199 |    199	    }\
200 |    200	    if (help) \
201 |    201	    {\
202 |    202	        char tmp[256]; \
203 |    203	        int len = snprintf(tmp, sizeof(tmp), "        %s=" #STYLE, \
204 |    204	            #VAR, VAR);\
205 |    205	        snprintf(tmp, sizeof(tmp), "%s (%s)", #HELP, #TYPE); \
206 |    206	        fprintf(stderr, "        %s=" #STYLE "%*s\n", \
207 |    207	            #VAR, VAR, 80-len, tmp);\
208 |    208	    }\
209 |    209	    else \
210 |    210	        fprintf(stderr, "    %s="#STYLE"\n", \
211 |    211	            #VAR, VAR);\
212 |    212	}
213 |    213	
214 |    214	static void
215 |    215	process_args(int argc, char **argv)
216 |    216	{
217 |    217	    int i;
218 |    218	    int help = 0;
219 |    219	
220 |    220	    /* quick pass for 'help' anywhere on command line */
221 |    221	    for (i = 0; i < argc && !help; i++)
222 |    222	        help = 0!=strcasestr(argv[i], "help");
223 |    223	    
224 |    224	    if (help)
225 |    225	    {
226 |    226	        fprintf(stderr, "Usage:\n");
227 |    227	        fprintf(stderr, "    ./heat <arg>=<value> <arg>=<value>...\n");
228 |    228	    }
229 |    229	
230 |    230	    HANDLE_ARG(prec, char*, %s, precision half|float|double|quad);
231 |    231	    HANDLE_ARG(alpha, double, %g, material thermal diffusivity);
232 |    232	    HANDLE_ARG(dx, double, %g, x-incriment (1/dx->int));
233 |    233	    HANDLE_ARG(dt, double, %g, t-incriment);
234 |    234	    HANDLE_ARG(maxt, double, %g, max. time to run simulation to);
235 |    235	    HANDLE_ARG(bc0, double, %g, bc @ x=0: u(0,t));
236 |    236	    HANDLE_ARG(bc1, double, %g, bc @ x=1: u(1,t));
237 |    237	    HANDLE_ARG(ic, char*, %s, ic @ t=0: u(x,0));
238 |    238	    HANDLE_ARG(alg, char*, %s, algorithm ftcs|upwind15|crankn);
239 |    239	    HANDLE_ARG(savi, int, %d, save every i-th solution step);
240 |    240	    HANDLE_ARG(save, int, %d, save error in every saved solution);
241 |    241	    HANDLE_ARG(outi, int, %d, output progress every i-th solution step);
242 |    242	    HANDLE_ARG(noout, int, %d, disable all file outputs);
243 |    243	
244 |    244	    if (help)
245 |    245	    {
246 |    246	        fprintf(stderr, "Examples...\n");
247 |    247	        fprintf(stderr, "    ./heat Nx=51 dt=0.002 alg=ftcs\n");
248 |    248	        fprintf(stderr, "    ./heat Nx=51 bc0=5 bc1=10\n");
249 |    249	        exit(1);
250 |    250	    }
251 |    251	
252 |    252	}
253 |    253	
254 |    254	static void
255 |    255	set_initial_condition(int n, double *a, double dx, char const *ic)
256 |    256	{
257 |    257	    int i;
258 |    258	    double x;
259 |    259	
260 |    260	    if (!strncmp(ic, "const(", 6)) /* const(val) */
261 |    261	    {
262 |    262	        double cval = strtod(ic+6, 0);
263 |    263	        for (i = 0; i < n; i++)
264 |    264	            a[i] = cval;
265 |    265	    }
266 |    266	    else if (!strncmp(ic, "step(", 5)) /* step(left,xmid,right) */
267 |    267	    {
268 |    268	        char *p;
269 |    269	        double left = strtod(ic+5, &p);
270 |    270	        double xmid = strtod(p+1, &p);
271 |    271	        double right = strtod(p+1, 0);
272 |    272	        for (i = 0, x = 0; i < n; i++, x+=dx)
273 |    273	        {
274 |    274	            if (x < xmid) a[i] = left;
275 |    275	            else          a[i] = right;
276 |    276	        }
277 |    277	    }
278 |    278	    else if (!strncmp(ic, "ramp(", 5)) /* ramp(left,right) */
279 |    279	    {
280 |    280	        char *p;
281 |    281	        double left = strtod(ic+5, &p);
282 |    282	        double right = strtod(p+1, 0);
283 |    283	        double dv = (right-left)/(n-1);
284 |    284	        for (i = 0, x = left; i < n; i++, x+=dv)
285 |    285	            a[i] = x;
286 |    286	    }
287 |    287	    else if (!strncmp(ic, "rand(", 5)) /* rand(seed,amp) */
288 |    288	    {
289 |    289	        char *p;
290 |    290	        int seed = (int) strtol(ic+5,&p,10);
291 |    291	        double amp = strtod(p+1, 0);
292 |    292	        const double maxr = ((long long)1<<31)-1;
293 |    293	        srandom(seed);
294 |    294	        for (i = 0; i < n; i++)
295 |    295	            a[i] = amp * random()/maxr;
296 |    296	    }
297 |    297	    else if (!strncmp(ic, "sin(Pi*x)", 9)) /* rand(seed,amp) */
298 |    298	    {
299 |    299	        for (i = 0, x = 0; i < n; i++, x+=dx)
300 |    300	            a[i] = sin(M_PI*x);
301 |    301	    }
302 |    302	    else if (!strncmp(ic, "spikes(", 7)) /* spikes(Amp,Loc,Amp,Loc,...) */
303 |    303	    {
304 |    304	        char const *p = &ic[6];
305 |    305	        for (i = 0, x = 0; i < n; i++)
306 |    306	            a[i] = 0;
307 |    307	        while (*p != ')')
308 |    308	        {
309 |    309	            char *ep_amp, *ep_idx;
310 |    310	            double amp = strtod(p+1, &ep_amp);
311 |    311	            int idx = (int) strtod(ep_amp+1, &ep_idx);
312 |    312	            assert(idx<n);
313 |    313	            a[idx] = amp;
314 |    314	            p = ep_idx;
315 |    315	        }
316 |    316	
317 |    317	    }
318 |    318	
319 |    319	    write_array(TSTART, Nx, dx, a);
320 |    320	}
321 |    321	
322 |    322	static void 
323 |    323	compute_exact_solution(int n, double *a, double dx, char const *ic,
324 |    324	    double alpha, double t, double bc0, double bc1)
325 |    325	{
326 |    326	    int i;
327 |    327	    double x;
328 |    328	    
329 |    329	    if (bc0 == 0 && bc1 == 0 && !strncmp(ic, "sin(Pi*x)", 9))
330 |    330	    {
331 |    331	        for (i = 0, x = 0; i < n; i++, x+=dx)
332 |    332	            a[i] = sin(M_PI*x)*exp(-alpha*M_PI*M_PI*t);
333 |    333	    }
334 |    334	    else if (bc0 == 0 && bc1 == 0 && !strncmp(ic, "const(", 6))
335 |    335	    {
336 |    336	        double cval = strtod(ic+6, 0);
337 |    337	        for (i = 0, x = 0; i < n; i++, x+=dx)
338 |    338	        {
339 |    339	            int n;
340 |    340	            double fsum = 0;
341 |    341	
342 |    342	            /* sum first 200 terms of Fourier series */
343 |    343	            for (n = 1; n < 200; n++)
344 |    344	            {
345 |    345	                double coeff = 2*cval*(1-pow(-1.0,(double)n))/(n*M_PI);
346 |    346	                double func = sin(n*M_PI*x)*exp(-alpha*n*n*M_PI*M_PI*t);
347 |    347	                fsum += coeff * func;
348 |    348	            }
349 |    349	            a[i] = fsum;
350 |    350	        }
351 |    351	    }
352 |    352	    else /* can only compute final steady state solution */
353 |    353	    {
354 |    354	        for (i = 0, x = 0; i < n; i++, x+=dx)
355 |    355	            a[i] = bc0 + (bc1-bc0)*x;
356 |    356	    }
357 |    357	}
358 |    358	
359 |    359	static void
360 |    360	solution_update_ftcs(int n, double *curr, double const *last,
361 |    361	    double alpha, double dx, double dt,
362 |    362	    double bc_0, double bc_1)
363 |    363	{
364 |    364	#if 0
365 |    365	    int i;
366 |    366	    double k = alpha * alpha * dt / (dx * dx);
367 |    367	    curr[0  ] = bc_0;
368 |    368	    curr[n-1] = bc_1;
369 |    369	    for (i = 1; i < n-1; i++)
370 |    370	        curr[i] = last[i] + k * (last[i-1] - 2 * last[i] + last[i+1]);
371 |    371	#endif
372 |    372	    double const r = alpha * dt / (dx * dx);
373 |    373	
374 |    374	    /* Impose boundary conditions for solution indices i==0 and i==n-1 */
375 |    375	    curr[0  ] = bc_0;
376 |    376	    curr[n-1] = bc_1;
377 |    377	
378 |    378	    /* Update the solution using FTCS algorithm */
379 |    379	    for (int i = 1; i < n-1; i++)
380 |    380	        curr[i] = r*last[i+1] + (1-2*r)*last[i] + r*last[i-1];
381 |    381	}
382 |    382	
383 |    383	static void
384 |    384	solution_update_upwind15(int n, double *curr, double const *last,
385 |    385	    double alpha, double dx, double dt,
386 |    386	    double bc_0, double bc_1)
387 |    387	{
388 |    388	    double const f2 = 1.0/24;
389 |    389	    double const f1 = 1.0/6;
390 |    390	    double const f0 = 1.0/4;
391 |    391	    double const k = alpha * alpha * dt / (dx * dx);
392 |    392	    double const k2 = k*k;
393 |    393	
394 |    394	    int i;
395 |    395	    curr[0  ] = bc_0;
396 |    396	    curr[1  ] = last[1  ] + k * (last[0  ] - 2 * last[1  ] + last[2  ]);
397 |    397	    curr[n-2] = last[n-2] + k * (last[n-3] - 2 * last[n-2] + last[n-1]);
398 |    398	    curr[n-1] = bc_1;
399 |    399	    for (i = 2; i < n-2; i++)
400 |    400	        curr[i] =  f2*(12*k2  -2*k    )*last[i-2]
401 |    401	                  +f2*(12*k2  -2*k    )*last[i+2]
402 |    402	                  -f1*(12*k2  -8*k    )*last[i-1]
403 |    403	                  -f1*(12*k2  -8*k    )*last[i+1]
404 |    404	                  +f0*(12*k2 -10*k  +4)*last[i  ];
405 |    405	}
406 |    406	
407 |    407	static void 
408 |    408	r83_np_sl ( int n, double const *a_lu, double const *b, double *x)
409 |    409	    /* Licensing: This code is distributed under the GNU LGPL license. 
410 |    410	       Modified: 30 May 2009 Author: John Burkardt
411 |    411	       Modified by Mark C. Miller, miller86@llnl.gov, July 23, 2017
412 |    412	    */
413 |    413	{
414 |    414	    int i;
415 |    415	
416 |    416	    for ( i = 0; i < n; i++ )
417 |    417	        x[i] = b[i];
418 |    418	
419 |    419	    /* Solve L * Y = B.  */
420 |    420	    for ( i = 1; i < n; i++ )
421 |    421	        x[i] = x[i] - a_lu[2+(i-1)*3] * x[i-1];
422 |    422	
423 |    423	    /* Solve U * X = Y.  */
424 |    424	    for ( i = n; 1 <= i; i-- )
425 |    425	    {
426 |    426	        x[i-1] = x[i-1] / a_lu[1+(i-1)*3];
427 |    427	        if ( 1 < i )
428 |    428	            x[i-2] = x[i-2] - a_lu[0+(i-1)*3] * x[i-1];
429 |    429	    }
430 |    430	}
431 |    431	
432 |    432	static void
433 |    433	solution_update_crankn(int n, double *curr, double const *last,
434 |    434	    double alpha, double dx, double dt,
435 |    435	    double bc_0, double bc_1)
436 |    436	{
437 |    437	    /* Do the solve */
438 |    438	    r83_np_sl (n, cn_Amat, last, curr);
439 |    439	    curr[0] = bc0;
440 |    440	    curr[n-1] = bc1;
441 |    441	}
442 |    442	
443 |    443	int finalize(int ti, double maxt, double change)
444 |    444	{
445 |    445	    int retval = 0;
446 |    446	
447 |    447	    if (outi)
448 |    448	        printf("Iteration %04d: last change l2=%g\n", ti, change);
449 |    449	
450 |    450	    free(curr);
451 |    451	    free(last);
452 |    452	    if (exact) free(exact);
453 |    453	    if (change_history) free(change_history);
454 |    454	    if (error_history) free(error_history);
455 |    455	    if (cn_Amat) free(cn_Amat);
456 |    456	    if (strncmp(alg, "ftcs", 4)) free((void*)alg);
457 |    457	    if (strncmp(prec, "double", 6)) free((void*)prec);
458 |    458	    if (strncmp(ic, "const(1)", 8)) free((void*)ic);
459 |    459	
460 |    460	    return retval;
461 |    461	}
462 |    462	
463 |    463	int main(int argc, char **argv)
464 |    464	{
465 |    465	    int i, ti;
466 |    466	    double error;
467 |    467	    FILE *outf;
468 |    468	
469 |    469	    process_args(argc, argv);
470 |    470	
471 |    471	    double change;
472 |    472	    Nx = (int) (1/dx+1.5);
473 |    473	    Nt = (int) (maxt / dt);
474 |    474	    dx = 1.0/(Nx-1);
475 |    475	
476 |    476	    initialize();
477 |    477	
478 |    478	    /* Initial condition */
479 |    479	    set_initial_condition(Nx, last, dx, ic);
480 |    480	
481 |    481	    /* Iterate until residual is small or hit max iterations */
482 |    482	    for (ti = 0; ti*dt < maxt; ti++)
483 |    483	    {
484 |    484	        if (!strcmp(alg, "ftcs"))
485 |    485	            solution_update_ftcs(Nx, curr, last, alpha, dx, dt, bc0, bc1);
486 |    486	        else if (!strcmp(alg, "upwind15"))
487 |    487	            solution_update_upwind15(Nx, curr, last, alpha, dx, dt, bc0, bc1);
488 |    488	        else if (!strcmp(alg, "crankn"))
489 |    489	            solution_update_crankn(Nx, curr, last, alpha, dx, dt, bc0, bc1);
490 |    490	
491 |    491	        if (ti>0 && save)
492 |    492	        {
493 |    493	            compute_exact_solution(Nx, exact, dx, ic, alpha, ti*dt, bc0, bc1);
494 |    494	            if (savi && ti%savi==0)
495 |    495	                write_array(ti, Nx, dx, exact);
496 |    496	        }
497 |    497	
498 |    498	        if (ti>0 && savi && ti%savi==0)
499 |    499	            write_array(ti, Nx, dx, curr);
500 |    500	
501 |    501	        change = l2_norm(Nx, curr, last);
502 |    502	        if (save)
503 |    503	        {
504 |    504	            change_history[ti] = change;
505 |    505	            error_history[ti] = l2_norm(Nx, curr, exact);
506 |    506	        }
507 |    507	
508 |    508	        copy(Nx, last, curr);
509 |    509	
510 |    510	        if (outi && ti%outi==0)
511 |    511	        {
512 |    512	            printf("Iteration %04d: last change l2=%g\n", ti, change);
513 |    513	        }
514 |    514	    }
515 |    515	
516 |    516	    write_array(TFINAL, Nx, dx, curr);
517 |    517	    if (save)
518 |    518	    {
519 |    519	        write_array(RESIDUAL, ti, dt, change_history);
520 |    520	        write_array(ERROR, ti, dt, error_history);
521 |    521	    }
522 |    522	
523 |    523	    return finalize(ti, maxt, change);
524 |    524	}
525 | 


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/highres0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/highres0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/highres0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/highres0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_crankn0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_crankn0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_crankn0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_crankn0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_smalldt_long0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_smalldt_long0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_smalldt_long0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_smalldt_long0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0003.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_smalldt_long0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_smalldt_long0004.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_crankn0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_crankn0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_crankn0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_crankn0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_smalldt0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_smalldt0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_smalldt0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/hr_spikes_smalldt0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/hr_spikes_smalldt0003.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/makefile:
--------------------------------------------------------------------------------
 1 | PROB = basic
 2 | REPORT = iops fops mem
 3 | 
# Print heat's usage text. This is the first ordinary target in the file,
# so a bare `make` runs it. The trailing `exit 0` makes the recipe succeed
# whatever exit status heat itself returns.
help:
	./heat --help; exit 0
 6 | 
 7 | clean:
 8 | 	@for x in *; do \
 9 | 	    if [[ -d $$x ]]; then \
10 | 	        echo "Removing directory $$x"; \
11 | 		rm -rf $$x; \
12 | 	    fi; \
13 | 	done
14 | 
15 | #
16 | # To get performance data, we actually run multiple instances
17 | # using different valgrind tools
18 | #
19 | run:
20 | 	@rm -rf ${PROB}; mkdir ${PROB}
21 | 	@echo "./heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic=${IC} alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI}"
22 | 	@pushd ${PROB}; \
23 | 	if [[ -n $$(echo ${REPORT} | grep ops) ]]; then \
24 | 	    valgrind --log-file=valgrind_lackey.out --tool=lackey --detailed-counts=yes ../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} noout=1 >& heat_lackey.out & \
25 | 	fi; \
26 | 	if [[ -n $$(echo ${REPORT} | grep mem) ]]; then \
27 | 	    valgrind --log-file=valgrind_memcheck.out --tool=memcheck ../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} noout=1 >& heat_memcheck.out & \
28 | 	fi; \
29 | 	../heat alpha=${ALPHA} dx=${DX} dt=${DT} bc0=${BC0} bc1=${BC1} ic="${IC}" alg=${ALG} eps=${EPS} maxi=${MAXI} savi=${SAVI} save=${SAVE} outi=${OUTI} & \
30 | 	wait
31 | 	@if [[ -n $$(echo ${REPORT} | grep iops) ]]; then \
32 | 	    echo "Integer ops        = $$(cat ${PROB}/valgrind_lackey.out | grep I1\\\|I8\\\|I16\\\|I32\\\|I64 | tr -s ' ' | cut -d' ' -f5 | tr -d ',' | tr '\n' '+' | sed -e 's/$$/0\n/' | bc)"; \
33 | 	fi
34 | 	@if [[ -n $$(echo ${REPORT} | grep fops) ]]; then \
35 | 	    echo "Floating point ops = $$(cat ${PROB}/valgrind_lackey.out | grep F32\\\|F64\\\|F128\\\|V128\\\|V256 | tr -s ' ' | cut -d' ' -f5 | tr -d ',' | tr '\n' '+' | sed -e 's/$$/0\n/' | bc)"; \
36 | 	fi
37 | 	@if [[ -n $$(echo ${REPORT} | grep mem) ]]; then \
38 | 	    echo "Memory used        = $$(cat ${PROB}/valgrind_memcheck.out | grep 'total heap usage:' | tr -s ' ' | cut -d' ' -f9 | tr -d ',\n' | sed -e 's/$$/-748\n/' | bc) bytes"; \
39 | 	fi
40 | 
41 | # Short cuts: each target below re-invokes ${MAKE} on the run target with
42 | # PROB set to the target's own name ($@) and a preset combination of
43 | # parameter overrides (DX, DT, ALG, IC, BC1, SAVI, OUTI, MAXI).
44 | basic:
45 | 	${MAKE} PROB=$@ SAVI=100 run
46 | 
47 | basic_spikes:
48 | 	${MAKE} PROB=$@ BC1=0 IC="spikes(10,2,10,9)" SAVI=25 run
49 | 
50 | hr_spikes:
51 | 	${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 SAVI=10 run
52 | 
53 | hr_spikes_smalldt:
54 | 	${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 DT=0.0001 SAVI=500 run
55 | 
56 | hr_spikes_crankn:
57 | 	${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 ALG=crankn SAVI=10 run
58 | 
59 | hr_spikes_crankn_largedt:
60 | 	${MAKE} PROB=$@ BC1=0 IC="spikes(10,20,10,90)" DX=0.01 DT=0.008 ALG=crankn SAVI=5 run
61 | 
62 | highres:
63 | 	${MAKE} PROB=$@ DX=0.01 SAVI=10 run
64 | 
65 | hr_smalldt_short:
66 | 	${MAKE} PROB=$@ DX=0.01 DT=0.001 SAVI=250 OUTI=250 run
67 | 
68 | hr_smalldt_long:
69 | 	${MAKE} PROB=$@ DX=0.01 DT=0.001 SAVI=250 OUTI=250 MAXI=20000 run
70 | 
71 | hr_crankn:
72 | 	${MAKE} PROB=$@ DX=0.01 DT=0.001 ALG=crankn SAVI=100 run
73 | 
74 | crankn_faster:
75 | 	${MAKE} PROB=$@ DX=0.01 DT=0.008 ALG=crankn SAVI=25 OUTI=50 run
76 | # view: change into ${PROB} and run ../plot_heat.py through ${VISIT} -cli -s.
77 | view:
78 | 	@pushd ${PROB};\
79 |         ${VISIT} -cli -s ../plot_heat.py
80 | 
81 | # all: depends on the cases listed below (none of the spikes cases are included).
82 | all: basic highres hr_smalldt_short hr_smalldt_long hr_crankn crankn_faster
83 | 


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/makefile.txt:
--------------------------------------------------------------------------------
1 | makefile


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/plot_heat.py:
--------------------------------------------------------------------------------
 1 | import sys, time
 2 | # Meant to be run through VisIt's CLI (the makefile's view target passes it via -cli -s); the VisIt functions used below are not imported here.
 3 | hostName = 'scratlantis'  # hard-coded host name passed to GetMachineProfile
 4 | # Open a compute engine using a copy of the host's machine profile that carries only launch profile 0.
 5 | hp0=GetMachineProfile(hostName)
 6 | hp1=GetMachineProfile(hostName)
 7 | hp1.ClearLaunchProfiles()
 8 | hp1.AddLaunchProfiles(hp0.GetLaunchProfiles(0))
 9 | OpenComputeEngine(hp1)
10 | # Curve plot attributes: line width 1, empty designator, legend and labels off, color (0, 0, 255, 255).
11 | ca = CurveAttributes()
12 | ca.lineWidth = 1
13 | ca.designator = ""
14 | ca.showLegend = 0
15 | ca.showLabels = 0
16 | ca.curveColor = (0, 0, 255, 255)
17 | #ca.showPoints = 1
18 | ca.symbol = ca.Circle  # symbol and point size are set even though showPoints is left commented out above
19 | ca.pointSize = 5
20 | # In window 1: open the heat_soln_*.curve database, add a Curve plot with the attributes above and draw it.
21 | #SetWindowLayout(2)
22 | SetActiveWindow(1)
23 | OpenDatabase("heat_soln_*.curve database",0)
24 | AddPlot("Curve","curve")
25 | SetPlotOptions(ca)
26 | DrawPlots()
27 | v = GetViewCurve()
28 | v.viewportCoords = (0.2, 0.95, 0.15, 0.85)
29 | SetViewCurve(v)
30 | if v.rangeCoords[1] - v.rangeCoords[0] < 2:  # when the range extent is under 2, pin domain and range to (-0.1, 1.1)
31 |     v.domainCoords = (-0.1, 1.1)
32 |     v.rangeCoords = (-0.1, 1.1)
33 |     SetViewCurve(v)
34 | # The commented-out block below is disabled code for a second window plotting error.curve and residual.curve.
35 | #SetActiveWindow(2)
36 | #DeleteAllPlots();
37 | #val = OpenDatabase("error.curve")
38 | #if val:
39 | #    AddPlot("Curve","curve")
40 | #    ca.designator = "Error"
41 | #    ca.curveColor = (255, 0, 0, 255)
42 | #    ca.showPoints = 0
43 | #    SetPlotOptions(ca)
44 | #OpenDatabase("residual.curve")
45 | #AddPlot("Curve","curve")
46 | #DrawPlots()
47 | # Animate: advance the time slider one state at a time, sleeping 0.1 s before each step and resetting the view after it.
48 | #SetActiveWindow(1)
49 | for i in range(TimeSliderGetNStates()-1):
50 |     time.sleep(0.1)
51 |     TimeSliderNextState()
52 |     ResetView()
53 | time.sleep(10)  # pause 10 s after the last state before exiting
54 | sys.exit(0)
55 | 


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/problem_setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/problem_setup.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/simple_1d_heat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/simple_1d_heat.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0000.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0001.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0002.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0003.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0004.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes0005.png


--------------------------------------------------------------------------------
/lessons/hand_coded_heat/spikes_animated.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/hand_coded_heat/spikes_animated.gif


--------------------------------------------------------------------------------
/lessons/iterativesolvers/lesson.md:
--------------------------------------------------------------------------------
  1 | # Iterative Solution of Linear and Nonlinear Systems
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | ```
  7 | Questions                 |Objectives                     |Key Points
  8 | --------------------------|-------------------------------|-------------------------------------
  9 | Does the preconditioner   | See that the preconditioner   | Through a single interface,
 10 | affect the convergence    | can be crucial for            | PETSc supports runtime choices 
 11 | rate of Krylov solvers?   | convergence.                  | of algorithms and options.
 12 |                           |                               |
 13 | How can I choose algs.    | Learn the basics of using     | Experimenting with
 14 | and options at runtime    | PETSc solvers & understanding | algorithms is essential
 15 | when using PETSc?         | output.                       | for good performance.
 16 | ```
 17 | 
 18 | Before running the examples, you must switch to the bash shell by using
 19 | 
 20 | ```
 21 | bash
 22 | ```
 23 | 
 24 | ## Example 1: Structural Mechanics Beam Deflection:
 25 | 
 26 | This code uses MFEM and [PETSc/TAO](https://www.mcs.anl.gov/petsc/) to demonstrate the convergence of Krylov methods.
 27 | 
 28 | The source code is included in [ex2p.c](./ex2p.c)
 29 | 
 30 | Note: Normally PETSc options can be passed as command line arguments. But because MFEM turns off this capability, PETSc options must be passed either in a file or in the PETSC_OPTIONS environment variable. See the file rc_ex2p for the PETSc options that are supplied to the application in these examples.
 31 | 
 32 | ### Run 1: Run with Jacobi preconditioner
 33 | 
 34 | ```
 35 | PETSC_OPTIONS="-pc_type jacobi -ksp_max_it 25" ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 36 | ```
 37 | 
 38 | The first column of the output is the residual norm. The next two are the maximum and minimum estimated eigenvalues of the operator and the final column is the condition number.
 39 | 
 40 | #### Questions
 41 | > **Is the iteration converging?**
 42 | 
 43 | > **Read the output at the bottom from -ksp_view ... What Krylov method and preconditioner are being used?**
 44 | 
 45 | ### Run 2: Run with the algebraic multigrid preconditioner
 46 | 
 47 | ```
 48 | ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 49 | ```
 50 | 
 51 | #### Questions
 52 | > **Is the iteration now converging?**
 53 | 
 54 | > **Read the output at the bottom from -ksp_view ... What Krylov method and preconditioner are being used?**
 55 | 
 56 | ### Run 3: Run with the algebraic multigrid preconditioner but no conjugate gradient method
 57 | 
 58 | ```
 59 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type richardson -ksp_max_it 25"  ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 60 | ```
 61 | 
 62 | #### Questions
 63 | > **Is the iteration now converging?**
 64 | 
 65 | ### Run 4: Run with the algebraic multigrid preconditioner but with GMRES and a restart of 10
 66 | 
 67 | ```
 68 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 10"  ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 69 | ```
 70 | 
 71 | Now run with a gmres restart of 30
 72 | 
 73 | ```
 74 | PETSC_OPTIONS="-ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 30"  ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 75 | ```
 76 | 
 77 | Note the convergence is now very similar to that with CG.
 78 | 
 79 | Now attempt to run this in parallel and obtain solver performance data
 80 | ```
 81 | PETSC_OPTIONS="-log_view -ksp_norm_type preconditioned -ksp_type gmres -ksp_gmres_restart 30" ${MPIEXEC_OMPI} -n 4 ./ex2p -petscopts rc_ex2p --mesh /projects/ATPESC2017/NumericalPackages/handson/mfem/data/beam-tri.mesh
 82 | ```
 83 | 
 84 | ## Example 2: Nonlinear Problem:
 85 | 
 86 | ```
 87 | PETSC_OPTIONS="-snes_rtol 1.e-10 -snes_view  -pc_type bjacobi -sub_pc_type ilu " ${MPIEXEC_OMPI} -n 4 ./ex10p -m ../../data/beam-quad.mesh --petscopts rc_ex10p -s 3 -rs 2 -dt 3 | more
 88 | ```
 89 | 
 90 | Note the quadratic convergence; the residual norm exponent doubles until it runs out of digits to double.
 91 | 
 92 | ## Out-Brief
 93 | 
 94 | We have used [PETSc](https://www.mcs.anl.gov/petsc/) to demonstrate the use of preconditioned Krylov methods. Many examples are available for various aspects of PETSc functionality, including
 95 | * [Krylov solver examples](http://www.mcs.anl.gov/petsc/petsc-current/src/ksp/ksp/examples/tutorials)
 96 | * [Nonlinear solver examples](http://www.mcs.anl.gov/petsc/petsc-current/src/snes/examples/tutorials)
 97 | 
 98 | 
 99 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
100 | 
101 | &nbsp;
102 | 
103 | ---
104 | 
105 | [Back to all HandsOnLessons](../lessons.md)
106 | 


--------------------------------------------------------------------------------
/lessons/lesson_template/animated_basic_heat.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/lesson_template/animated_basic_heat.gif


--------------------------------------------------------------------------------
/lessons/lesson_template/basic0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/lesson_template/basic0000.png


--------------------------------------------------------------------------------
/lessons/lesson_template/lesson.md:
--------------------------------------------------------------------------------
  1 | # Lesson Title
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | **Note**: GitHub Markdown tables are very limited! To do this section of questions,
  7 | objectives and key points properly, we need to use more features of Jekyll than I
  8 | want to worry about prior to ATPESC. We will fix this after ATPESC to use Jekyll
  9 | properly and it will improve its look substantially. Also, to avoid horizontal scroll
 10 | of this pre-formatted section, try to keep to less than 102 chars in width.
 11 | 
 12 | ```
 13 | Questions                  |Objectives                      |Key Points
 14 | ---------------------------|--------------------------------|----------
 15 | Question 1?                |Objective 1                     |Key Point 1
 16 | Question 2?                |Objective 2                     |Key Point 2
 17 | Question 3?                |Objective 3                     |Key Point 3
 18 | ```
 19 | 
 20 | * **Questions** are those things we want learners to know the answers to by the end of the lesson.
 21 | We don't have to list all possible questions here...only the two or three _most_ important.
 22 | * **Objectives** are those things we want learners to actually do or observe during the lesson. Again,
 23 | only list here the ones that are _most_ important.
 24 | * **Key Points** are those things we want learners to take-away from the lesson.
 25 | 
 26 | ## The Problem Being Solved
 27 | 
 28 | Describe the problem(s) that will be solved in this lesson.
 29 | If possible, include a picture or graphic here describing the physical problem setup. If the application
 30 | or tool being used can deal with a variety of input physical problems, it's fine to mention
 31 | that but here just include a picture of the problem they will be running in the _runs_
 32 | below. Maybe include the equation being solved as well.
 33 | 
 34 | ![](http://latex.codecogs.com/gif.latex?%5Cfrac%7B%5Cpartial%20u%7D%7B%5Cpartial%20t%7D%20%3D%20%5Calpha%20%5Cfrac%7B%5Cpartial%5E2%20u%7D%7B%5Cpartial%20x%5E2%7D)
 35 | 
 36 | ## The Example Source Code
 37 | 
 38 | Describe the application, its command-line arguments, have a link to view the actual source code
 39 | or, if you prefer, include snippets of the source code here in a code-highlighted box as below
 40 | 
 41 | ```c++
 42 | Geometry::~Geometry()
 43 | {
 44 |    for (int i = 0; i < NumGeom; i++)
 45 |    {
 46 |       delete PerfGeomToGeomJac[i];
 47 |       delete GeomVert[i];
 48 |    }
 49 | }
 50 | ```
 51 | 
 52 | ## Running the Example
 53 | 
 54 | ### Run 1 (Problem Name)
 55 | 
 56 | Give the command-line to run the example
 57 | 
 58 | #### Expected Behavior/Output
 59 | 
 60 | Include here what learner should expect to happen
 61 | 
 62 | * How long might it take to run
 63 | * How long might they have to wait for resources before it can run
 64 | * What should they see on their terminal
 65 | 
 66 | #### Examining Results
 67 | 
 68 | Include here examples of either plots or data you expect learners to observe.
 69 | 
 70 | ![An Image](basic0000.png)
 71 | 
 72 | Or, if you need to control the size, or have multiple images next to each other
 73 | use a Markdown table and raw html...
 74 | 
 75 | |<img src="basic0000.png" width="200">|<img src="basic0000.png" width="400">|
 76 | 
 77 | **Note:** You can create [gif animations](https://www.tjhsst.edu/~dhyatt/supercomp/n401a.html)
 78 | with ImageMagick tool available on most systems as `convert` command as in...
 79 | 
 80 | ```
 81 | convert -delay 20 -loop 0 image*.<ext> animation.gif
 82 | ```
 83 | 
 84 | ![Gif Animations](animated_basic_heat.gif)
 85 | 
 86 | Alternatively, you can upload videos to YouTube and embed them here
 87 | 
 88 | <iframe width="560" height="315" src="https://www.youtube.com/embed/bsSFYrDXK0k" frameborder="0" allowfullscreen></iframe>
 89 | 
 90 | #### Questions
 91 | 
 92 | > **Question #1?** (triple-click box below to reveal answer)
 93 | 
 94 | **Note:** These Questions and _Answer Boxes_ are somewhat cheesy for the time being.
 95 | We can expand our use of Jekyll and improve look and feel after ATPESC. In the meantime,
 96 | in order for these _Answer Boxes_ to behave as desired (e.g. hidden text which
 97 | gets revealed by user triple-clicking in box), they have to be all on a single
 98 | line with no line breaks and have to be white text on white background. Yeah, it's
 99 | cheesy but will work for now.
100 | 
101 | |<font color="white">Answer to Question #1</font>|
102 | 
103 | > **Question #2?** (triple-click box below to reveal answer)
104 | 
105 | |<font color="white">Answer to Question #2</font>|
106 | 
107 | ---
108 | 
109 | ### Run 2 (Problem Name)
110 | 
111 | #### Expected Behavior/Output
112 | 
113 | #### Examining Results
114 | 
115 | Include here examples of either plots or data you expect learners to observe.
116 | 
117 | #### Questions
118 | 
119 | > **Question #1?** (triple-click box below to reveal answer)
120 | 
121 | |<font color="white">Answer to Question #1</font>|
122 | 
123 | > **Question #2?** (triple-click box below to reveal answer)
124 | 
125 | |<font color="white">Answer to Question #2</font>|
126 | 
127 | ---
128 | 
129 | ### Run 3
130 | 
131 | #### Expected Behavior/Output
132 | 
133 | #### Examining Results
134 | 
135 | Include here examples of either plots or data you expect learners to observe.
136 | 
137 | #### Questions
138 | 
139 | > **Question #1?** (triple-click box below to reveal answer)
140 | 
141 | |<font color="white">Answer to Question #1</font>|
142 | 
143 | > **Question #2?** (triple-click box below to reveal answer)
144 | 
145 | |<font color="white">Answer to Question #2</font>|
146 | 
147 | ---
148 | 
149 | ## Out-Brief
150 | 
151 | Here, re-emphasize the lesson objectives and key points.
152 | 
153 | It's fine to go into greater detail about questions or objectives this lesson
154 | did not fully cover.
155 | 
156 | ### Further Reading
157 | 
158 | Include links to other online sources you might want to include.
159 | 
160 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
161 | 
162 | &nbsp;
163 | 
164 | ---
165 | 
166 | [Back to all HandsOnLessons](../lessons.md)
167 | 


--------------------------------------------------------------------------------
/lessons/lessons.md:
--------------------------------------------------------------------------------
 1 | Lessons
 2 | ------------
 3 | 
 4 | As described in [Welcome to HandsOnLessons](../README.md), hosted here are a series of increasingly sophisticated hands-on lessons aimed at helping users of all experience levels learn to use a variety of high-performance scientific software packages for solving complex numerical problems.  This collection is just beginning; over time, more lessons will be provided to cover other important topics and packages.
 5 | 
 6 | * [Basic, One-Dimensional Heat Equation](hand_coded_heat/lesson.md)
 7 | * [Structured Meshes](AMReX/lesson.md)
 8 | * [Finite Elements Convergence](mfem_convergence/lesson.md)
 9 | * [Time Integrators](time_integrators/lesson.md)
10 | * [Iterative Solvers](iterativesolvers/lesson.md)
11 | * [Sparse Direct Solvers](superlu-mfem/lesson.md)
12 | * [Algebraic Multigrid](AMG/lesson.md)
13 | * [Adjoint Solvers](adjoint/lesson.md)
14 | 
15 | &nbsp;
16 | 
17 | ---
18 | 
19 | [Lesson Template](lesson_template/lesson.md) -- intended for lesson developers, not for HandsOnLesson learners
20 | 


--------------------------------------------------------------------------------
/lessons/mfem_convergence/diffusion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/diffusion.png


--------------------------------------------------------------------------------
/lessons/mfem_convergence/ex8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/ex8.png


--------------------------------------------------------------------------------
/lessons/mfem_convergence/lesson.md:
--------------------------------------------------------------------------------
  1 | # Finite Elements and Convergence with MFEM
  2 | 
  3 | ## At a Glance
  4 | <!-- (Expected # minutes to complete) %% temporarily omit -->
  5 | 
  6 | ```
  7 | Questions                    |Objectives                      |Key Points
  8 | -----------------------------|--------------------------------|---------------------------
  9 | What is a finite element     |Understand basic finite element |Basis functions determine
 10 | method?                      |machinery                       |the quality of the solution
 11 |                              |                                |
 12 | What is a high order method? |Understand  how polynomial      |High order methods add more
 13 |                              |order affects simulations       |unknowns on the same mesh
 14 |                              |                                |for more precise solutions
 15 |                              |                                |
 16 | What is convergence?         |Understand how convergence and  |High order methods converge
 17 |                              |convergence rate is calculated  |faster for smooth solutions
 18 | ```
 19 | 
 20 | **Note:** To begin this lesson...
 21 | ```
 22 | cd handson/mfem/examples/atpesc/mfem
 23 | ```
 24 | 
 25 | ## A Widely Applicable Equation
 26 | 
 27 | In this lesson, we demonstrate the discretization of a simple Poisson problem using
 28 | the [MFEM library](http://mfem.org) and examine the finite element approximation error
 29 | under uniform refinement. An example of this equation is steady-state [heat](../hand_coded_heat/lesson.md)
 30 | [conduction](../time_integrators/lesson.md).
 31 | 
 32 | |[<img src="ex8.png">](ex8.png)| [<img src="diffusion.png">](diffusion.png)|
 33 | 
 34 | ### Governing Equation
 35 | 
 36 | The [_Poisson Equation_](https://en.wikipedia.org/wiki/Poisson's_equation) is a partial
 37 | differential equation (PDE) that can be used to model steady-state heat conduction,
 38 | electric potentials and gravitational fields. In mathematical terms ...
 39 | 
 40 | |![](http://latex.codecogs.com/gif.latex?-%5Cnabla%5E2u%20%3D%20f)|(1)|
 41 | 
 42 | where _u_ is the potential field and _f_ is the source function. This PDE is a generalization
 43 | of the [_Laplace Equation_](https://en.wikipedia.org/wiki/Laplace%27s_equation).
 44 | 
 45 | ### Finite element basics
 46 | 
 47 | To solve the above continuous equation using computers we need to
 48 | [discretize](https://en.wikipedia.org/wiki/Discretization) it by introducing a finite
 49 | (discrete) number of unknowns to compute for.
 50 | In the [_Finite Element Method_](https://en.wikipedia.org/wiki/Finite_element_method) (FEM), this is
 51 | done using the concept of _basis functions_.
 52 | 
 53 | Instead of calculating the exact analytic solution _u_, consider approximating it by
 54 | 
 55 | |![](http://latex.codecogs.com/gif.latex?u%20%5Capprox%20%5Csum_%7Bj%3D1%7D%5En%20c_j%20%5Cphi_j)|(2)|
 56 | 
 57 | where ![](http://latex.codecogs.com/gif.latex?c_j) are scalar unknown coefficients and
 58 | ![](http://latex.codecogs.com/gif.latex?%5Cphi_j) are known _basis functions_. They are
 59 | typically piecewise-polynomial functions which are only non-zero on small portions of the
 60 | computational mesh. With finite elements, the mesh can be totally unstructured, curved and
 61 | non-conforming.
 62 | 
 63 | |[<img src="mesh.png" width="400">](mesh.png)|
 64 | 
 65 | To solve for the unknown coefficients, we multiply Poisson's equation by another (test)
 66 | basis function ![](http://latex.codecogs.com/gif.latex?%5Cphi_i) and integrate by parts
 67 | to obtain
 68 | 
 69 | |![](http://latex.codecogs.com/gif.latex?%5Csum_%7Bj%3D1%7D%5En%5Cint_%5COmega%20c_j%20%5Cnabla%20%5Cphi_j%20%5Ccdot%20%5Cnabla%20%5Cphi_i%20dV%20%3D%20%5Cint_%5COmega%20f%20%5Cphi_i)|(3)|
 70 | 
 71 | for every basis function ![](http://latex.codecogs.com/gif.latex?%5Cphi_i).
 72 | (Here we are assuming homogeneous Dirichlet boundary conditions, corresponding e.g. to
 73 | zero temperature on the whole boundary.)
 74 | 
 75 | Since the basis functions are known, we can rewrite (3) as
 76 | 
 77 | |![](http://latex.codecogs.com/gif.latex?%5Cmathbf%7BAx%7D%20%3D%20%5Cmathbf%7Bb%7D)|(4)|
 78 | 
 79 | where
 80 | 
 81 | |![](http://latex.codecogs.com/gif.latex?A_%7Bij%7D%20%3D%20%5Cint_%5COmega%20%5Cnabla%20%5Cphi_i%20%5Ccdot%20%5Cnabla%20%5Cphi_j%20dV)|(5)|
 82 | |![](http://latex.codecogs.com/gif.latex?b_i%20%3D%20%5Cint_%5COmega%20f%20%5Cphi_i%20dV)|(6)|
 83 | |![](http://latex.codecogs.com/gif.latex?x_j%20%3D%20c_j)|(7)|
 84 | 
 85 | This is a ![](http://latex.codecogs.com/gif.latex?n%20%5Ctimes%20n) linear system that
 86 | can be solved [directly](../superlu-mfem/lesson.md) or [iteratively](../iterativesolvers/lesson.md)
 87 | for the unknown coefficients. Note that we are free to choose the basis functions
 88 | ![](http://latex.codecogs.com/gif.latex?%5Cphi_i) as we see fit.
 89 | 
 90 | ---
 91 | 
 92 | ## Convergence Study Source Code
 93 | 
 94 | To define the system we need to solve, we need three things. First, we need to define our
 95 | basis functions which live on the computational mesh.
 96 | 
 97 | ```c++
 98 |    // order is the FEM basis functions polynomial order
 99 |    FiniteElementCollection *fec = new H1_FECollection(order, dim);
100 | 
101 |    // pmesh is the parallel computational mesh
102 |    ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec);
103 | ```
104 | 
105 | This defines a collection of H1 functions (meaning they have well-defined gradient) of
106 | a given polynomial order on a parallel computational mesh pmesh. Next, we need to define
107 | the integrals in Equation (5)
108 | 
109 | ```c++
110 |    ParBilinearForm *a = new ParBilinearForm(fespace);
111 |    ConstantCoefficient one(1.0);
112 |    a->AddDomainIntegrator(new DiffusionIntegrator(one));
113 |    a->Assemble();
114 | ```
115 | 
116 | and Equation (6)
117 | 
118 | ```c++
119 |    // f_exact is a C function defining the source
120 |    FunctionCoefficient f(f_exact);
121 |    ParLinearForm *b = new ParLinearForm(fespace);
122 |    b->AddDomainIntegrator(new DomainLFIntegrator(f));
123 |    b->Assemble();
124 | ```
125 | 
126 | This defines the matrix A and the vector b. We then solve the linear
127 | system for our solution vector x using [AMG-preconditioned](../AMG/lesson.md) PCG iteration.
128 | 
129 | ```c++
130 |    // FEM -> Linear System
131 |    HypreParMatrix A;
132 |    Vector B, X;
133 |    a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B);
134 | 
135 |    // AMG preconditioner
136 |    HypreBoomerAMG *amg = new HypreBoomerAMG(A);
137 |    amg->SetPrintLevel(0);
138 | 
139 |    // PCG Krylov solver
140 |    HyprePCG *pcg = new HyprePCG(A);
141 |    pcg->SetTol(1e-12);
142 |    pcg->SetMaxIter(200);
143 |    pcg->SetPrintLevel(0);
144 |    pcg->SetPreconditioner(*amg);
145 | 
146 |    // Solve the system A X = B
147 |    pcg->Mult(B, X);
148 | 
149 |    // Linear System -> FEM
150 |    a->RecoverFEMSolution(X, *b, x);
151 | ```
152 | 
153 | In this lesson we know what the exact solution is, so we can measure the amount of
154 | error in our approximate solution in two ways:
155 | 
156 | |![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2%20%3D%20%5Cint_%5COmega%20%5Cleft%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%7C%5E2)|(8)|
157 | |![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BH%5E1%7D%5E2%20%3D%20%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2%20&plus;%20%5Cleft%20%5C%7C%20%5Cnabla%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20%5Cnabla%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%5E2)|(9)|
158 | 
159 | The second one is known as the _energy norm_, which is derived directly from the weak form of the PDE.
160 | 
161 | We expect the error to behave like
162 | 
163 | |![](http://latex.codecogs.com/gif.latex?%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7B%5Cmbox%7Bh%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%20%5Cleq%20Ch%5E%7Br%7D)|(10)|
164 | 
165 | where ![](http://latex.codecogs.com/gif.latex?h) is the mesh size, ![](http://latex.codecogs.com/gif.latex?C)
166 | is a mesh-independent constant and ![](http://latex.codecogs.com/gif.latex?r) is the
167 | [_convergence rate_](https://en.wikipedia.org/wiki/Rate_of_convergence).
168 | 
169 | Given approximations at two different mesh resolutions, we can  estimate the convergence rate as
170 | follows (![](http://latex.codecogs.com/gif.latex?C) doesn't change when we refine the mesh and compare runs):
171 | 
172 | |![](http://latex.codecogs.com/gif.latex?r%20%5Capprox%20%5Cfrac%7B%5Clog%5C%20%5Cfrac%7B%20%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7Bh_%7B%5Cmbox%7Bnew%7D%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%7D%7B%5Cleft%20%5C%7C%20u_%7B%5Cmbox%7Bexact%7D%7D%20-%20u_%7Bh_%7B%5Cmbox%7Bold%7D%7D%7D%20%5Cright%20%5C%7C_%7BL_2%7D%7D%7D%7B%20%5Clog%20%5Cfrac%7Bh_%7B%5Cmbox%7Bnew%7D%7D%7D%7Bh_%7B%5Cmbox%7Bold%7D%7D%7D%7D)|(11)|
173 | 
174 | In code this is implemented in a refinement loop as follows:
175 | 
176 | ```c++
177 |    double l2_err = x.ComputeL2Error(u);
178 |    double h1_err = x.ComputeH1Error(&u, &u_grad, &one, 1.0, 1);
179 |    pmesh->GetCharacteristics(h_min, h_max, kappa_min, kappa_max);
180 | 
181 |    l2_rate = log(l2_err/l2_err_prev) / log(h_min/h_prev);
182 |    h1_rate = log(h1_err/h1_err_prev) / log(h_min/h_prev);
183 | ```
184 | 
185 | ---
186 | 
187 | ## Running the Convergence Study
188 | 
189 | The convergence study in `handson/mfem/examples/atpesc/mfem` has the following options
190 | 
191 | ```
192 | ./convergence --help
193 | 
194 | Usage: ./convergence [options] ...
195 | Options:
196 |    -h, --help
197 | 	Print this help message and exit.
198 |    -m <string>, --mesh <string>, current value: ../../../data/star.mesh
199 | 	Mesh file to use.
200 |    -o <int>, --order <int>, current value: 1
201 | 	Finite element order (polynomial degree).
202 |    -sc, --static-condensation, -no-sc, --no-static-condensation, current option: --no-static-condensation
203 | 	Enable static condensation.
204 |    -r <int>, --refinements <int>, current value: 4
205 | 	Number of total uniform refinements
206 |    -sr <int>, --serial-refinements <int>, current value: 2
207 | 	Maximum number of serial uniform refinements
208 |    -f <double>, --frequency <double>, current value: 1
209 | 	Set the frequency for the exact solution.
210 | ```
211 | 
212 | ### Run 1 (Low order)
213 | 
214 | In this run, we will examine the error after 7 uniform refinements in both the L2 and H1 norms using
215 | first order (linear) basis functions. We use the `star.mesh` 2D mesh file.
216 | 
217 | ```
218 | ./convergence -r 7
219 | Options used:
220 |    --mesh ../../../data/star.mesh
221 |    --order 1
222 |    --no-static-condensation
223 |    --refinements 7
224 |    --serial-refinements 2
225 |    --frequency 1
226 | ----------------------------------------------------------------------------------------
227 | DOFs            h               L^2 error       L^2 rate        H^1 error       H^1 rate
228 | ----------------------------------------------------------------------------------------
229 | 31              0.4876          0.3252          0               2.631           0
230 | 101             0.2438          0.09293         1.807           1.387           0.9229
231 | 361             0.1219          0.02393         1.957           0.7017          0.9836
232 | 1361            0.06095         0.006027        1.989           0.3518          0.996
233 | 5281            0.03048         0.00151         1.997           0.176           0.999
234 | 20801           0.01524         0.0003776       1.999           0.08803         0.9997
235 | 82561           0.007619        9.441e-05       2               0.04402         0.9999
236 | ```
237 | 
238 | Note that the L2 error is converging at a rate of 2 while the H1 error is only converging at a rate of 1.
239 | 
240 | ### Run 2 (High order)
241 | 
242 | Now consider the same run, except that we use 3rd order (cubic) basis functions instead.
243 | 
244 | ```
245 | ./convergence -r 7 -o 3
246 | Options used:
247 |    --mesh ../../../data/star.mesh
248 |    --order 3
249 |    --no-static-condensation
250 |    --refinements 7
251 |    --serial-refinements 2
252 |    --frequency 1
253 | ----------------------------------------------------------------------------------------
254 | DOFs            h               L^2 error       L^2 rate        H^1 error       H^1 rate
255 | ----------------------------------------------------------------------------------------
256 | 211             0.4876          0.004777        0               0.118           0
257 | 781             0.2438          0.0003178       3.91            0.01576         2.905
258 | 3001            0.1219          2.008e-05       3.984           0.001995        2.982
259 | 11761           0.06095         1.258e-06       3.997           0.0002501       2.996
260 | 46561           0.03048         7.864e-08       4               3.129e-05       2.999
261 | 185281          0.01524         4.915e-09       4               3.912e-06       3
262 | 739201          0.007619        3.072e-10       4               4.891e-07       3
263 | ```
264 | 
265 | The L2 error is now converging at a rate of 4 and the H1 error is converging at a rate of 3.
266 | This is because the exact solution in these runs is smooth, so higher-order methods
267 | approximate it better.
268 | 
269 | #### Questions
270 | 
271 | > **How many unknowns do we need in runs 1 and 2 to get 4 digits of accuracy? Which method is more efficient: low-order or high-order?**
272 | 
273 | |<font color="white">The high-order method is more efficient. It needs only 3001 unknowns compared to 82561 unknowns for the low-order method!</font>|
274 | 
275 | ### Run 3 (3D example)
276 | The previous two runs used a 2D mesh in serial, but the same code can be used to run a 3D problem in parallel.
277 | 
278 | ```
279 | ${MPIEXEC_OMPI} -n 4 ./convergence -r 4 -o 2 -m ../../../data/inline-hex.mesh
280 | Options used:
281 |    --mesh ../../../data/inline-hex.mesh
282 |    --order 2
283 |    --no-static-condensation
284 |    --refinements 4
285 |    --serial-refinements 2
286 |    --frequency 1
287 | ----------------------------------------------------------------------------------------
288 | DOFs            h               L^2 error       L^2 rate        H^1 error       H^1 rate
289 | ----------------------------------------------------------------------------------------
290 | 729             0.25            0.001386        0               0.02215         0
291 | 4913            0.125           0.0001772       2.967           0.005532        2.002
292 | 35937           0.0625          2.227e-05       2.993           0.001377        2.007
293 | 274625          0.03125         2.787e-06       2.998           0.0003441       2
294 | ```
295 | 
296 | #### Questions
297 | 
298 | > **Experiment with different orders in 2D and 3D. What convergence rate will you expect in L2 and H1 for a given basis order ![](http://latex.codecogs.com/gif.latex?p)?**
299 | 
300 | |<font color="white"> For a smooth exact solution, the convergence rate in the energy norm (H1) is p. Using the so-called Nitsche's Trick, one can prove that we pick up an additional order in L2, so the convergence rate there is p+1.</font>|
301 | 
302 | ---
303 | 
304 | ## Out-Brief
305 | 
306 | We demonstrated the ease of implementing an order- and dimension-independent finite element
307 | code in MFEM. We discussed the basics of the finite element method as well as demonstrated
308 | the effect of the polynomial order of the basis functions on convergence rates.
309 | 
310 | ### Further Reading
311 | 
312 | To learn more about MFEM, including example codes and miniapps, visit [mfem.org](http://mfem.org).
313 | 
314 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
315 | 
316 | &nbsp;
317 | 
318 | ---
319 | 
320 | [Back to all HandsOnLessons](../lessons.md)
321 | 


--------------------------------------------------------------------------------
/lessons/mfem_convergence/mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/mfem_convergence/mesh.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/gmres.mpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres.mpg


--------------------------------------------------------------------------------
/lessons/superlu-mfem/gmres_residual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres_residual.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/gmres_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/gmres_time.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/lesson.md:
--------------------------------------------------------------------------------
  1 | # Sparse Direct Solver
  2 | 
  3 | ## At A Glance
  4 | 
  5 | ```
  6 | Questions                  |Objectives                      |Key Points
  7 | ---------------------------|--------------------------------|----------
  8 | Why use a direct solver?   | Can obtain accurate solution   | Robust for difficult problems
  9 | What parameters affect     | Try different ordering options | Performance (time & memory) 
 10 | performance?               |                                | can vary a lot
 11 | ```
 12 | 
 13 | **Note:** To begin this lesson
 14 | ```
 15 | cd handson/mfem/examples/atpesc/superlu
 16 | ```
 17 | 
 18 | ## The problem being solved
 19 | 
 20 | The [convdiff.cpp](https://github.com/mfem/mfem/blob/atpesc-dev/examples/atpesc/superlu/convdiff.cpp)
 21 | application is modeling the steady state convection-diffusion equation in 2D
 22 | with a constant velocity.  This equation is used to model the concentration
 23 | of something like a _dye_ in a _moving_ fluid as it diffuses and flows through
 24 | the fluid.  The equation is as follows:
 25 | 
 26 | |![](http://latex.codecogs.com/gif.latex?%5Cnabla%20%5Ccdot%20%28%5Ckappa%20%5Cnabla%20u%29%20-%20%5Cnabla%20%5Ccdot%20%28%5Coverrightarrow%7Bv%7Du%29%2BR%3D0)|(1)|
 27 | 
 28 | Where _u_ is the concentration that we are tracking,
 29 | ![](http://latex.codecogs.com/gif.latex?%5Ckappa) is the diffusion rate,
 30 | _v_ is the velocity of the flow and _R_ is a concentration source.
 31 |  
 32 | In the application we use here, the velocity vector _direction_ is fixed in the _+x_ 
 33 | direction. However, the _magnitude_ is set by the user (default of 100),
 34 | ![](http://latex.codecogs.com/gif.latex?%5Ckappa) is fixed at 1.0, and the
 35 | source is 0.0 everywhere except for a small disc centered at the middle of the
 36 | domain where it is 1.0.
 37 | 
 38 | |Initial Condition|
 39 | |:---:|
 40 | |[<img src="mfem-superlu0000.png" width="400">](mfem-superlu0000.png)|
 41 |  
 42 | Solving this PDE is well known to cause convergence problems for iterative solvers,
 43 | for larger _v_. We use MFEM as a vehicle to demonstrate the use of a distributed,
 44 | direct solver, [SuperLU_DIST](http://crd-legacy.lbl.gov/~xiaoye/SuperLU/),
 45 | to solve very ill-conditioned linear systems.
 46 | 
 47 | ## The Example Source Code
 48 | 
 49 | ## Running the Example
 50 | 
 51 | ### Run 1: default setting with GMRES solver, preconditioned by hypre, velocity = 100
 52 | 
 53 | ```
 54 | $ ./convdiff
 55 | 
 56 | Options used:
 57 |    --refine 0
 58 |    --order 1
 59 |    --velocity 100
 60 |    --no-visit
 61 |    --no-superlu
 62 |    --slu-colperm 0
 63 | Number of unknowns: 10201
 64 | =============================================
 65 | Setup phase times:
 66 | =============================================
 67 | GMRES Setup:
 68 |   wall clock time = 0.010000 seconds
 69 |   wall MFLOPS     = 0.000000
 70 |   cpu clock time  = 0.010000 seconds
 71 |   cpu MFLOPS      = 0.000000
 72 | 
 73 | L2 norm of b: 9.500000e-04
 74 | Initial L2 norm of residual: 9.500000e-04
 75 | =============================================
 76 | 
 77 | Iters     resid.norm     conv.rate  rel.res.norm
 78 | -----    ------------    ---------- ------------
 79 |     1    4.065439e-04    0.427941   4.279409e-01
 80 |     2    1.318995e-04    0.324441   1.388415e-01
 81 |     3    4.823031e-05    0.365660   5.076874e-02
 82 |     ...
 83 |    23    2.436775e-16    0.249025   2.565027e-13
 84 | 
 85 | Final L2 norm of residual: 2.436857e-16
 86 | 
 87 | =============================================
 88 | Solve phase times:
 89 | =============================================
 90 | GMRES Solve:
 91 |   wall clock time = 0.030000 seconds
 92 |   wall MFLOPS     = 0.000000
 93 |   cpu clock time  = 0.020000 seconds
 94 |   cpu MFLOPS      = 0.000000
 95 | 
 96 | GMRES Iterations = 23
 97 | Final GMRES Relative Residual Norm = 2.56511e-13
 98 | Time required for solver:  0.0362886 (s)
 99 | ```
100 | 
101 | |Steady State|
102 | |:---:|
103 | |[<img src="mfem-superlu0005.png" width="400">](mfem-superlu0005.png)|
104 | 
105 | ---
106 | 
107 | ### Run 2: increase velocity to 1000, GMRES does not converge anymore
108 | 
109 | ```
110 | $ ./convdiff --velocity 1000
111 | 
112 | Options used:
113 |    --refine 0
114 |    --order 1
115 |    --velocity 1000
116 |    --no-visit
117 |    --no-superlu
118 |    --slu-colperm 0
119 | Number of unknowns: 10201
120 | =============================================
121 | Setup phase times:
122 | =============================================
123 | GMRES Setup:
124 |   wall clock time = 0.020000 seconds
125 |   wall MFLOPS     = 0.000000
126 |   cpu clock time  = 0.010000 seconds
127 |   cpu MFLOPS      = 0.000000
128 | 
129 | L2 norm of b: 9.500000e-04
130 | Initial L2 norm of residual: 9.500000e-04
131 | =============================================
132 | 
133 | Iters     resid.norm     conv.rate  rel.res.norm
134 | -----    ------------    ---------- ------------
135 |     1    9.500000e-04    1.000000   1.000000e+00
136 |     2    9.500000e-04    1.000000   1.000000e+00
137 |     3    9.500000e-04    1.000000   1.000000e+00
138 |     ...
139 |   200    9.500000e-04    1.000000   1.000000e+00
140 | ```
141 | 
142 | Below, we plot the behavior of the GMRES method for velocity values in the
143 | range [100,1000] at increments, _dv_, of 25 and also show an animation
144 | of the solution GMRES gives as velocity increases.
145 | 
146 | |Solutions @_dv_=25 in [100,1000]|Contours of Solution @ _vel=1000_|
147 | |:---:|:---:|
148 | |<video src="gmres.mpg" width="400" height="300" controls preload></video>|[<img src="mfem-superlu0003.png" width="400">](mfem-superlu0003.png)|
149 | 
150 | |Time to Solution|L2 norm of final residual|
151 | |:---:|:---:|
152 | |[<img src="gmres_time.png" width="400">](gmres_time.png)|[<img src="gmres_residual.png" width="400">](gmres_residual.png)|
153 | 
154 | > **What do you think is happening?**
155 | 
156 | |<font color="white">GMRES method works ok for low velocity values. As velocity increases, GMRES method eventually crosses a threshold where it can no longer provide a useful result.</font>|
157 | 
158 | > **Why does time to solution show smoother transition than L2 norm?**
159 | 
160 | |<font color="white">As instability is approached, more GMRES iterations are required to reach the desired norm. So GMRES is still able to manage the solve and achieve a near-zero L2 norm. It just takes more and more iterations. Once GMRES is unable to solve, the L2 norm explodes.</font>|
161 | 
162 | ---
163 | 
164 | ### Run 3: Now use SuperLU_DIST, with default options
165 | ```
166 | $ ./convdiff -slu --velocity 1000
167 | 
168 | Options used:
169 |    --refine 0
170 |    --order 1
171 |    --velocity 1000
172 |    --no-visit
173 |    --superlu
174 |    --slu-colperm 0
175 | Number of unknowns: 10201
176 | 
177 | ** Memory Usage **********************************
178 | ** NUMfact space (MB): (sum-of-all-processes)
179 |     L\U :           41.12 |  Total :    50.72
180 | ** Total highmark (MB):
181 |     Sum-of-all :    62.27 | Avg :    62.27  | Max :    62.27
182 | **************************************************
183 | Time required for solver:  38.2684 (s)
184 | Final L2 norm of residual: 1.55553e-18
185 | ```
186 | 
187 | |Steady State For _vel=1000_|
188 | |:---:|
189 | |[<img src="mfem-superlu0004.png" width="400">](mfem-superlu0004.png)|
190 | 
191 | ### Run 4: Now use SuperLU_DIST, with MMD(A'+A) ordering.
192 | ```
193 | $ ./convdiff -slu --velocity 1000 --slu-colperm 2
194 | 
195 | Options used:
196 |    --refine 0
197 |    --order 1
198 |    --velocity 1000
199 |    --no-visit
200 |    --superlu
201 |    --slu-colperm 2
202 | Number of unknowns: 10201
203 |        Nonzeros in L       594238
204 |        Nonzeros in U       580425
205 |        nonzeros in L+U     1164462
206 |        nonzeros in LSUB    203857
207 | 
208 | ** Memory Usage **********************************
209 | ** NUMfact space (MB): (sum-of-all-processes)
210 |     L\U :           10.07 |  Total :    16.19
211 | ** Total highmark (MB):
212 |     Sum-of-all :    16.19 | Avg :    16.19  | Max :    16.19
213 | **************************************************
214 | Time required for solver:  0.780516 (s)
215 | Final L2 norm of residual: 1.52262e-18
216 | ```
217 | NOTE: the number of nonzeros in L+U is much smaller than with natural ordering.
218 | This affects the memory usage and runtime.
219 | 
220 | ### Run 5: Now use SuperLU_DIST, with Metis(A'+A) ordering.
221 | ```
222 | $ ./convdiff -slu --velocity 1000 --slu-colperm 4
223 | 
224 | Options used:
225 |    --refine 0
226 |    --order 1
227 |    --velocity 1000
228 |    --no-visit
229 |    --superlu
230 |    --slu-colperm 4
231 | Number of unknowns: 10201
232 |        Nonzeros in L       522306
233 |        Nonzeros in U       527748
234 |        nonzeros in L+U     1039853
235 |        nonzeros in LSUB    218211
236 | 
237 | ** Memory Usage **********************************
238 | ** NUMfact space (MB): (sum-of-all-processes)
239 |     L\U :            9.24 |  Total :    15.64
240 | ** Total highmark (MB):
241 |     Sum-of-all :    15.64 | Avg :    15.64  | Max :    15.64
242 | **************************************************
243 | Time required for solver:  0.786936 (s)
244 | Final L2 norm of residual: 1.55331e-18
245 | ```
246 | 
247 | |Solutions @_dv_=25 in [100,1000]|Steady State Solution @ _vel=1000_|
248 | |:---:|:---:|
249 | |<video src="slu_metis.mpg" width="400" height="300" controls preload></video>|[<img src="mfem-superlu0004.png" width="400">](mfem-superlu0004.png)|
250 | 
251 | |Time to Solution|
252 | |:---:|
253 | |[<img src="slu_metis_time.png" width="400">](slu_metis_time.png)|
254 | 
255 | ### Run 6: Now use SuperLU_DIST, with Metis(A'+A) ordering, using 16 MPI tasks, on a larger problem.
256 | 
257 | By adding `--refine 2`, each element in the mesh is subdivided twice yielding a 16x larger problem.
258 | Here, we'll run on 16 tasks and just grep the output for some key values of interest.
259 | 
260 | ```
261 | $ ${MPIEXEC_OMPI} -n 16 ./convdiff --refine 2 --velocity 1000 -slu --slu-colperm 4 >& junk.out
262 | $ grep 'Time required for solver:' junk.out 
263 | Time required for solver:  10.3593 (s)
264 | Time required for solver:  16.3567 (s)
265 | Time required for solver:  11.6391 (s)
266 | Time required for solver:  10.669 (s)
267 | Time required for solver:  10.0605 (s)
268 | Time required for solver:  10.1216 (s)
269 | Time required for solver:  20.0721 (s)
270 | Time required for solver:  10.6205 (s)
271 | Time required for solver:  13.8445 (s)
272 | Time required for solver:  11.8943 (s)
273 | Time required for solver:  16.1552 (s)
274 | Time required for solver:  13.0849 (s)
275 | Time required for solver:  14.0008 (s)
276 | Time required for solver:  13.238 (s)
277 | Time required for solver:  12.387 (s)
278 | Time required for solver:  9.81836 (s)
279 | $ grep 'Final L2 norm of residual:' junk.out
280 | Final L2 norm of residual: 3.06951e-18
281 | Final L2 norm of residual: 3.06951e-18
282 | Final L2 norm of residual: 3.06951e-18
283 | Final L2 norm of residual: 3.06951e-18
284 | Final L2 norm of residual: 3.06951e-18
285 | Final L2 norm of residual: 3.06951e-18
286 | Final L2 norm of residual: 3.06951e-18
287 | Final L2 norm of residual: 3.06951e-18
288 | Final L2 norm of residual: 3.06951e-18
289 | Final L2 norm of residual: 3.06951e-18
290 | Final L2 norm of residual: 3.06951e-18
291 | Final L2 norm of residual: 3.06951e-18
292 | Final L2 norm of residual: 3.06951e-18
293 | Final L2 norm of residual: 3.06951e-18
294 | Final L2 norm of residual: 3.06951e-18
295 | Final L2 norm of residual: 3.06951e-18
296 | ```
297 | 
298 | > **Can you explain the processor times _relative_ to the previous, single processor run?**
299 | 
300 | |<font color="white">We've increased the mesh size by 16x here. But, we've also added 16x as many processors. Yet, the time for those processors to run ranged between 10 and 20 seconds with an average of 12.7 seconds. The smaller, single processor run took 0.786936 seconds and taking the ratio of these numbers, we get ~16. However, recall that the matrix size goes up as the SQUARE of the mesh size and this accounts for this additional factor of 16.</font>|
301 | 
302 | ---
303 | 
304 | ## Out-Brief
305 | 
306 | In this lesson, we have used [MFEM](http://mfem.org) as a vehicle to demonstrate
307 | the value of direct solvers from the [SuperLU_DIST](http://crd-legacy.lbl.gov/~xiaoye/SuperLU/)
308 | numerical package.
309 | 
310 | ### Further Reading
311 | 
312 | To learn more about sparse direct solvers, see the Gene Golub SIAM Summer School
313 | course materials:
314 | [Lecture Notes](http://www.siam.org/students/g2s3/2013/lecturers/XSLi/Lecture-Notes/sherry.pdf),
315 | [Book Chapter](http://crd-legacy.lbl.gov/~xiaoye/g2s3-summary.pdf), and
316 | [Video](http://www.siam.org/students/g2s3/2013/course.html)
317 | 
318 | <!-- Insert space, horizontal line, and link to HandsOnLesson table -->
319 | 
320 | &nbsp;
321 | 
322 | ---
323 | 
324 | [Back to all HandsOnLessons](../lessons.md)
325 | 


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0000.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0001.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0002.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0003.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0004.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/mfem-superlu0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/mfem-superlu0005.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/slu_metis.mpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis.mpg


--------------------------------------------------------------------------------
/lessons/superlu-mfem/slu_metis_residual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis_residual.png


--------------------------------------------------------------------------------
/lessons/superlu-mfem/slu_metis_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/superlu-mfem/slu_metis_time.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_dtt0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0000.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_dtt0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0001.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_dtt0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0002.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_dtt0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_dtt0003.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0000.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0001.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit0002.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit20000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20000.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit20001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20001.png


--------------------------------------------------------------------------------
/lessons/time_integrators/mfem_sundials_explicit20002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/mfem_sundials_explicit20002.png


--------------------------------------------------------------------------------
/lessons/time_integrators/nonlinear_heat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/nonlinear_heat.png


--------------------------------------------------------------------------------
/lessons/time_integrators/pyramid_animated.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xsdk-project/HandsOnLessons/7000e5a88455088a4e2ce2d6082b17a97bf31e5d/lessons/time_integrators/pyramid_animated.gif


--------------------------------------------------------------------------------
/lessons/time_integrators/transient-heat.cpp.numbered.txt:
--------------------------------------------------------------------------------
  1 |      1	//                         MFEM Example 16 - Parallel Version
  2 |      2	//                             SUNDIALS Modification
  3 |      3	//
  4 |      4	// Compile with: make ex16p
  5 |      5	//
  6 |      6	// Sample runs:
  7 |      7	//     mpiexec -n 4 ex16p
  8 |      8	//     mpiexec -n 4 ex16p -m ../../data/inline-tri.mesh
  9 |      9	//     mpiexec -n 4 ex16p -m ../../data/disc-nurbs.mesh -tf 2
 10 |     10	//     mpiexec -n 4 ex16p -s 12 -a 0.0 -k 1.0
 11 |     11	//     mpiexec -n 4 ex16p -s 1 -a 1.0 -k 0.0 -dt 4e-6 -tf 2e-2 -vs 50
 12 |     12	//     mpiexec -n 8 ex16p -s 2 -a 0.5 -k 0.5 -o 4 -dt 8e-6 -tf 2e-2 -vs 50
 13 |     13	//     mpiexec -n 4 ex16p -s 3 -dt 2.0e-4 -tf 4.0e-2
 14 |     14	//     mpiexec -n 16 ex16p -m ../../data/fichera-q2.mesh
 15 |     15	//     mpiexec -n 16 ex16p -m ../../data/escher-p2.mesh
 16 |     16	//     mpiexec -n 8 ex16p -m ../../data/beam-tet.mesh -tf 10 -dt 0.1
 17 |     17	//     mpiexec -n 4 ex16p -m ../../data/amr-quad.mesh -o 4 -rs 0 -rp 0
 18 |     18	//     mpiexec -n 4 ex16p -m ../../data/amr-hex.mesh -o 2 -rs 0 -rp 0
 19 |     19	//
 20 |     20	// Description:  This example solves a time dependent nonlinear heat equation
 21 |     21	//               problem of the form du/dt = C(u), with a non-linear diffusion
 22 |     22	//               operator C(u) = \nabla \cdot (\kappa + \alpha u) \nabla u.
 23 |     23	//
 24 |     24	//               The example demonstrates the use of nonlinear operators (the
 25 |     25	//               class ConductionOperator defining C(u)), as well as their
 26 |     26	//               implicit time integration. Note that implementing the method
 27 |     27	//               ConductionOperator::ImplicitSolve is the only requirement for
 28 |     28	//               high-order implicit (SDIRK) time integration. By default, this
 29 |     29	//               example uses the SUNDIALS ODE solvers from CVODE and ARKODE.
 30 |     30	//
 31 |     31	//               We recommend viewing examples 2, 9 and 10 before viewing this
 32 |     32	//               example.
 33 |     33	
 34 |     34	#include "mfem.hpp"
 35 |     35	#include "papi.h"
 36 |     36	#include <fstream>
 37 |     37	#include <iostream>
 38 |     38	#include <cmath>
 39 |     39	#include <assert.h>
 40 |     40	
 41 |     41	using namespace std;
 42 |     42	using namespace mfem;
 43 |     43	
 44 |     44	/** After spatial discretization, the conduction model can be written as:
 45 |     45	 *
 46 |     46	 *     du/dt = M^{-1}(-Ku)
 47 |     47	 *
 48 |     48	 *  where u is the vector representing the temperature, M is the mass matrix,
 49 |     49	 *  and K is the diffusion operator with diffusivity depending on u:
 50 |     50	 *  (\kappa + \alpha u).
 51 |     51	 *
 52 |     52	 *  Class ConductionOperator represents the right-hand side of the above ODE.
 53 |     53	 */
 54 |     54	class ConductionOperator : public TimeDependentOperator
 55 |     55	{
 56 |     56	protected:
 57 |     57	   ParFiniteElementSpace &fespace; // space handed to the constructor; held by reference
 58 |     58	   Array<int> ess_tdof_list; // this list remains empty for pure Neumann b.c.
 59 |     59	
 60 |     60	   ParBilinearForm *M; // mass form, assembled once in the constructor (deleted in dtor)
 61 |     61	   ParBilinearForm *K; // diffusion form, re-created on every SetParameters() call
 62 |     62	
 63 |     63	   HypreParMatrix Mmat; // system matrix formed from M
 64 |     64	   HypreParMatrix Kmat; // system matrix formed from K
 65 |     65	   HypreParMatrix *T; // T = M + dt K; built on demand, reset to NULL by SetParameters()
 66 |     66	   double current_dt; // the dt that the current T was built with
 67 |     67	
 68 |     68	   CGSolver M_solver;    // Krylov solver for inverting the mass matrix M
 69 |     69	   HypreSmoother M_prec; // Preconditioner for the mass matrix M
 70 |     70	
 71 |     71	   CGSolver T_solver;    // Implicit solver for T = M + dt K
 72 |     72	   HypreSmoother T_prec; // Preconditioner for the implicit solver
 73 |     73	
 74 |     74	   double alpha, kappa; // conductivity coefficient is kappa + alpha*u (see SetParameters)
 75 |     75	
 76 |     76	   mutable Vector z; // auxiliary vector of size `height`; holds right-hand sides
 77 |     77	
 78 |     78	public:
 79 |     79	   ConductionOperator(ParFiniteElementSpace &f, double alpha, double kappa,
 80 |     80	                      const Vector &u); // u: initial true-dof vector used to build K
 81 |     81	
 82 |     82	   virtual void Mult(const Vector &u, Vector &du_dt) const; // du_dt = M^{-1}(-K u)
 83 |     83	   /** Solve the Backward-Euler equation: k = f(u + dt*k, t), for the unknown k.
 84 |     84	       This is the only requirement for high-order SDIRK implicit integration.*/
 85 |     85	   virtual void ImplicitSolve(const double dt, const Vector &u, Vector &k);
 86 |     86	
 87 |     87	   /** Solve the system (M + dt K) y = M b. The result y replaces the input b.
 88 |     88	       This method is used by the implicit SUNDIALS solvers. */
 89 |     89	   void SundialsSolve(const double dt, Vector &b);
 90 |     90	
 91 |     91	   /// Update the diffusion BilinearForm K using the given true-dof vector `u`.
 92 |     92	   void SetParameters(const Vector &u);
 93 |     93	
 94 |     94	   virtual ~ConductionOperator(); // deletes T, M and K
 95 |     95	};
 96 |     96	
 97 |     97	/// Custom Jacobian system solver for the SUNDIALS time integrators.
 98 |     98	/** For the ODE system represented by ConductionOperator
 99 |     99	
100 |    100	        M du/dt = -K(u),
101 |    101	
102 |    102	    this class facilitates the solution of linear systems of the form
103 |    103	
104 |    104	        (M + γK) y = M b,
105 |    105	
106 |    106	    for given b, u (not used), and γ = GetTimeStep(). */
107 |    107	class SundialsJacSolver : public SundialsODELinearSolver
108 |    108	{
109 |    109	private:
110 |    110	  ConductionOperator *oper; // set by InitSystem(); never deleted by this class
111 |    111	
112 |    112	public:
113 |    113	   SundialsJacSolver() : oper(NULL) { } // oper stays NULL until InitSystem() runs
114 |    114	
115 |    115	   int InitSystem(void *sundials_mem); // looks up and caches the ConductionOperator
116 |    116	   int SetupSystem(void *sundials_mem, int conv_fail,
117 |    117	                   const Vector &y_pred, const Vector &f_pred, int &jac_cur,
118 |    118	                   Vector &v_temp1, Vector &v_temp2, Vector &v_temp3); // only sets jac_cur
119 |    119	   int SolveSystem(void *sundials_mem, Vector &b, const Vector &weight,
120 |    120	                   const Vector &y_cur, const Vector &f_cur); // b is overwritten in place
121 |    121	   int FreeSystem(void *sundials_mem); // nothing to release; returns 0
122 |    122	};
123 |    123	
124 |    124	double InitialTemperature(const Vector &x); // initial condition; defined after main()
125 |    125	
126 |    126	static void initialize_papi(void) // start PAPI counters; no-op unless HAVE_PAPI is defined
127 |    127	{
128 |    128	#ifdef HAVE_PAPI
129 |    129	    float ireal_time, iproc_time, imflops; // outputs of the first PAPI_flops call (discarded)
130 |    130	    long long iflpops;
131 |    131	    const bool ok = PAPI_library_init(PAPI_VER_CURRENT) == PAPI_VER_CURRENT && // evaluated outside assert(): NDEBUG drops assert arguments
132 |    132	                    PAPI_flops(&ireal_time,&iproc_time,&iflpops,&imflops) >= PAPI_OK;
133 |    133	    assert(ok); (void)ok; // (void) avoids an unused-variable warning when asserts compile out
134 |    134	#endif
135 |    135	}
136 |    136	
137 |    137	static void finalize_papi(void) // print PAPI memory/timing/flop totals; no-op unless HAVE_PAPI
138 |    138	{
139 |    139	#ifdef HAVE_PAPI
140 |    140	    float real_time, proc_time, mflops;
141 |    141	    long long flpops;
142 |    142	    PAPI_dmem_info_t dmem;
143 |    143	    // Query PAPI outside assert(): with NDEBUG the assert argument is never evaluated.
144 |    144	    const bool ok = PAPI_flops(&real_time,&proc_time,&flpops,&mflops) >= PAPI_OK &&
145 |    145	                    PAPI_get_dmem_info(&dmem) >= PAPI_OK;
146 |    146	    assert(ok); (void)ok; // (void) avoids an unused-variable warning when asserts compile out
147 |    147	    cout << "Memory Info:" << endl;
148 |    148	    cout << "\tMem Size:     " << dmem.size << endl;
149 |    149	    cout << "\tMem Resident:\t\t" << dmem.resident << endl;
150 |    150	    cout << "\tMem Heap:     " << dmem.heap << endl;
151 |    151	    cout << "Timing Info:" << endl;
152 |    152	    cout << "\tReal_time:    " << real_time << endl;
153 |    153	    cout << "\tProc_time:    " << proc_time << endl;
154 |    154	    cout << "Flops Info:" << endl;
155 |    155	    cout << "\tTotal flpops: " << flpops << endl;
156 |    156	    cout << "\tMFLOPS:       " << mflops << endl;
157 |    157	#endif
158 |    158	}
159 |    159	
160 |    160	
161 |    161	int main(int argc, char *argv[])
162 |    162	{
163 |    163	   initialize_papi(); // no-op unless built with HAVE_PAPI
164 |    164	
165 |    165	   // Initialize MPI.
166 |    166	   int num_procs, myid;
167 |    167	   MPI_Init(&argc, &argv);
168 |    168	   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
169 |    169	   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
170 |    170	
171 |    171	   // Parse command-line options.
172 |    172	   int dim = 2;
173 |    173	   int ref_levels = 0;
174 |    174	   int order = 1;
175 |    175	   double t_final = 0.5;
176 |    176	   double dt = 0.01;
177 |    177	   double alpha = 0.0;
178 |    178	   double kappa = 0.5;
179 |    179	   bool implicit = false;
180 |    180	   bool adaptdt = false;
181 |    181	   double reltol = 1e-4;
182 |    182	   double abstol = 1e-4;
183 |    183	   bool noout = false;
184 |    184	
185 |    185	   OptionsParser args(argc, argv);
186 |    186	   args.AddOption(&dim, "-d", "--dim",
187 |    187	                  "Number of dimensions in the problem (1, 2, or 3).");
188 |    188	   args.AddOption(&ref_levels, "-r", "--refine",
189 |    189	                  "Number of times to refine the mesh uniformly.");
190 |    190	   args.AddOption(&order, "-o", "--order",
191 |    191	                  "Order (degree) of the finite elements.");
192 |    192	   args.AddOption(&t_final, "-tf", "--t-final",
193 |    193	                  "Final time; start time is 0.");
194 |    194	   args.AddOption(&dt, "-dt", "--time-step",
195 |    195	                  "Initial time step.");
196 |    196	   args.AddOption(&alpha, "-a", "--alpha",
197 |    197	                  "Alpha coefficient for conductivity: kappa + alpha*temperature");
198 |    198	   args.AddOption(&kappa, "-k", "--kappa",
199 |    199	                  "Kappa coefficient conductivity: kappa + alpha*temperature");
200 |    200	   args.AddOption(&adaptdt, "-adt", "--adapt-time-step", "-fdt", "--fixed-time-step",
201 |    201	                  "Flag whether or not to adapt the time step.");
202 |    202	   args.AddOption(&implicit, "-imp", "--implicit", "-exp", "--explicit",
203 |    203	                  "Implicit or Explicit ODE solution.");
204 |    204	   args.AddOption(&reltol, "-rtol", "--relative-tolerance",
205 |    205	                  "Relative tolerance in Sundials time integrator.");
206 |    206	   args.AddOption(&abstol, "-atol", "--absolute-tolerance",
207 |    207	                  "Absolute tolerance in Sundials time integrator.");
208 |    208	   args.AddOption(&noout, "-noout", "--no-output", "-out", "--do-output",
209 |    209	                  "Disable all file outputs.");
210 |    210	
211 |    211	   int precision = 8;
212 |    212	   cout.precision(precision);
213 |    213	   args.Parse();
214 |    214	   if (!args.Good())
215 |    215	   {
216 |    216	      if (myid == 0) { args.PrintUsage(cout); } // print once instead of once per rank
217 |    217	      MPI_Finalize();
218 |    218	      return 1;
219 |    219	   }
220 |    220	
221 |    221	   if (myid == 0)
222 |    222	   {
223 |    223	      args.PrintOptions(cout);
224 |    224	   }
225 |    225	
226 |    226	   Mesh *mesh; // serial mesh; copied into the parallel mesh below and then deleted
227 |    227	   if (dim == 1)
228 |    228	   {
229 |    229	      mesh = new Mesh(16, 1.0);
230 |    230	   }
231 |    231	   else if (dim == 2)
232 |    232	   {
233 |    233	      mesh = new Mesh(16, 16, Element::QUADRILATERAL, 1, 1.0, 1.0);
234 |    234	   }
235 |    235	   else if (dim == 3)
236 |    236	   {
237 |    237	      mesh = new Mesh(16, 16, 16, Element::HEXAHEDRON, 1, 1.0, 1.0, 1.0);
238 |    238	   }
239 |    239	   else
240 |    240	   {
241 |    241	      if (myid == 0) { cout << "Dimension must be set to 1, 2, or 3." << endl; }
242 |    242	      MPI_Finalize(); return 2; // every exit path after MPI_Init must finalize MPI
243 |    243	   }
244 |    244	   ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
245 |    245	   for (int lev = 0; lev < ref_levels; lev++)
246 |    246	   {
247 |    247	      pmesh->UniformRefinement();
248 |    248	   }
249 |    249	   delete mesh;
250 |    250	
251 |    251	   // Define the ARKODE solver used for time integration. Either implicit or explicit.
252 |    252	   ODESolver *ode_solver = NULL;
253 |    253	   ARKODESolver *arkode = NULL;
254 |    254	   SundialsJacSolver sun_solver; // Used by the implicit ARKODE solver.
255 |    255	
256 |    256	   if (implicit)
257 |    257	   {
258 |    258	      arkode = new ARKODESolver(MPI_COMM_WORLD, ARKODESolver::IMPLICIT);
259 |    259	      arkode->SetLinearSolver(sun_solver);
260 |    260	   }
261 |    261	   else
262 |    262	   {
263 |    263	      arkode = new ARKODESolver(MPI_COMM_WORLD, ARKODESolver::EXPLICIT);
264 |    264	      arkode->SetERKTableNum(FEHLBERG_13_7_8);
265 |    265	   }
266 |    266	   arkode->SetStepMode(ARK_ONE_STEP);
267 |    267	   arkode->SetSStolerances(reltol, abstol);
268 |    268	   arkode->SetMaxStep(t_final / 2.0);
269 |    269	   if (!adaptdt)
270 |    270	   {
271 |    271	      arkode->SetFixedStep(dt);
272 |    272	   }
273 |    273	   ode_solver = arkode; // same object; deleted once through ode_solver during cleanup
274 |    274	
275 |    275	   // Define the H1 finite element space for the temperature and the grid
276 |    276	   // function u_gf that holds the current temperature.
277 |    277	   H1_FECollection fe_coll(order, dim);
278 |    278	   ParFiniteElementSpace fespace(pmesh, &fe_coll);
279 |    279	   ParGridFunction u_gf(&fespace);
280 |    280	   int fe_size = fespace.GlobalTrueVSize();
281 |    281	   if (myid == 0)
282 |    282	   {
283 |    283	      cout << "Number of temperature unknowns: " << fe_size << endl;
284 |    284	   }
285 |    285	
286 |    286	   // Set the initial conditions for u.
287 |    287	   FunctionCoefficient u_0(InitialTemperature);
288 |    288	   u_gf.ProjectCoefficient(u_0);
289 |    289	   Vector u; // true-dof vector advanced by the time integrator
290 |    290	   u_gf.GetTrueDofs(u);
291 |    291	
292 |    292	   // Initialize the conduction operator and the VisIt visualization.
293 |    293	   ConductionOperator oper(fespace, alpha, kappa, u);
294 |    294	   u_gf.SetFromTrueDofs(u);
295 |    295	   VisItDataCollection visit_dc("dump", pmesh);
296 |    296	   visit_dc.RegisterField("temperature", &u_gf);
297 |    297	   if (!noout)
298 |    298	   {
299 |    299	      visit_dc.SetCycle(0);
300 |    300	      visit_dc.SetTime(0.0);
301 |    301	      visit_dc.Save();
302 |    302	   }
303 |    303	
304 |    304	   // Perform time-integration
305 |    305	   if (myid == 0)
306 |    306	   {
307 |    307	      cout << "Integrating the ODE ..." << endl;
308 |    308	   }
309 |    309	   ode_solver->Init(oper);
310 |    310	   double t = 0.0;
311 |    311	   bool last_step = false;
312 |    312	   for (int ti = 1; !last_step; ti++)
313 |    313	   {
314 |    314	      if (dt > t_final - t) // shorten the step so it does not overshoot t_final
315 |    315	      {
316 |    316	         dt = t_final - t;
317 |    317	         arkode->SetFixedStep(dt);
318 |    318	      }
319 |    319	      ode_solver->Step(u, t, dt);
320 |    320	
321 |    321	      if (myid == 0)
322 |    322	      {
323 |    323	         cout << "step " << ti << ", t = " << t << endl;
324 |    324	         arkode->PrintInfo();
325 |    325	      }
326 |    326	
327 |    327	      u_gf.SetFromTrueDofs(u);
328 |    328	
329 |    329	      if (!noout)
330 |    330	      {
331 |    331	          visit_dc.SetCycle(ti);
332 |    332	          visit_dc.SetTime(t);
333 |    333	          visit_dc.Save();
334 |    334	      }
335 |    335	
336 |    336	      oper.SetParameters(u); // rebuild K from the temperature just computed
337 |    337	      last_step = (t >= t_final - 1e-8*dt); // small tolerance guards against round-off in t
338 |    338	   }
339 |    339	
340 |    340	   // Cleanup
341 |    341	   delete ode_solver;
342 |    342	   delete pmesh;
343 |    343	   MPI_Finalize();
344 |    344	
345 |    345	   finalize_papi(); // no-op unless built with HAVE_PAPI
346 |    346	
347 |    347	   return 0;
348 |    348	}
349 |    349	
350 |    350	ConductionOperator::ConductionOperator(ParFiniteElementSpace &f, double al,
351 |    351	                                       double kap, const Vector &u)
352 |    352	   : TimeDependentOperator(f.GetTrueVSize(), 0.0), fespace(f), M(NULL), K(NULL),
353 |    353	     T(NULL), current_dt(0.0),
354 |    354	     M_solver(f.GetComm()), T_solver(f.GetComm()), z(height)
355 |    355	{ // Assembles M, configures both CG solvers, then builds K from the initial state u.
356 |    356	   const double rel_tol = 1e-8; // relative tolerance shared by M_solver and T_solver
357 |    357	
358 |    358	   M = new ParBilinearForm(&fespace);
359 |    359	   M->AddDomainIntegrator(new MassIntegrator());
360 |    360	   M->Assemble(0); // keep sparsity pattern of M and K the same
361 |    361	   M->FormSystemMatrix(ess_tdof_list, Mmat); // ess_tdof_list is empty here
362 |    362	
363 |    363	   M_solver.iterative_mode = false;
364 |    364	   M_solver.SetRelTol(rel_tol);
365 |    365	   M_solver.SetAbsTol(0.0);
366 |    366	   M_solver.SetMaxIter(100);
367 |    367	   M_solver.SetPrintLevel(0);
368 |    368	   M_prec.SetType(HypreSmoother::Jacobi);
369 |    369	   M_solver.SetPreconditioner(M_prec);
370 |    370	   M_solver.SetOperator(Mmat);
371 |    371	
372 |    372	   alpha = al; // alpha and kappa must be set before SetParameters(u) below reads them
373 |    373	   kappa = kap;
374 |    374	
375 |    375	   T_solver.iterative_mode = false;
376 |    376	   T_solver.SetRelTol(rel_tol);
377 |    377	   T_solver.SetAbsTol(0.0);
378 |    378	   T_solver.SetMaxIter(100);
379 |    379	   T_solver.SetPrintLevel(0);
380 |    380	   T_solver.SetPreconditioner(T_prec); // its operator is set later, whenever T is built
381 |    381	
382 |    382	   SetParameters(u); // assembles K and Kmat for the initial temperature
383 |    383	}
384 |    384	
385 |    385	void ConductionOperator::Mult(const Vector &u, Vector &du_dt) const
386 |    386	{
387 |    387	   // Compute:
388 |    388	   //    du_dt = M^{-1}*-K(u)
389 |    389	   // for du_dt
390 |    390	   Kmat.Mult(u, z); // z = K u, using the K assembled by the last SetParameters() call
391 |    391	   z.Neg(); // z = -z
392 |    392	   M_solver.Mult(z, du_dt); // CG solve with the mass matrix: M du_dt = z
393 |    393	}
394 |    394	
395 |    395	void ConductionOperator::ImplicitSolve(const double dt,
396 |    396	                                       const Vector &u, Vector &du_dt)
397 |    397	{
398 |    398	   // Solve the equation:
399 |    399	   //    du_dt = M^{-1}*[-K(u + dt*du_dt)]
400 |    400	   // for du_dt
401 |    401	   if (!T) // T is NULL initially and after every SetParameters() call
402 |    402	   {
403 |    403	      T = Add(1.0, Mmat, dt, Kmat); // T = M + dt*K
404 |    404	      current_dt = dt;
405 |    405	      T_solver.SetOperator(*T);
406 |    406	   }
407 |    407	   MFEM_VERIFY(dt == current_dt, ""); // SDIRK methods use the same dt
408 |    408	   Kmat.Mult(u, z); // z = K u
409 |    409	   z.Neg(); // z = -K u
410 |    410	   T_solver.Mult(z, du_dt); // solve (M + dt K) du_dt = -K u
411 |    411	}
412 |    412	
413 |    413	void ConductionOperator::SundialsSolve(const double dt, Vector &b)
414 |    414	{
415 |    415	   // Solve the system (M + dt K) y = M b. The result y replaces the input b.
416 |    416	   if (!T || dt != current_dt) // unlike ImplicitSolve, a changed dt rebuilds T here
417 |    417	   {
418 |    418	      delete T; // deleting NULL is harmless
419 |    419	      T = Add(1.0, Mmat, dt, Kmat); // T = M + dt*K
420 |    420	      current_dt = dt;
421 |    421	      T_solver.SetOperator(*T);
422 |    422	   }
423 |    423	   Mmat.Mult(b, z); // z = M b
424 |    424	   T_solver.Mult(z, b); // b <- solution of T y = z
425 |    425	}
426 |    426	
427 |    427	void ConductionOperator::SetParameters(const Vector &u)
428 |    428	{
429 |    429	   ParGridFunction u_alpha_gf(&fespace); // will hold the conductivity kappa + alpha*u
430 |    430	   u_alpha_gf.SetFromTrueDofs(u);
431 |    431	   for (int i = 0; i < u_alpha_gf.Size(); i++)
432 |    432	   {
433 |    433	      u_alpha_gf(i) = kappa + alpha*u_alpha_gf(i); // overwrite temperature with conductivity
434 |    434	   }
435 |    435	
436 |    436	   delete K; // K is NULL on the first call, made from the constructor
437 |    437	   K = new ParBilinearForm(&fespace);
438 |    438	
439 |    439	   GridFunctionCoefficient u_coeff(&u_alpha_gf); // NOTE(review): local object; presumably K must not be re-assembled after return - confirm
440 |    440	
441 |    441	   K->AddDomainIntegrator(new DiffusionIntegrator(u_coeff));
442 |    442	   K->Assemble(0); // keep sparsity pattern of M and K the same
443 |    443	   K->FormSystemMatrix(ess_tdof_list, Kmat);
444 |    444	   delete T; // T was built from the old Kmat and is now stale
445 |    445	   T = NULL; // re-compute T on the next ImplicitSolve or SundialsSolve
446 |    446	}
447 |    447	
448 |    448	ConductionOperator::~ConductionOperator()
449 |    449	{ // Releases the three heap objects this class allocates.
450 |    450	   delete T; // may be NULL if no implicit solve followed the last SetParameters()
451 |    451	   delete M;
452 |    452	   delete K;
453 |    453	}
454 |    454	
455 |    455	
456 |    456	int SundialsJacSolver::InitSystem(void *sundials_mem)
457 |    457	{ // Caches the ConductionOperator attached to sundials_mem; always returns 0.
458 |    458	   TimeDependentOperator *td_oper = GetTimeDependentOperator(sundials_mem);
459 |    459	
460 |    460	   // During development, we use dynamic_cast<> to ensure the setup is correct:
461 |    461	   oper = dynamic_cast<ConductionOperator*>(td_oper);
462 |    462	   MFEM_VERIFY(oper, "operator is not ConductionOperator"); // cast yields NULL on a type mismatch
463 |    463	   return 0;
464 |    464	}
465 |    465	
466 |    466	int SundialsJacSolver::SetupSystem(void *sundials_mem, int conv_fail,
467 |    467	                                   const Vector &y_pred, const Vector &f_pred,
468 |    468	                                   int &jac_cur, Vector &v_temp1,
469 |    469	                                   Vector &v_temp2, Vector &v_temp3)
470 |    470	{ // No setup work is done here; all other arguments are unused.
471 |    471	   jac_cur = 1; // presumably tells SUNDIALS the Jacobian data is current - confirm against SUNDIALS docs
472 |    472	
473 |    473	   return 0;
474 |    474	}
475 |    475	
476 |    476	int SundialsJacSolver::SolveSystem(void *sundials_mem, Vector &b,
477 |    477	                                   const Vector &weight, const Vector &y_cur,
478 |    478	                                   const Vector &f_cur)
479 |    479	{ // weight, y_cur and f_cur are unused; oper must have been set by InitSystem().
480 |    480	   oper->SundialsSolve(GetTimeStep(sundials_mem), b); // b <- (M + dt K)^{-1} M b
481 |    481	
482 |    482	   return 0;
483 |    483	}
484 |    484	
485 |    485	int SundialsJacSolver::FreeSystem(void *sundials_mem)
486 |    486	{ // Nothing to free: this class allocates nothing and does not delete oper.
487 |    487	   return 0;
488 |    488	}
489 |    489	
490 |    490	
491 |    491	// "Pyramid" initial temperature: 1.0 at the point whose coordinates are all
492 |    492	// 0.5, decreasing linearly to 0.0 where any coordinate is 0.5 away from it.
493 |    493	double InitialTemperature(const Vector &x)
494 |    494	{
495 |    495	   double max_comp_dist = 0.0; // largest per-coordinate distance from 0.5
496 |    496	   for (int d = 0; d < x.Size(); ++d)
497 |    497	   {
498 |    498	      double comp_dist = std::abs(x[d] - 0.5);
499 |    499	      if (comp_dist > max_comp_dist)
500 |    500	      {
501 |    501	         max_comp_dist = comp_dist;
502 |    502	      }
503 |    503	   }
504 |    504	   return 1.0 - 2.0*max_comp_dist; // 1 at distance 0, 0 at distance 0.5
505 |    505	}
506 | 


--------------------------------------------------------------------------------
/tools/atpesc2017_cooley_vnc_setup.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | acct=ATPESC2017   # project passed to qsub -A
  3 | nnodes=2          # nodes requested from qsub (-n)
  4 | tl=600            # allocation time limit passed to qsub -t
  5 | localos=$(uname)
  6 | linuxvnc=''
  7 | 
  8 | if [[ -z "$1" ]]; then
  9 |     echo "Usage: $0 <cooley_username> [debug]"
 10 |     echo "    if 'debug' present reduces allocation to 1 node @ 20 mins"
 11 |     exit 1
 12 | fi
 13 | cooley_username=$1
 14 | 
 15 | if [[ "$2" == debug ]]; then
 16 |     set -x
 17 |     nnodes=1
 18 |     tl=20
 19 | fi
 20 | 
 21 | if [[ "$localos" == "Linux" ]]; then
 22 |     if command -v vncviewer > /dev/null 2>&1; then  # search PATH, not only /usr/bin
 23 |         linuxvnc=vncviewer
 24 |     elif command -v vinagre > /dev/null 2>&1; then
 25 |         linuxvnc=vinagre
 26 |     else
 27 |          echo "Please install vncviewer (from TigerVNC) or vinagre and rerun the script"
 28 |          exit 1
 29 |     fi
 30 | fi
 31 | 
 32 | # Ensure ~/.ssh/config exists and has limited permissions
 33 | if [[ ! -e ~/.ssh/config ]]; then
 34 |     if [[ ! -e ~/.ssh ]]; then
 35 |         mkdir ~/.ssh
 36 |         chmod 700 ~/.ssh  # config does not exist yet, so only the directory is chmod'ed here
 37 |     fi
 38 |     touch ~/.ssh/config
 39 |     chmod 600 ~/.ssh/config  # a config file needs no execute bit
 40 | fi
 41 | if [[ ! -d ~/.ssh/cm_socket ]]; then
 42 |     mkdir -p ~/.ssh/cm_socket
 43 | fi
 44 | 
 45 | #
 46 | # Append stuff to ~/.ssh/config for ssh control master to cooley
 47 | #
 48 | if ! grep -q cooley-nph ~/.ssh/config; then
 49 | cat >> ~/.ssh/config << EOF
 50 | #added by NumericalPackagesHandsOn
 51 | Host cooley-nph
 52 |     Hostname cooley.alcf.anl.gov
 53 |     Compression yes
 54 |     ControlMaster auto
 55 |     ControlPersist 12h
 56 |     ControlPath ~/.ssh/cm_socket/%r@cooley.alcf.anl.gov:%p
 57 | EOF
 58 | fi
 59 | 
 60 | #
 61 | # open login to cooley (will prompt) and put in bg and keep open all day
 62 | # This is the login that all others will use shared authentication with
 63 | #
 64 | ssh -N -f "${cooley_username}@cooley-nph"
 65 | 
 66 | #
 67 | # Write the VNC xstartup file on cooley (sent over ssh stdin from the here-doc)
 68 | #
 69 | ssh "${cooley_username}@cooley-nph" "mkdir -p  ~/.vnc; cat > ~/.vnc/xstartup" << EOF
 70 | #!/bin/bash
 71 | #created by NumericalPackagesHandsOn
 72 | export DISPLAY=:0.0
 73 | export HANDSON=/projects/ATPESC2017/NumericalPackages/handson/
 74 | xterm -fn 10x20 &
 75 | twm
 76 | EOF
 77 | ssh "${cooley_username}@cooley-nph" "chmod u+x ~/.vnc/xstartup"
 78 | #
 79 | # Update users .soft.cooley file for basic setup
 80 | #
 81 | #ssh ${cooley_username}@cooley-nph "cat >> ~/.soft.cooley" << EOF
 82 | ##added by NumericalPackagesHandsOn
 83 | #+gcc-4.8.1
 84 | #@visit
 85 | #EOF
 86 | #
 87 | # Get a temporary password from the user and confirm it is as intended
 88 | #
 89 | while true; do
 90 |     read -r -p "Create temporary VNC Password: " pw
 91 |     echo "You have entered \"$pw\", is this correct?"
 92 |     select yn in "Yes" "No"; do
 93 |         # "Yes" leaves both loops; "No" leaves only the menu so the password
 94 |         # prompt runs again (without it, "No" just re-displayed the menu).
 95 |         case $yn in Yes ) break 2;; No ) break;; esac
 96 |     done
 97 | done
 98 | # Send the password over ssh stdin (never on the remote command line) and vncpasswd encode it
 99 | printf '%s\n' "$pw" | ssh "${cooley_username}@cooley-nph" "umask 077; rm -f ~/.vnc/passwd; vncpasswd -f > ~/.vnc/passwd; chmod 600 ~/.vnc/passwd"
100 | 
101 | #
102 | # Reserve $nnodes nodes for interactive use ($tl time limit); output is polled below
103 | #
104 | ssh -t -t -f "${cooley_username}@cooley-nph" "qsub -I -n $nnodes -t $tl -A $acct -q training" > ./qsub-interactive.out 2>&1 &
105 | 
106 | #
107 | # Loop watching output from above to get allocation node name
108 | #
109 | nodid=""
110 | while [[ -z "$nodid" ]] ; do
111 |     echo "Checking for allocation completion"
112 |     nodid=$(grep -o 'cc[0-9][0-9][0-9]\.cooley' ./qsub-interactive.out 2>/dev/null | head -n 1 | cut -d'.' -f1)
113 |     sleep 5
114 | done
115 | echo "Got allocation at $nodid"
116 | 
117 | #
118 | # Startup xvncserver on the allocation
119 | # (">&" is kept inside the remote command strings below exactly as before)
120 | ssh "${cooley_username}@cooley-nph" "nohup ssh $nodid x0vncserver --display=:0.0 --NeverShared=1 --geometry=1200x900+0+0 --PasswordFile=/home/$cooley_username/.vnc/passwd --MaxProcessorUsage=100 >& /dev/null &"
121 | sleep 5
122 | 
123 | #
124 | # Set up 2-hop ssh tunnel to allocation, (above) through login and run xstartup there
125 | #
126 | ssh -f -L "22590:$nodid:5900" "${cooley_username}@cooley-nph" "nohup ssh $nodid ~/.vnc/xstartup >& /dev/null &"
127 | sleep 5
128 | 
129 | #
130 | # finally, start the vnc client on local machine
131 | #
132 | echo "Attempting to connect VNC to localhost:22590 - If this fails you can reattempt this manually"
133 | if [[ "$localos" == Darwin ]]; then
134 |     open vnc://localhost:22590
135 | elif [[ "$localos" == Linux ]]; then
136 |     "$linuxvnc" localhost::22590
137 | elif [[ "$localos" == windows ]]; then
138 |     echo "not implemented"
139 | fi
140 | 


--------------------------------------------------------------------------------