├── .clang-format
├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── ccpp.yml
├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── performance.md
└── src
    ├── .gitignore
    ├── CMakeLists.txt
    ├── Elasticity.py
    ├── Poisson.py
    ├── cg.h
    ├── cgpoisson_problem.cpp
    ├── cgpoisson_problem.h
    ├── elasticity_problem.cpp
    ├── elasticity_problem.h
    ├── main.cpp
    ├── mem.cpp
    ├── mem.h
    ├── mesh.cpp
    ├── mesh.h
    ├── poisson_problem.cpp
    └── poisson_problem.h


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | Language:        Cpp
 3 | # BasedOnStyle:  LLVM
 4 | AccessModifierOffset: -2
 5 | AlignAfterOpenBracket: Align
 6 | AlignConsecutiveAssignments: false
 7 | AlignConsecutiveDeclarations: false
 8 | AlignEscapedNewlinesLeft: false
 9 | AlignOperands:   true
10 | AlignTrailingComments: true
11 | AllowAllParametersOfDeclarationOnNextLine: true
12 | AllowShortBlocksOnASingleLine: false
13 | AllowShortCaseLabelsOnASingleLine: false
14 | AllowShortFunctionsOnASingleLine: All
15 | AllowShortIfStatementsOnASingleLine: false
16 | AllowShortLoopsOnASingleLine: false
17 | AlwaysBreakAfterDefinitionReturnType: None
18 | AlwaysBreakAfterReturnType: None
19 | AlwaysBreakBeforeMultilineStrings: false
20 | AlwaysBreakTemplateDeclarations: true
21 | BinPackArguments: true
22 | BinPackParameters: true
23 | BraceWrapping:
24 |   AfterClass:      false
25 |   AfterControlStatement: false
26 |   AfterEnum:       false
27 |   AfterFunction:   false
28 |   AfterNamespace:  false
29 |   AfterObjCDeclaration: false
30 |   AfterStruct:     false
31 |   AfterUnion:      false
32 |   BeforeCatch:     false
33 |   BeforeElse:      false
34 |   IndentBraces:    false
35 | BreakBeforeBinaryOperators: All
36 | BreakBeforeBraces: Allman
37 | BreakBeforeTernaryOperators: true
38 | BreakConstructorInitializersBeforeComma: false
39 | BreakAfterJavaFieldAnnotations: false
40 | BreakStringLiterals: true
41 | ColumnLimit:     80
42 | CommentPragmas:  '^ IWYU pragma:'
43 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
44 | ConstructorInitializerIndentWidth: 4
45 | ContinuationIndentWidth: 4
46 | Cpp11BracedListStyle: true
47 | DerivePointerAlignment: false
48 | DisableFormat:   false
49 | ExperimentalAutoDetectBinPacking: false
50 | ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
51 | IncludeCategories:
52 |   - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
53 |     Priority:        2
54 |   - Regex:           '^(<|"(gtest|isl|json)/)'
55 |     Priority:        3
56 |   - Regex:           '.*'
57 |     Priority:        1
58 | IncludeIsMainRegex: '$'
59 | IndentCaseLabels: false
60 | IndentWidth:     2
61 | IndentWrappedFunctionNames: false
62 | JavaScriptQuotes: Leave
63 | JavaScriptWrapImports: true
64 | KeepEmptyLinesAtTheStartOfBlocks: true
65 | MacroBlockBegin: ''
66 | MacroBlockEnd:   ''
67 | MaxEmptyLinesToKeep: 1
68 | NamespaceIndentation: None
69 | ObjCBlockIndentWidth: 2
70 | ObjCSpaceAfterProperty: false
71 | ObjCSpaceBeforeProtocolList: true
72 | PenaltyBreakBeforeFirstCallParameter: 19
73 | PenaltyBreakComment: 300
74 | PenaltyBreakFirstLessLess: 120
75 | PenaltyBreakString: 1000
76 | PenaltyExcessCharacter: 1000000
77 | PenaltyReturnTypeOnItsOwnLine: 60
78 | PointerAlignment: Left
79 | ReflowComments:  true
80 | SortIncludes:    true
81 | SpaceAfterCStyleCast: false
82 | SpaceAfterTemplateKeyword: true
83 | SpaceBeforeAssignmentOperators: true
84 | SpaceBeforeParens: ControlStatements
85 | SpaceInEmptyParentheses: false
86 | SpacesBeforeTrailingComments: 1
87 | SpacesInAngles:  false
88 | SpacesInContainerLiterals: true
89 | SpacesInCStyleCastParentheses: false
90 | SpacesInParentheses: false
91 | SpacesInSquareBrackets: false
92 | Standard:        Cpp11
93 | TabWidth:        8
94 | UseTab:          Never
95 | ...
96 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: FEniCS # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | 


--------------------------------------------------------------------------------
/.github/workflows/ccpp.yml:
--------------------------------------------------------------------------------
  1 | name: FEniCS Performance Test CI
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - "**"
  7 |   pull_request:
  8 |     branches:
  9 |       - main
 10 |   merge_group:
 11 |     branches:
 12 |       - main
 13 |   workflow_dispatch:
 14 |   schedule:
 15 |     # * is a special character in YAML so you have to quote this string
 16 |     - cron: "0 3 * * 0,3"
 17 | 
 18 | jobs:
 19 |   build:
 20 |     runs-on: ubuntu-latest
 21 |     container: fenicsproject/test-env:current-openmpi
 22 | 
 23 |     env:
 24 |       PETSC_ARCH: linux-gnu-real64-64
 25 |       OMPI_ALLOW_RUN_AS_ROOT: 1
 26 |       OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
 27 | 
 28 |     steps:
 29 |       - uses: actions/checkout@v4
 30 | 
 31 |       - name: Get DOLFINx
 32 |         uses: actions/checkout@v4
 33 |         with:
 34 |           path: ./dolfinx
 35 |           repository: FEniCS/dolfinx
 36 |           ref: main
 37 | 
 38 |       - name: Install FEniCS Python components
 39 |         run: |
 40 |           apt-get -qq update
 41 |           apt-get -y install libboost-program-options-dev
 42 |           pip3 install --break-system-packages pip --upgrade
 43 |           pip3 install --break-system-packages git+https://github.com/FEniCS/ufl.git
 44 |           pip3 install --break-system-packages git+https://github.com/FEniCS/basix.git
 45 |           pip3 install --break-system-packages git+https://github.com/FEniCS/ffcx
 46 |       - name: Build dolfinx cpp
 47 |         run: |
 48 |           cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build -S dolfinx/cpp/
 49 |           cmake --build build
 50 |           cmake --install build
 51 |       - name: Build performance test
 52 |         run: |
 53 |           cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build-dir -S src
 54 |           cmake --build build-dir
 55 |           cmake --install build-dir
 56 |       - name: Run Poisson test (BoomerAMG, serial)
 57 |         run: |
 58 |           dolfinx-scaling-test \
 59 |           --problem_type poisson \
 60 |           --scaling_type weak \
 61 |           --ndofs 50000 \
 62 |           -log_view \
 63 |           -ksp_view \
 64 |           -ksp_type cg \
 65 |           -ksp_rtol 1.0e-8 \
 66 |           -pc_type hypre \
 67 |           -pc_hypre_type boomeramg \
 68 |           -pc_hypre_boomeramg_strong_threshold 0.7 \
 69 |           -pc_hypre_boomeramg_agg_nl 4 \
 70 |           -pc_hypre_boomeramg_agg_num_paths 2
 71 |       - name: Run Poisson test (BoomerAMG, weak)
 72 |         run: |
 73 |           mpirun -np 2 dolfinx-scaling-test \
 74 |           --problem_type poisson \
 75 |           --scaling_type weak \
 76 |           --ndofs 50000 \
 77 |           -log_view \
 78 |           -ksp_view \
 79 |           -ksp_type cg \
 80 |           -ksp_rtol 1.0e-8 \
 81 |           -pc_type hypre \
 82 |           -pc_hypre_type boomeramg \
 83 |           -pc_hypre_boomeramg_strong_threshold 0.7 \
 84 |           -pc_hypre_boomeramg_agg_nl 4 \
 85 |           -pc_hypre_boomeramg_agg_num_paths 2
 86 |       - name: Run Poisson test (BoomerAMG, 3rd order, weak)
 87 |         run: |
 88 |           mpirun -np 2 dolfinx-scaling-test \
 89 |           --problem_type poisson \
 90 |           --scaling_type weak \
 91 |           --ndofs 50000 \
 92 |           --order 3 \
 93 |           -log_view \
 94 |           -ksp_view \
 95 |           -ksp_type cg \
 96 |           -ksp_rtol 1.0e-8 \
 97 |           -pc_type hypre \
 98 |           -pc_hypre_type boomeramg \
 99 |           -pc_hypre_boomeramg_strong_threshold 0.7 \
100 |           -pc_hypre_boomeramg_agg_nl 4 \
101 |           -pc_hypre_boomeramg_agg_num_paths 2
102 |       - name: Run Poisson test (BoomerAMG, weak, unstructured mesh)
103 |         run: |
104 |           mpirun -np 2 dolfinx-scaling-test \
105 |           --problem_type poisson \
106 |           --mesh_type unstructured \
107 |           --scaling_type weak \
108 |           --ndofs 50000 \
109 |           -log_view \
110 |           -ksp_view \
111 |           -ksp_type cg \
112 |           -ksp_rtol 1.0e-8 \
113 |           -pc_type hypre \
114 |           -pc_hypre_type boomeramg \
115 |           -pc_hypre_boomeramg_strong_threshold 0.7 \
116 |           -pc_hypre_boomeramg_agg_nl 4 \
117 |           -pc_hypre_boomeramg_agg_num_paths 2
118 |       - name: Run Poisson test (BoomerAMG, strong)
119 |         run: |
120 |           mpirun -np 2 dolfinx-scaling-test \
121 |           --problem_type poisson \
122 |           --scaling_type strong \
123 |           --ndofs 1000000 \
124 |           -log_view \
125 |           -ksp_view \
126 |           -ksp_type cg \
127 |           -ksp_rtol 1.0e-8 \
128 |           -pc_type hypre \
129 |           -pc_hypre_type boomeramg \
130 |           -pc_hypre_boomeramg_strong_threshold 0.7 \
131 |           -pc_hypre_boomeramg_agg_nl 4 \
132 |           -pc_hypre_boomeramg_agg_num_paths 2
133 |       - name: Run elasticity test (GAMG, serial)
134 |         run: |
135 |           dolfinx-scaling-test \
136 |           --problem_type elasticity \
137 |           --scaling_type weak \
138 |           --ndofs 100000 \
139 |           -log_view \
140 |           -ksp_view \
141 |           -ksp_type cg \
142 |           -ksp_rtol 1.0e-8 \
143 |           -pc_type gamg \
144 |           -pc_gamg_coarse_eq_limit 1000 \
145 |           -mg_levels_ksp_type chebyshev \
146 |           -mg_levels_pc_type jacobi \
147 |           -mg_levels_esteig_ksp_type cg \
148 |           -matptap_via scalable
149 |       - name: Run elasticity test (GAMG, weak)
150 |         run: |
151 |           mpirun -np 2 dolfinx-scaling-test \
152 |           --problem_type elasticity \
153 |           --scaling_type weak \
154 |           --ndofs 100000 \
155 |           -log_view \
156 |           -ksp_view \
157 |           -ksp_type cg \
158 |           -ksp_rtol 1.0e-8 \
159 |           -pc_type gamg \
160 |           -pc_gamg_coarse_eq_limit 1000 \
161 |           -mg_levels_ksp_type chebyshev \
162 |           -mg_levels_pc_type jacobi \
163 |           -mg_levels_esteig_ksp_type cg \
164 |           -matptap_via scalable
165 |       - name: Run elasticity test (GAMG, 3rd order, weak)
166 |         run: |
167 |           mpirun -np 2 dolfinx-scaling-test \
168 |           --problem_type elasticity \
169 |           --scaling_type weak \
170 |           --ndofs 100000 \
171 |           --order 3 \
172 |           -log_view \
173 |           -ksp_view \
174 |           -ksp_type cg \
175 |           -ksp_rtol 1.0e-8 \
176 |           -pc_type gamg \
177 |           -pc_gamg_coarse_eq_limit 1000 \
178 |           -mg_levels_ksp_type chebyshev \
179 |           -mg_levels_pc_type jacobi \
180 |           -mg_levels_esteig_ksp_type cg \
181 |           -matptap_via scalable
182 |       - name: Run elasticity test (GAMG, strong)
183 |         run: |
184 |           mpirun -np 2 dolfinx-scaling-test \
185 |           --problem_type elasticity \
186 |           --scaling_type strong \
187 |           --ndofs 500000 \
188 |           -log_view \
189 |           -ksp_view \
190 |           -ksp_type cg \
191 |           -ksp_rtol 1.0e-8 \
192 |           -pc_type gamg \
193 |           -pc_gamg_coarse_eq_limit 1000 \
194 |           -mg_levels_ksp_type chebyshev \
195 |           -mg_levels_pc_type jacobi \
196 |           -mg_levels_esteig_ksp_type cg \
197 |           -matptap_via scalable
198 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | 
3 | .vscode
4 | .devcontainer


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Builds a Docker image with the necessary libraries for compiling
 2 | # FEniCS. The image is at
 3 | # https://hub.docker.com/r/fenicsproject/performance-tests
 4 | #
 5 | # Authors: Garth N. Wells <gnw20@cam.ac.uk>
 6 | 
 7 | ARG PETSC_VERSION=3.12.4
 8 | 
 9 | FROM ubuntu:20.04
10 | 
11 | WORKDIR /tmp
12 | 
13 | # Environment variables
14 | ENV OPENBLAS_NUM_THREADS=1
15 | 
16 | # Non-Python utilities and libraries
17 | RUN apt-get -qq update && \
18 |     apt-get -y --with-new-pkgs \
19 |     -o Dpkg::Options::="--force-confold" upgrade && \
20 |     apt-get -y install \
21 |     bison \
22 |     clang \
23 |     cmake \
24 |     flex \
25 |     g++ \
26 |     gfortran \
27 |     git \
28 |     libboost-filesystem-dev \
29 |     libboost-iostreams-dev \
30 |     libboost-math-dev \
31 |     libboost-program-options-dev \
32 |     libboost-system-dev \
33 |     libboost-thread-dev \
34 |     libboost-timer-dev \
35 |     liblapack-dev \
36 |     libmpich-dev \
37 |     libopenblas-dev \
38 |     libhdf5-mpich-dev \
39 |     mpich \
40 |     ninja-build \
41 |     python3 \
42 |     python3-dev \
43 |     pkg-config \
44 |     wget && \
45 |     apt-get clean && \
46 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
47 | 
48 | # Install PETSc from source
49 | ARG PETSC_VERSION
50 | RUN git clone --branch v${PETSC_VERSION} --depth 1 https://gitlab.com/petsc/petsc.git && \
51 |     cd petsc && \
52 |     python3 ./configure --with-64-bit-indices=0 \
53 |     --COPTFLAGS="-O3" \
54 |     --CXXOPTFLAGS="-O3" \
55 |     --FOPTFLAGS="-O3" \
56 |     --with-c-support \
57 |     --with-fortran-bindings=no \
58 |     --with-debugging=0 \
59 |     --with-shared-libraries \
60 |     --download-hypre \
61 |     --download-ptscotch \
62 |     --prefix=/usr/local/petsc-32 && \
63 |     make && \
64 |     make install && \
65 |     git clean -fdx . && \
66 |     python3 ./configure --with-64-bit-indices=1 \
67 |     --COPTFLAGS="-O3" \
68 |     --CXXOPTFLAGS="-O3" \
69 |     --FOPTFLAGS="-O3" \
70 |     --with-c-support \
71 |     --with-fortran-bindings=no \
72 |     --with-debugging=0 \
73 |     --with-shared-libraries \
74 |     --download-hypre \
75 |     --download-ptscotch \
76 |     --prefix=/usr/local/petsc-64 && \
77 |     make && \
78 |     make install && \
79 |     rm -rf /tmp/*
80 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright 2017 Chris N. Richardson and Garth N. Wells
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Performance test codes for FEniCSx/DOLFINx
  2 | 
  3 | This repository contains solvers for testing the parallel performance of
  4 | DOLFINx and the underlying linear solvers. It tests elliptic equations
  5 | (the Poisson equation and elasticity) in three dimensions.
  6 | 
  7 | Representative performance data is available at
  8 | https://fenics.github.io/performance-test-results/.
  9 | 
 10 | [![FEniCS Performance Test CI](https://github.com/FEniCS/performance-test/workflows/FEniCS%20Performance%20Test%20CI/badge.svg)](https://github.com/FEniCS/performance-test/actions?query=branch%3Amain)
 11 | 
 12 | ## Building
 13 | 
 14 | The source of the tests is in the `src/` directory.
 15 | 
 16 | 
 17 | ### Requirements
 18 | 
 19 | - FEniCSx/DOLFINx installation (development version of DOLFINx
 20 |   **required**)
 21 | - PETSc installation
 22 | - Boost Program Options
 23 | 
 24 | 
 25 | ### Compilation
 26 | 
 27 | In the `src/` directory, build the program:
 28 | 
 29 |         cmake .
 30 |         make
 31 | 
 32 | 
 33 | ## Running tests
 34 | 
 35 | Options for the test are:
 36 | 
 37 | - Problem type (`--problem_type`): `poisson` or `elasticity`
 38 | - Scaling type (`--scaling_type`): `strong` (fixed problem size) or
 39 |   `weak` (fixed problem size per process)
 40 | - Number of degrees-of-freedom (`--ndofs`): total (in case of strong
 41 |   scaling) or per process (for weak scaling)
 42 | - Order (`--order`): polynomial order (1, 2, or 3) - only on cube mesh,
 43 |   defaults to 1.
 44 | - File output (`--output`): `true` or `false` (IO performance depends
 45 |   heavily on the underlying filesystem)
 46 | - Data output directory (`--output_dir`): directory to write solution
 47 |   data to
 48 | 
 49 | Linear solver options are configured via PETSc command line options
 50 | (single hyphen), as shown below.
 51 | 
 52 | 
 53 | ## Recommended test configuration
 54 | 
 55 | Suggested options for running tests are listed below. The options
 56 | include PETSc performance logging which is useful for assessing
 57 | performance.
 58 | 
 59 | ### Elasticity
 60 | 
 61 | For elasticity, a conjugate gradient (CG) solver with a smoothed
 62 | aggregation algebraic multigrid (GAMG) preconditioner is recommended.
 63 | For a weak scaling test with 8 MPI processes and 500k degrees-of-freedom
 64 | per process:
 65 | 
 66 | ```
 67 | mpirun -np 8 ./dolfinx-scaling-test \
 68 | --problem_type elasticity \
 69 | --scaling_type weak \
 70 | --ndofs 500000 \
 71 | -log_view \
 72 | -ksp_view \
 73 | -ksp_type cg \
 74 | -ksp_rtol 1.0e-8 \
 75 | -pc_type gamg \
 76 | -pc_gamg_coarse_eq_limit 1000 \
 77 | -mg_levels_ksp_type chebyshev \
 78 | -mg_levels_pc_type jacobi \
 79 | -mg_levels_esteig_ksp_type cg \
 80 | -matptap_via scalable \
 81 | -options_left
 82 | ```
 83 | 
 84 | For a strong scaling test, with 8 MPI processes and 10M
 85 | degrees-of-freedom in total:
 86 | 
 87 | 
 88 | ```
 89 | mpirun -np 8 ./dolfinx-scaling-test \
 90 | --problem_type elasticity \
 91 | --scaling_type strong \
 92 | --ndofs 10000000 \
 93 | -log_view \
 94 | -ksp_view \
 95 | -ksp_type cg \
 96 | -ksp_rtol 1.0e-8 \
 97 | -pc_type gamg \
 98 | -pc_gamg_coarse_eq_limit 1000 \
 99 | -mg_levels_ksp_type chebyshev \
100 | -mg_levels_pc_type jacobi \
101 | -mg_levels_esteig_ksp_type cg \
102 | -matptap_via scalable \
103 | -options_left
104 | ```
105 | 
106 | ### Poisson
107 | 
108 | For the Poisson equation, a conjugate gradient (CG) solver with a
109 | classical algebraic multigrid (BoomerAMG) preconditioner is
110 | recommended.  For a weak scaling test with 8 MPI processes and 500k
111 | degrees-of-freedom per process:
112 | 
113 | ```
114 | mpirun -np 8 ./dolfinx-scaling-test \
115 | --problem_type poisson \
116 | --scaling_type weak \
117 | --ndofs 500000 \
118 | -log_view \
119 | -ksp_view \
120 | -ksp_type cg \
121 | -ksp_rtol 1.0e-8 \
122 | -pc_type hypre \
123 | -pc_hypre_type boomeramg \
124 | -pc_hypre_boomeramg_strong_threshold 0.7 \
125 | -pc_hypre_boomeramg_agg_nl 4 \
126 | -pc_hypre_boomeramg_agg_num_paths 2 \
127 | -options_left
128 | ```
129 | For a strong scaling test, with 8 MPI processes and 10M
130 | degrees-of-freedom in total:
131 | ```
132 | mpirun -np 8 ./dolfinx-scaling-test \
133 | --problem_type poisson \
134 | --scaling_type strong \
135 | --ndofs 10000000 \
136 | -log_view \
137 | -ksp_view \
138 | -ksp_type cg \
139 | -ksp_rtol 1.0e-8 \
140 | -pc_type hypre \
141 | -pc_hypre_type boomeramg \
142 | -pc_hypre_boomeramg_strong_threshold 0.7 \
143 | -pc_hypre_boomeramg_agg_nl 4 \
144 | -pc_hypre_boomeramg_agg_num_paths 2 \
145 | -options_left
146 | ```
147 | 
148 | ## Interpreting the output
149 | 
150 | The default loglevel diagnostic messages from DOLFINx will be present, and if `-log_view` is specified, there will be a performance profile from PETSc. There's also a "Test problem summary" summarizing the test parameters and environment to aid with reproducibility. Finally, there's a table labeled "Summary of timings" that contains various times of interest (in units of seconds); the entries that are specific to this test are prefixed with `ZZZ`. We elaborate on some of them:
151 | 
152 | - `ZZZ Create Mesh`: Create the mesh to be used as the spatial discretisation of the domain in the FE problem
153 | - `ZZZ Create facets and facet->cell connectivity`: Compute the topological connectivity of the mesh's graph, i.e. determine which cells are connected to each facet.
154 | - `ZZZ FunctionSpace`: Create the function space in which the finite element method solution will be sought along with appropriate index maps for each degree of freedom and their relationship with the mesh.
155 | - `ZZZ Assemble`: Encompassing timer for:
156 |   - `ZZZ Create boundary conditions`: Find the mesh’s topological indices and corresponding degree of freedom indices on which to impose boundary data in a strong Dirichlet sense.
157 |   - `ZZZ Create RHS function`: This is the step computing the function $f$ in the cases where $\nabla^2u=-f$ (Poisson) and $\nabla\cdot\sigma(u)=-f$ (elasticity, i.e. elastostatics in this case).
158 |   - `ZZZ Assemble matrix`: Assemble the finite element matrix $A$ underlying the finite element formulation, such that we seek to later solve $A\vec{x}=\vec{b}$.
159 |   - `ZZZ Assemble vector`: Assemble the right-hand-side vector $\vec{b}$.
160 | - `ZZZ Solve`: Compute the solution of the linear system. This is typically the dominant stage taking the greatest computational effort.
161 | - `ZZZ Output`: Postprocess and potentially output (with `--output`) results to disk.
162 | 
163 | 
164 | ## Reference performance data
165 | 
166 | Reference performance data is provided [here](performance.md) to help
167 | in assessing performance on a given system.
168 | 
169 | 
170 | ## Authors and license
171 | 
172 | The tests have been developed by Chris N. Richardson
173 | (<chris@bpi.cam.ac.uk>) and Garth N. Wells (<gnw20@cam.ac.uk>).
174 | 
175 | The code is covered by the MIT license. See LICENSE.md.
176 | 


--------------------------------------------------------------------------------
/performance.md:
--------------------------------------------------------------------------------
1 | # Performance test results
2 | 
3 | * [Some test results on CSD3 (University of Cambridge HPC system)](https://fenics.github.io/performance-test-results) up to 512 cores on 16 nodes. [Raw data](https://github.com/FEniCS/performance-test-results)
4 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
 1 | CMakeCache.txt
 2 | CMakeFiles/
 3 | Elasticity.c
 4 | Elasticity.h
 5 | Makefile
 6 | Poisson.c
 7 | Poisson.h
 8 | cmake_install.cmake
 9 | dolfinx-scaling-test
10 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16)
 2 | 
 3 | set(PROJECT_NAME dolfinx-scaling-test)
 4 | project(${PROJECT_NAME})
 5 | 
 6 | include(GNUInstallDirs)
 7 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 8 | 
 9 | # Use C++20
10 | set(CMAKE_CXX_STANDARD 20)
11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
12 | set(CMAKE_CXX_EXTENSIONS OFF)
13 | 
14 | # Get DOLFINx configuration data (DOLFINXConfig.cmake must be in a
15 | # location searched by CMake, e.g. one listed in CMAKE_PREFIX_PATH)
16 | find_package(DOLFINX REQUIRED)
17 | 
18 | # set(CMAKE_BUILD_TYPE "Release")
19 | set(CMAKE_CXX_FLAGS "-Ofast ${CMAKE_CXX_FLAGS} -g -Wall")
20 | set(CMAKE_C_FLAGS "-Ofast ${CMAKE_C_FLAGS} -g -Wall")
21 | 
22 | # Compile UFL files
23 | add_custom_command(
24 |   OUTPUT Poisson.c
25 |   COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Poisson.py
26 |   DEPENDS Poisson.py
27 | )
28 | 
29 | add_custom_command(
30 |   OUTPUT Elasticity.c
31 |   COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Elasticity.py
32 |   DEPENDS Elasticity.py
33 | )
34 | 
35 | set(CMAKE_INCLUDE_CURRENT_DIR ON)
36 | 
37 | # Executable
38 | add_executable(${PROJECT_NAME} main.cpp mesh.cpp elasticity_problem.cpp cgpoisson_problem.cpp poisson_problem.cpp mem.cpp
39 | ${CMAKE_CURRENT_BINARY_DIR}/Elasticity.c
40 | ${CMAKE_CURRENT_BINARY_DIR}/Poisson.c)
41 | 
42 | # Find Boost program_options
43 | if(DEFINED ENV{BOOST_ROOT} OR DEFINED BOOST_ROOT)
44 |   set(Boost_NO_SYSTEM_PATHS on)
45 | endif()
46 | # set(Boost_USE_MULTITHREADED $ENV{BOOST_USE_MULTITHREADED})
47 | set(Boost_VERBOSE TRUE)
48 | find_package(Boost 1.70 REQUIRED program_options)
49 | 
50 | # Target libraries
51 | target_link_libraries(${PROJECT_NAME} dolfinx Boost::program_options pthread)
52 | 
53 | message(STATUS ${CMAKE_CXX_FLAGS})
54 | install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
55 | 


--------------------------------------------------------------------------------
/src/Elasticity.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells
 2 | #
 3 | # This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | #
 5 | # SPDX-License-Identifier:    MIT
 6 | 
 7 | import basix.ufl
 8 | from ufl import (Coefficient, Identity, FunctionSpace, Mesh, TestFunction, TrialFunction,
 9 |                  dx, grad, inner, tetrahedron, tr)
10 | 
# Elasticity parameters.
# mu and lmbda are derived from E and nu below; presumably E is Young's
# modulus and nu is Poisson's ratio, in which case these are the usual
# Lame parameter conversions -- TODO confirm.
E = 1.0e6
nu = 0.3
mu = E / (2.0 * (1.0 + nu))
lmbda = E * nu / ((1.0 + nu) * (1.0 - 2.0 * nu))
# NOTE(review): `cell` is not referenced again in this file.
cell = tetrahedron

# Load namespace. At module level vars() is the module's own namespace
# dictionary, so ns[name] = value defines a module-level variable `name`.
ns = vars()

# Collect the bilinear (a) and linear (L) forms for degrees 1, 2 and 3.
forms = []
for degree in range(1, 4):
    # Vector-valued (shape (3,)) Lagrange element of the current degree on
    # tetrahedra; the mesh geometry always uses a degree 1 element.
    element = basix.ufl.element("Lagrange", "tetrahedron", degree, shape=(3, ))
    domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3, )))
    space = FunctionSpace(domain, element)

    u, v = TrialFunction(space), TestFunction(space)
    f = Coefficient(space)

    # Symmetric part of the gradient of v.
    def eps(v):
        return 0.5*(grad(v) + grad(v).T)

    # Stress expression: 2*mu*eps(v) + lmbda*tr(eps(v))*I, with I the 3x3
    # identity.
    def sigma(v):
        return 2.0*mu*eps(v) + lmbda*tr(eps(v))*Identity(3)

    # Add forms to namespace with names a1, a2, a3 etc. (and L1, L2, L3 for
    # the corresponding linear forms).
    aname = 'a' + str(degree)
    Lname = 'L' + str(degree)
    ns[aname] = inner(sigma(u), eps(v))*dx
    ns[Lname] = inner(f, v)*dx

    # Remove the module-level names of the arguments and the coefficient
    # before the next iteration; presumably so that the form compiler does
    # not pick them up by name -- TODO confirm.
    del u, v, f
    forms += [ns[aname], ns[Lname]]
44 | 


--------------------------------------------------------------------------------
/src/Poisson.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells
 2 | #
 3 | # This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | #
 5 | # SPDX-License-Identifier:    MIT
 6 | 
 7 | import basix.ufl
 8 | from ufl import (Coefficient, FunctionSpace, TestFunction, TrialFunction, Mesh, action, ds,
 9 |                  dx, grad, inner, tetrahedron)
10 | 
# Load namespace. At module level vars() is the module's own namespace
# dictionary, so ns[name] = value defines a module-level variable `name`.
ns = vars()

# Collect the forms a, L and M for degrees 1, 2 and 3.
forms = []
for degree in range(1, 4):
    # Scalar Lagrange element of the current degree on tetrahedra; the mesh
    # geometry always uses a degree 1 vector-valued (shape (3,)) element.
    element = basix.ufl.element("Lagrange", "tetrahedron", degree)
    domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3,)))
    space = FunctionSpace(domain, element)

    u = TrialFunction(space)
    v = TestFunction(space)
    # f appears in the cell integral (dx) of L and g in the exterior facet
    # integral (ds); un is the coefficient the bilinear form is applied to
    # in M.
    f = Coefficient(space)
    g = Coefficient(space)
    un = Coefficient(space)

    aname = 'a' + str(degree)
    Lname = 'L' + str(degree)
    Mname = 'M' + str(degree)

    # Insert into namespace so that the forms will be named a1, a2, a3 etc.
    # (likewise L1, L2, L3 and M1, M2, M3).
    ns[aname] = inner(grad(u), grad(v))*dx
    ns[Lname] = f*v*dx + g*v*ds
    # M is the bilinear form a with its trial function replaced by the
    # coefficient un, i.e. the action of a on un.
    ns[Mname] = action(ns[aname], un)

    # Delete, so that the forms will get unnamed args and coefficients
    # and default to v_0, v_1, w0, w1 etc.
    del u, v, f, g, un

    forms += [ns[aname], ns[Lname], ns[Mname]]
40 | 


--------------------------------------------------------------------------------
/src/cg.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2021 Igor A. Baratta, Chris Richardson
 2 | // SPDX-License-Identifier:    MIT
 3 | 
 4 | #include <algorithm>
 5 | #include <dolfinx/common/IndexMap.h>
 6 | #include <dolfinx/common/MPI.h>
 7 | #include <dolfinx/la/Vector.h>
 8 | 
 9 | using namespace dolfinx;
10 | 
11 | namespace linalg
12 | {
13 | /// Compute vector r = alpha*x + y
14 | /// @param[out] r Result
15 | /// @param[in] alpha
16 | /// @param[in] x
17 | /// @param[in] y
18 | template <typename U>
19 | void axpy(la::Vector<U>& r, U alpha, const la::Vector<U>& x,
20 |           const la::Vector<U>& y)
21 | {
22 |   std::transform(x.array().begin(), x.array().end(), y.array().begin(),
23 |                  r.mutable_array().begin(),
24 |                  [alpha](auto x, auto y) { return alpha * x + y; });
25 | }
26 | 
27 | /// Solve problem A.x = b using the Conjugate Gradient method
28 | /// @tparam U The scalar type
29 | /// @tparam ApplyFunction Type of the function object "action"
30 | /// @param[in, out] x Solution vector, may be set to an initial guess
31 | /// @param[in] b RHS Vector
32 | /// @param[in] action Function that provides the action of the linear operator
33 | /// @param[in] kmax Maximum number of iterations
34 | /// @param[in] rtol Relative tolerances for convergence
35 | /// @return The number if iterations
36 | /// @pre It is required that the ghost values of `x` and `b` have been
37 | /// updated before this function is called
38 | template <typename U, typename ApplyFunction>
39 | int cg(la::Vector<U>& x, const la::Vector<U>& b, ApplyFunction&& action,
40 |        int kmax = 50, double rtol = 1e-8)
41 | {
42 |   // Create working vectors
43 |   la::Vector<U> r(b), y(b);
44 | 
45 |   // Compute initial residual r0 = b - Ax0
46 |   action(x, y);
47 |   axpy(r, U(-1), y, b);
48 | 
49 |   // Create p work vector
50 |   la::Vector<U> p(r);
51 | 
52 |   // Iterations of CG
53 |   auto rnorm0 = la::squared_norm(r);
54 |   const auto rtol2 = rtol * rtol;
55 |   auto rnorm = rnorm0;
56 |   int k = 0;
57 |   while (k < kmax)
58 |   {
59 |     ++k;
60 | 
61 |     // Compute y = A p
62 |     action(p, y);
63 | 
64 |     // Compute alpha = r.r/p.y
65 |     const U alpha = rnorm / la::inner_product(p, y);
66 | 
67 |     // Update x (x <- x + alpha*p)
68 |     axpy(x, alpha, p, x);
69 | 
70 |     // Update r (r <- r - alpha*y)
71 |     axpy(r, -alpha, y, r);
72 | 
73 |     // Update residual norm
74 |     const auto rnorm_new = la::squared_norm(r);
75 |     const U beta = rnorm_new / rnorm;
76 |     rnorm = rnorm_new;
77 | 
78 |     if (rnorm / rnorm0 < rtol2)
79 |       break;
80 | 
81 |     // Update p (p <- beta*p + r)
82 |     axpy(p, beta, p, r);
83 |   }
84 | 
85 |   return k;
86 | }
87 | } // namespace linalg
88 | 


--------------------------------------------------------------------------------
/src/cgpoisson_problem.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
  2 | //
  3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
  4 | //
  5 | // SPDX-License-Identifier:    MIT
  6 | 
  7 | #include "cgpoisson_problem.h"
  8 | #include "Poisson.h"
  9 | #include "cg.h"
 10 | #include <cfloat>
 11 | #include <cmath>
 12 | #include <dolfinx/common/Scatterer.h>
 13 | #include <dolfinx/common/Timer.h>
 14 | #include <dolfinx/fem/DirichletBC.h>
 15 | #include <dolfinx/fem/Function.h>
 16 | #include <dolfinx/fem/FunctionSpace.h>
 17 | #include <dolfinx/fem/assembler.h>
 18 | #include <dolfinx/fem/petsc.h>
 19 | #include <dolfinx/fem/utils.h>
 20 | #include <dolfinx/la/MatrixCSR.h>
 21 | #include <dolfinx/mesh/Mesh.h>
 22 | #include <dolfinx/mesh/utils.h>
 23 | #include <memory>
 24 | #include <petscsys.h>
 25 | #include <utility>
 26 | 
 27 | using namespace dolfinx;
 28 | using T = PetscScalar;
 29 | 
 30 | namespace
 31 | {
 32 | void pack_fn(std::span<const T> in, std::span<const std::int32_t> idx,
 33 |              std::span<T> out)
 34 | {
 35 |   for (std::size_t i = 0; i < idx.size(); ++i)
 36 |     out[i] = in[idx[i]];
 37 | }
 38 | 
 39 | void unpack_fn(std::span<const T> in, std::span<const std::int32_t> idx,
 40 |                std::span<T> out, std::function<T(T, T)> op)
 41 | {
 42 |   for (std::size_t i = 0; i < idx.size(); ++i)
 43 |     out[idx[i]] = op(out[idx[i]], in[i]);
 44 | }
 45 | } // namespace
 46 | 
 47 | std::tuple<std::shared_ptr<la::Vector<T>>, std::shared_ptr<fem::Function<T>>,
 48 |            std::function<int(fem::Function<T>&, const la::Vector<T>&)>>
 49 | cgpoisson::problem(std::shared_ptr<mesh::Mesh<double>> mesh, int order,
 50 |                    std::string scatterer)
 51 | {
 52 |   common::Timer t0("ZZZ FunctionSpace");
 53 | 
 54 |   auto element = basix::create_element<double>(
 55 |       basix::element::family::P, basix::cell::type::tetrahedron, order,
 56 |       basix::element::lagrange_variant::gll_warped,
 57 |       basix::element::dpc_variant::unset, false);
 58 | 
 59 |   auto dolfinx_element
 60 |       = std::make_shared<const fem::FiniteElement<double>>(element);
 61 | 
 62 |   auto V = std::make_shared<fem::FunctionSpace<double>>(
 63 |       fem::create_functionspace(mesh, dolfinx_element));
 64 | 
 65 |   t0.stop();
 66 |   t0.flush();
 67 | 
 68 |   common::Timer t1("ZZZ Assemble");
 69 | 
 70 |   common::Timer t2("ZZZ Create boundary conditions");
 71 |   // Define boundary condition
 72 |   auto u0 = std::make_shared<fem::Function<T>>(V);
 73 |   u0->x()->set(0);
 74 | 
 75 |   // Find facets with bc applied
 76 |   const int tdim = mesh->topology()->dim();
 77 |   const std::vector<std::int32_t> bc_facets = mesh::locate_entities(
 78 |       *mesh, tdim - 1,
 79 |       [](auto x)
 80 |       {
 81 |         constexpr double eps = 1.0e-8;
 82 |         std::vector<std::int8_t> marker(x.extent(1), false);
 83 |         for (std::size_t p = 0; p < x.extent(1); ++p)
 84 |         {
 85 |           double x0 = x(0, p);
 86 |           if (std::abs(x0) < eps or std::abs(x0 - 1) < eps)
 87 |             marker[p] = true;
 88 |         }
 89 |         return marker;
 90 |       });
 91 | 
 92 |   // Find constrained dofs
 93 |   const std::vector<std::int32_t> bdofs = fem::locate_dofs_topological(
 94 |       *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets);
 95 | 
 96 |   auto bc = std::make_shared<fem::DirichletBC<T>>(u0, bdofs);
 97 |   t2.stop();
 98 |   t2.flush();
 99 | 
100 |   // Define coefficients
101 |   common::Timer t3("ZZZ Create RHS function");
102 |   auto f = std::make_shared<fem::Function<T>>(V);
103 |   auto g = std::make_shared<fem::Function<T>>(V);
104 |   f->interpolate(
105 |       [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
106 |       {
107 |         std::vector<T> v(x.extent(1));
108 |         for (std::size_t p = 0; p < x.extent(1); ++p)
109 |         {
110 |           double dx = x(0, p) - 0.5;
111 |           double dy = x(1, p) - 0.5;
112 |           double dr = dx * dx + dy * dy;
113 |           v[p] = 10 * std::exp(-dr / 0.02);
114 |         }
115 | 
116 |         return {std::move(v), {v.size()}};
117 |       });
118 |   g->interpolate(
119 |       [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
120 |       {
121 |         std::vector<T> f(x.extent(1));
122 |         for (std::size_t p = 0; p < x.extent(1); ++p)
123 |           f[p] = std::sin(5 * x(0, p));
124 |         return {f, {f.size()}};
125 |       });
126 |   t3.stop();
127 |   t3.flush();
128 | 
129 |   std::vector form_poisson_L
130 |       = {form_Poisson_L1, form_Poisson_L2, form_Poisson_L3};
131 |   std::vector form_poisson_a
132 |       = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3};
133 |   std::vector form_poisson_M
134 |       = {form_Poisson_M1, form_Poisson_M2, form_Poisson_M3};
135 | 
136 |   // Define variational forms
137 |   auto L = std::make_shared<fem::Form<T>>(fem::create_form<T>(
138 |       *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {}, {}));
139 |   // auto a = std::make_shared<fem::Form<T>>(fem::create_form<T>(
140 |   //     *form_poisson_a.at(order - 1), {V, V},
141 |   //     std::vector<std::shared_ptr<const fem::Function<T>>>{}, {}, {}));
142 | 
143 |   auto un = std::make_shared<fem::Function<T>>(V);
144 |   auto M = std::make_shared<fem::Form<T>>(fem::create_form<T>(
145 |       *form_poisson_M.at(order - 1), {V}, {{"w0", un}}, {{}}, {}, {}));
146 | 
147 |   // Create la::Vector
148 |   la::Vector<T> b(L->function_spaces()[0]->dofmap()->index_map,
149 |                   L->function_spaces()[0]->dofmap()->index_map_bs());
150 |   b.set(0);
151 |   common::Timer t5("ZZZ Assemble vector");
152 |   const std::vector constants_L = fem::pack_constants(*L);
153 |   auto coeffs_L = fem::allocate_coefficient_storage(*L);
154 |   fem::pack_coefficients(*L, coeffs_L);
155 |   fem::assemble_vector<T>(b.mutable_array(), *L, constants_L,
156 |                           fem::make_coefficients_span(coeffs_L));
157 | 
158 |   // Apply lifting to account for Dirichlet boundary condition
159 |   // b <- b - A * x_bc
160 |   bc->set(un->x()->mutable_array(), std::nullopt, -1.0);
161 |   fem::assemble_vector(b.mutable_array(), *M);
162 | 
163 |   // Communicate ghost values
164 |   b.scatter_rev(std::plus<T>());
165 | 
166 |   // Set BC dofs to zero (effectively zeroes columns of A)
167 |   bc->set(b.mutable_array(), std::nullopt, 0.0);
168 |   b.scatter_fwd();
169 | 
170 |   // Pack coefficients and constants
171 | 
172 |   if (un->x()->array().size() != b.array().size())
173 |     throw std::runtime_error("error");
174 |   // Create Function to hold solution
175 |   auto u = std::make_shared<fem::Function<T>>(V);
176 | 
177 |   std::function<int(fem::Function<T>&, const la::Vector<T>&)> solver_function
178 |       = [M, un, bc, scatterer](fem::Function<T>& u, const la::Vector<T>& b)
179 |   {
180 |     const std::vector<T> constants;
181 |     auto coeff = fem::allocate_coefficient_storage(*M);
182 | 
183 |     auto V = M->function_spaces()[0];
184 |     auto idx_map = V->dofmap()->index_map;
185 |     int bs = V->dofmap()->bs();
186 |     common::Scatterer sct(*idx_map, bs);
187 | 
188 |     std::vector<T> local_buffer(sct.local_buffer_size(), 0);
189 |     std::vector<T> remote_buffer(sct.remote_buffer_size(), 0);
190 | 
191 |     common::Scatterer<>::type type;
192 |     if (scatterer == "neighbor")
193 |       type = common::Scatterer<>::type::neighbor;
194 |     if (scatterer == "p2p")
195 |       type = common::Scatterer<>::type::p2p;
196 | 
197 |     std::vector<MPI_Request> request = sct.create_request_vector(type);
198 | 
199 |     // Create function for computing the action of A on x (y = Ax)
200 |     auto action = [&](la::Vector<T>& x, la::Vector<T>& y)
201 |     {
202 |       // Zero y
203 |       y.set(0.0);
204 | 
205 |       // Update coefficient un (just copy data from x to un)
206 |       std::copy(x.array().begin(), x.array().end(),
207 |                 un->x()->mutable_array().begin());
208 | 
209 |       // Compute action of A on x
210 |       fem::pack_coefficients(*M, coeff);
211 |       fem::assemble_vector(y.mutable_array(), *M, std::span<const T>(constants),
212 |                            fem::make_coefficients_span(coeff));
213 | 
214 |       // Set BC dofs to zero (effectively zeroes rows of A)
215 |       bc->set(y.mutable_array(), std::nullopt, 0.0);
216 | 
217 |       // Accumuate ghost values
218 |       // y.scatter_rev(std::plus<T>());
219 | 
220 |       const std::int32_t local_size = bs * idx_map->size_local();
221 |       const std::int32_t num_ghosts = bs * idx_map->num_ghosts();
222 |       std::span<T> remote_data(y.mutable_array().data() + local_size,
223 |                                num_ghosts);
224 |       std::span<T> local_data(y.mutable_array().data(), local_size);
225 |       sct.scatter_rev_begin<T>(remote_data, remote_buffer, local_buffer,
226 |                                pack_fn, request, type);
227 |       sct.scatter_rev_end<T>(local_buffer, local_data, unpack_fn,
228 |                              std::plus<T>(), request);
229 | 
230 |       // Update ghost values
231 |       sct.scatter_fwd_begin<T>(local_data, local_buffer, remote_buffer, pack_fn,
232 |                                request, type);
233 |       sct.scatter_fwd_end<T>(remote_buffer, remote_data, unpack_fn, request);
234 |     };
235 | 
236 |     common::Timer tcg;
237 |     int num_it = linalg::cg(*u.x(), b, action, 100, 1e-6);
238 |     tcg.stop();
239 |     tcg.flush();
240 |     double time = std::chrono::duration<double>(tcg.elapsed()).count();
241 |     double ndofs_global
242 |         = static_cast<double>(V->dofmap()->index_map->size_global());
243 |     double gdofs = (num_it * ndofs_global) / time / 1e9;
244 | 
245 |     std::cout << "CG matrix-free action processed: " << gdofs << " Gdof/s\n";
246 | 
247 |     return num_it;
248 |   };
249 | 
250 |   return {std::make_shared<la::Vector<T>>(std::move(b)), u, solver_function};
251 | }
252 | 


--------------------------------------------------------------------------------
/src/cgpoisson_problem.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
 2 | //
 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | //
 5 | // SPDX-License-Identifier:    MIT
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <dolfinx/fem/Function.h>
10 | #include <dolfinx/la/Vector.h>
11 | #include <dolfinx/mesh/Mesh.h>
12 | #include <memory>
13 | #include <petscsys.h>
14 | #include <utility>
15 | 
16 | namespace cgpoisson
17 | {
18 | 
19 | std::tuple<std::shared_ptr<dolfinx::la::Vector<PetscScalar>>,
20 |            std::shared_ptr<dolfinx::fem::Function<PetscScalar>>,
21 |            std::function<int(dolfinx::fem::Function<PetscScalar>&,
22 |                              const dolfinx::la::Vector<PetscScalar>&)>>
23 |   problem(std::shared_ptr<dolfinx::mesh::Mesh<double>> mesh, int order, std::string scatterer);
24 | 
25 | } // namespace poisson
26 | 


--------------------------------------------------------------------------------
/src/elasticity_problem.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
  2 | //
  3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
  4 | //
  5 | // SPDX-License-Identifier:    MIT
  6 | 
  7 | #include "elasticity_problem.h"
  8 | #include "Elasticity.h"
  9 | #include <basix/mdspan.hpp>
 10 | #include <dolfinx/common/Timer.h>
 11 | #include <dolfinx/fem/DirichletBC.h>
 12 | #include <dolfinx/fem/DofMap.h>
 13 | #include <dolfinx/fem/Form.h>
 14 | #include <dolfinx/fem/Function.h>
 15 | #include <dolfinx/fem/FunctionSpace.h>
 16 | #include <dolfinx/fem/assembler.h>
 17 | #include <dolfinx/fem/petsc.h>
 18 | #include <dolfinx/la/Vector.h>
 19 | #include <dolfinx/la/petsc.h>
 20 | #include <dolfinx/la/utils.h>
 21 | #include <dolfinx/mesh/Geometry.h>
 22 | #include <dolfinx/mesh/Mesh.h>
 23 | #include <dolfinx/mesh/utils.h>
 24 | #include <memory>
 25 | #include <petscsys.h>
 26 | #include <span>
 27 | #include <utility>
 28 | 
 29 | using namespace dolfinx;
 30 | using T = PetscScalar;
 31 | 
 32 | namespace
 33 | {
 34 | // Function to compute the near nullspace for elasticity - it is made up
 35 | // of the six rigid body modes
 36 | MatNullSpace build_near_nullspace(const fem::FunctionSpace<double>& V)
 37 | {
 38 |   // Create vectors for nullspace basis
 39 |   auto map = V.dofmap()->index_map;
 40 |   int bs = V.dofmap()->index_map_bs();
 41 |   std::vector<la::Vector<T>> basis(6, la::Vector<T>(map, bs));
 42 | 
 43 |   // x0, x1, x2 translations
 44 |   std::int32_t length_block = map->size_local() + map->num_ghosts();
 45 |   for (int k = 0; k < 3; ++k)
 46 |   {
 47 |     std::span<T> x = basis[k].mutable_array();
 48 |     for (std::int32_t i = 0; i < length_block; ++i)
 49 |       x[bs * i + k] = 1.0;
 50 |   }
 51 | 
 52 |   // Rotations
 53 |   auto x3 = basis[3].mutable_array();
 54 |   auto x4 = basis[4].mutable_array();
 55 |   auto x5 = basis[5].mutable_array();
 56 | 
 57 |   const std::vector<double> x = V.tabulate_dof_coordinates(false);
 58 |   const std::int32_t* dofs = V.dofmap()->map().data_handle();
 59 |   for (std::size_t i = 0; i < V.dofmap()->map().size(); ++i)
 60 |   {
 61 |     std::span<const double, 3> xd(x.data() + 3 * dofs[i], 3);
 62 | 
 63 |     x3[bs * dofs[i] + 0] = -xd[1];
 64 |     x3[bs * dofs[i] + 1] = xd[0];
 65 | 
 66 |     x4[bs * dofs[i] + 0] = xd[2];
 67 |     x4[bs * dofs[i] + 2] = -xd[0];
 68 | 
 69 |     x5[bs * dofs[i] + 2] = xd[1];
 70 |     x5[bs * dofs[i] + 1] = -xd[2];
 71 |   }
 72 | 
 73 |   // Orthonormalize basis
 74 |   la::orthonormalize(std::vector<std::reference_wrapper<la::Vector<T>>>(
 75 |       basis.begin(), basis.end()));
 76 |   if (!la::is_orthonormal(
 77 |           std::vector<std::reference_wrapper<const la::Vector<T>>>(
 78 |               basis.begin(), basis.end())))
 79 |   {
 80 |     throw std::runtime_error("Space not orthonormal");
 81 |   }
 82 | 
 83 |   // Build PETSc nullspace object
 84 |   std::int32_t length = bs * map->size_local();
 85 |   std::vector<std::span<const T>> basis_local;
 86 |   std::transform(basis.cbegin(), basis.cend(), std::back_inserter(basis_local),
 87 |                  [length](auto& x)
 88 |                  { return std::span(x.array().data(), length); });
 89 |   MPI_Comm comm = V.mesh()->comm();
 90 |   std::vector<Vec> v = la::petsc::create_vectors(comm, basis_local);
 91 |   MatNullSpace ns = la::petsc::create_nullspace(comm, v);
 92 |   std::for_each(v.begin(), v.end(), [](auto v) { VecDestroy(&v); });
 93 |   return ns;
 94 | }
 95 | } // namespace
 96 | 
 97 | std::tuple<std::shared_ptr<la::Vector<T>>, std::shared_ptr<fem::Function<T>>,
 98 |            std::function<int(fem::Function<T>&, const la::Vector<T>&)>>
 99 | elastic::problem(std::shared_ptr<mesh::Mesh<double>> mesh, int order)
100 | {
101 |   common::Timer t0("ZZZ FunctionSpace");
102 | 
103 |   auto element = basix::create_element<double>(
104 |       basix::element::family::P, basix::cell::type::tetrahedron, order,
105 |       basix::element::lagrange_variant::gll_warped,
106 |       basix::element::dpc_variant::unset, false);
107 | 
108 |   auto dolfinx_element = std::make_shared<const fem::FiniteElement<double>>(
109 |       element, std::vector<std::size_t>{3});
110 |   auto V = std::make_shared<fem::FunctionSpace<double>>(
111 |       fem::create_functionspace(mesh, dolfinx_element));
112 | 
113 |   t0.stop();
114 |   t0.flush();
115 | 
116 |   common::Timer t0a("ZZZ Create boundary conditions");
117 | 
118 |   // Define boundary condition
119 |   auto u0 = std::make_shared<fem::Function<T>>(V);
120 |   u0->x()->set(0);
121 | 
122 |   const int tdim = mesh->topology()->dim();
123 | 
124 |   // Find facets with bc applied
125 |   const std::vector<std::int32_t> bc_facets = mesh::locate_entities(
126 |       *mesh, tdim - 1,
127 |       [](auto x)
128 |       {
129 |         constexpr double eps = 1.0e-8;
130 |         std::vector<std::int8_t> marker(x.extent(1), false);
131 |         for (std::size_t p = 0; p < x.extent(1); ++p)
132 |         {
133 |           double x1 = x(1, p);
134 |           if (std::abs(x1) < eps)
135 |             marker[p] = true;
136 |         }
137 |         return marker;
138 |       });
139 | 
140 |   // Find constrained dofs
141 |   const std::vector<std::int32_t> bdofs = fem::locate_dofs_topological(
142 |       *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets);
143 | 
144 |   // Bottom (x[1] = 0) surface
145 |   auto bc = std::make_shared<const fem::DirichletBC<T>>(u0, bdofs);
146 | 
147 |   t0a.stop();
148 |   t0a.flush();
149 | 
150 |   common::Timer t0b("ZZZ Create RHS function");
151 | 
152 |   // Define coefficients
153 |   auto f = std::make_shared<fem::Function<T>>(V);
154 |   f->interpolate(
155 |       [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
156 |       {
157 |         std::vector<T> vdata(x.extent(0) * x.extent(1));
158 |         namespace stdex
159 |             = MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE;
160 |         MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan<
161 |             T,
162 |             MDSPAN_IMPL_STANDARD_NAMESPACE::extents<
163 |                 std::size_t, 3, MDSPAN_IMPL_STANDARD_NAMESPACE::dynamic_extent>>
164 |             v(vdata.data(), x.extent(0), x.extent(1));
165 |         for (std::size_t p = 0; p < x.extent(1); ++p)
166 |         {
167 |           double dx = x(0, p) - 0.5;
168 |           double dz = x(2, p) - 0.5;
169 |           double r = std::sqrt(dx * dx + dz * dz);
170 |           v(0, p) = -dz * r * x(1, p);
171 |           v(1, p) = 1.0;
172 |           v(2, p) = dx * r * x(1, p);
173 |         }
174 | 
175 |         return {vdata, {v.extent(0), v.extent(1)}};
176 |       });
177 | 
178 |   t0b.stop();
179 |   t0b.flush();
180 | 
181 |   common::Timer t0c("ZZZ Create forms");
182 | 
183 |   // Define variational forms
184 |   std::vector form_elasticity_L
185 |       = {form_Elasticity_L1, form_Elasticity_L2, form_Elasticity_L3};
186 |   std::vector form_elasticity_a
187 |       = {form_Elasticity_a1, form_Elasticity_a2, form_Elasticity_a3};
188 |   auto L = std::make_shared<fem::Form<T, double>>(fem::create_form<T>(
189 |       *form_elasticity_L.at(order - 1), {V}, {{"w0", f}}, {}, {}, {}));
190 |   auto a = std::make_shared<const fem::Form<T, double>>(fem::create_form<T>(
191 |       *form_elasticity_a.at(order - 1), {V, V}, {}, {}, {}, {}));
192 |   t0c.stop();
193 |   t0c.flush();
194 | 
195 |   // Create matrices and vector, and assemble system
196 |   std::shared_ptr<la::petsc::Matrix> A = std::make_shared<la::petsc::Matrix>(
197 |       fem::petsc::create_matrix(*a), false);
198 | 
199 |   common::Timer t2("ZZZ Assemble matrix");
200 |   const std::vector constants_a = fem::pack_constants(*a);
201 |   auto coeffs_a = fem::allocate_coefficient_storage(*a);
202 |   fem::pack_coefficients(*a, coeffs_a);
203 |   fem::assemble_matrix(la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES),
204 |                        *a, std::span(constants_a),
205 |                        fem::make_coefficients_span(coeffs_a), {*bc});
206 |   MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY);
207 |   MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY);
208 |   fem::set_diagonal<T>(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V,
209 |                        {*bc});
210 |   MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY);
211 |   MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY);
212 |   t2.stop();
213 |   t2.flush();
214 | 
215 |   // Wrap la::Vector with Petsc Vec
216 |   la::Vector<T> b(L->function_spaces()[0]->dofmap()->index_map,
217 |                   L->function_spaces()[0]->dofmap()->index_map_bs());
218 |   b.set(0);
219 |   common::Timer t3("ZZZ Assemble vector");
220 |   const std::vector constants_L = fem::pack_constants(*L);
221 |   auto coeffs_L = fem::allocate_coefficient_storage(*L);
222 |   fem::pack_coefficients(*L, coeffs_L);
223 |   fem::assemble_vector<T>(b.mutable_array(), *L, constants_L,
224 |                           fem::make_coefficients_span(coeffs_L));
225 |   fem::apply_lifting<T, double>(b.mutable_array(), {*a}, {constants_L},
226 |                                 {fem::make_coefficients_span(coeffs_L)},
227 |                                 {{*bc}}, {}, 1.0);
228 |   b.scatter_rev(std::plus<>());
229 |   bc->set(b.mutable_array(), std::nullopt);
230 |   t3.stop();
231 |   t3.flush();
232 | 
233 |   common::Timer t4("ZZZ Create near-nullspace");
234 | 
235 |   // Create Function to hold solution
236 |   auto u = std::make_shared<fem::Function<T>>(V);
237 | 
238 |   // Build near-nullspace and attach to matrix
239 |   MatNullSpace ns = build_near_nullspace(*V);
240 |   MatSetNearNullSpace(A->mat(), ns);
241 |   MatNullSpaceDestroy(&ns);
242 | 
243 |   t4.stop();
244 |   t4.flush();
245 | 
246 |   std::function<int(fem::Function<T>&, const la::Vector<T>&)> solver_function
247 |       = [A](fem::Function<T>& u, const la::Vector<T>& b)
248 |   {
249 |     // Create solver
250 |     la::petsc::KrylovSolver solver(MPI_COMM_WORLD);
251 |     solver.set_from_options();
252 |     solver.set_operator(A->mat());
253 | 
254 |     // Wrap la::Vector
255 |     la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false);
256 |     la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false);
257 | 
258 |     // Solve
259 |     int num_iter = solver.solve(x.vec(), _b.vec());
260 |     return num_iter;
261 |   };
262 | 
263 |   return {std::make_shared<la::Vector<T>>(std::move(b)), u, solver_function};
264 | }
265 | 


--------------------------------------------------------------------------------
/src/elasticity_problem.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
 2 | //
 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | //
 5 | // SPDX-License-Identifier:    MIT
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <dolfinx/fem/Function.h>
10 | #include <dolfinx/la/Vector.h>
11 | #include <memory>
12 | #include <petscsys.h>
13 | #include <utility>
14 | 
// Forward declaration of dolfinx::mesh::Mesh. This header only names the
// type inside a std::shared_ptr, and it does not include
// <dolfinx/mesh/Mesh.h> directly.
namespace dolfinx::mesh
{
template <std::floating_point T>
class Mesh;
}
20 | 
namespace elastic
{

/// @brief Set up a linear elasticity problem.
/// @param[in] mesh Mesh on which the problem is posed
/// @param[in] order Polynomial degree of the Lagrange element
/// @return Tuple (b, u, solver): the assembled RHS vector, a Function to
/// hold the solution, and a callable `solver(u, b)` that solves the
/// problem and returns the number of iterations
std::tuple<std::shared_ptr<dolfinx::la::Vector<PetscScalar>>,
           std::shared_ptr<dolfinx::fem::Function<PetscScalar>>,
           std::function<int(dolfinx::fem::Function<PetscScalar>&,
                             const dolfinx::la::Vector<PetscScalar>&)>>
problem(std::shared_ptr<dolfinx::mesh::Mesh<double>> mesh, int order);

} // namespace elastic
31 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells
  2 | //
  3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
  4 | //
  5 | // SPDX-License-Identifier:    MIT
  6 | 
  7 | #include "cgpoisson_problem.h"
  8 | #include "elasticity_problem.h"
  9 | #include "mem.h"
 10 | #include "mesh.h"
 11 | #include "poisson_problem.h"
 12 | #include <boost/program_options.hpp>
 13 | #include <dolfinx/common/Timer.h>
 14 | #include <dolfinx/common/log.h>
 15 | #include <dolfinx/common/timing.h>
 16 | #include <dolfinx/common/version.h>
 17 | #include <dolfinx/fem/Form.h>
 18 | #include <dolfinx/fem/Function.h>
 19 | #include <dolfinx/fem/FunctionSpace.h>
 20 | #include <dolfinx/fem/utils.h>
 21 | #include <dolfinx/io/XDMFFile.h>
 22 | #include <dolfinx/la/Vector.h>
 23 | #include <iomanip>
 24 | #include <petscsys.h>
 25 | #include <string>
 26 | #include <thread>
 27 | #include <utility>
 28 | 
 29 | namespace po = boost::program_options;
 30 | 
 31 | std::string int64_to_human(std::int64_t n)
 32 | {
 33 |   double r = static_cast<double>(n);
 34 |   const std::string name[] = {"", "thousand", "million", "billion", "trillion"};
 35 | 
 36 |   int i = 0;
 37 |   while (r > 1000.0)
 38 |   {
 39 |     r /= 1000.0;
 40 |     i++;
 41 |   }
 42 |   if (i > 4)
 43 |     throw std::runtime_error("number too big");
 44 | 
 45 |   std::stringstream s;
 46 |   if (i == 0)
 47 |     return s.str();
 48 |   s << " (" << std::setprecision(3) << r << " " << name[i] << ")";
 49 |   return s.str();
 50 | }
 51 | 
 52 | void solve(int argc, char* argv[])
 53 | {
 54 |   po::options_description desc("Allowed options");
 55 |   bool mem_profile;
 56 |   bool use_subcomm;
 57 |   desc.add_options()("help,h", "print usage message")(
 58 |       "problem_type", po::value<std::string>()->default_value("poisson"),
 59 |       "problem (poisson, cgpoisson, or elasticity)")(
 60 |       "mesh_type", po::value<std::string>()->default_value("cube"),
 61 |       "mesh (cube or unstructured)")(
 62 |       "memory_profiling", po::bool_switch(&mem_profile)->default_value(false),
 63 |       "turn on memory logging")(
 64 |       "subcomm_partition", po::bool_switch(&use_subcomm)->default_value(false),
 65 |       "Use sub-communicator for partitioning")(
 66 |       "scaling_type", po::value<std::string>()->default_value("weak"),
 67 |       "scaling (weak or strong)")(
 68 |       "output", po::value<std::string>()->default_value(""),
 69 |       "output directory (no output unless this is set)")(
 70 |       "ndofs", po::value<std::size_t>()->default_value(50000),
 71 |       "number of degrees of freedom")(
 72 |       "order", po::value<std::size_t>()->default_value(1), "polynomial order")(
 73 |       "scatterer", po::value<std::string>()->default_value("neighbor"),
 74 |       "scatterer for CG (neighbor or p2p)");
 75 | 
 76 |   po::variables_map vm;
 77 |   po::store(po::command_line_parser(argc, argv)
 78 |                 .options(desc)
 79 |                 .allow_unregistered()
 80 |                 .run(),
 81 |             vm);
 82 |   po::notify(vm);
 83 | 
 84 |   if (vm.count("help"))
 85 |   {
 86 |     std::cout << desc << std::endl;
 87 |     ;
 88 |     return;
 89 |   }
 90 | 
 91 |   const std::string problem_type = vm["problem_type"].as<std::string>();
 92 |   const std::string mesh_type = vm["mesh_type"].as<std::string>();
 93 |   const std::string scaling_type = vm["scaling_type"].as<std::string>();
 94 |   const std::size_t ndofs = vm["ndofs"].as<std::size_t>();
 95 |   const int order = vm["order"].as<std::size_t>();
 96 |   const std::string scatterer = vm["scatterer"].as<std::string>();
 97 |   const std::string output_dir = vm["output"].as<std::string>();
 98 |   const bool output = (output_dir.size() > 0);
 99 |   const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD);
100 | 
101 |   bool quit_flag = false;
102 |   std::thread mem_thread;
103 | 
104 |   if (mem_profile and mpi_rank == 0)
105 |   {
106 |     mem_thread = std::thread(process_mem_usage, std::ref(quit_flag));
107 |   }
108 | 
109 |   bool strong_scaling;
110 |   if (scaling_type == "strong")
111 |     strong_scaling = true;
112 |   else if (scaling_type == "weak")
113 |     strong_scaling = false;
114 |   else
115 |     throw std::runtime_error("Scaling type '" + scaling_type + "` unknown");
116 | 
117 |   // Get number of processes
118 |   const std::size_t num_processes = dolfinx::MPI::size(MPI_COMM_WORLD);
119 | 
120 |   // Assemble problem
121 |   std::shared_ptr<dolfinx::mesh::Mesh<double>> mesh;
122 |   std::shared_ptr<dolfinx::la::Vector<PetscScalar>> b;
123 |   std::shared_ptr<dolfinx::fem::Function<PetscScalar>> u;
124 |   std::function<int(dolfinx::fem::Function<PetscScalar>&,
125 |                     const dolfinx::la::Vector<PetscScalar>&)>
126 |       solver_function;
127 | 
128 |   const int ndofs_per_node = (problem_type == "elasticity") ? 3 : 1;
129 | 
130 |   dolfinx::common::Timer t0("ZZZ Create Mesh");
131 |   if (mesh_type == "cube")
132 |   {
133 |     mesh = std::make_shared<dolfinx::mesh::Mesh<double>>(
134 |         create_cube_mesh(MPI_COMM_WORLD, ndofs, strong_scaling, ndofs_per_node,
135 |                          order, use_subcomm));
136 |   }
137 |   else
138 |   {
139 |     mesh = create_spoke_mesh(MPI_COMM_WORLD, ndofs, strong_scaling,
140 |                              ndofs_per_node);
141 |   }
142 |   t0.stop();
143 |   t0.flush();
144 | 
145 |   dolfinx::common::Timer t_ent(
146 |       "ZZZ Create facets and facet->cell connectivity");
147 |   mesh->topology_mutable()->create_entities(2);
148 |   mesh->topology_mutable()->create_connectivity(2, 3);
149 |   t_ent.stop();
150 |   t_ent.flush();
151 | 
152 |   if (problem_type == "poisson")
153 |   {
154 |     // Create Poisson problem
155 |     std::tie(b, u, solver_function) = poisson::problem(mesh, order);
156 |   }
157 |   else if (problem_type == "cgpoisson")
158 |   {
159 |     // Create Poisson problem
160 |     std::tie(b, u, solver_function)
161 |         = cgpoisson::problem(mesh, order, scatterer);
162 |   }
163 |   else if (problem_type == "elasticity")
164 |   {
165 |     // Create elasticity problem. Near-nullspace will be attached to the
166 |     // linear operator (matrix).
167 |     std::tie(b, u, solver_function) = elastic::problem(mesh, order);
168 |   }
169 |   else
170 |     throw std::runtime_error("Unknown problem type: " + problem_type);
171 | 
172 |   // Print simulation summary
173 |   if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0)
174 |   {
175 |     char petsc_version[256];
176 |     PetscGetVersion(petsc_version, 256);
177 | 
178 |     const std::int64_t num_dofs
179 |         = u->function_space()->dofmap()->index_map->size_global()
180 |           * u->function_space()->dofmap()->index_map_bs();
181 |     const int tdim = mesh->topology()->dim();
182 |     const std::int64_t num_cells
183 |         = mesh->topology()->index_map(tdim)->size_global();
184 |     const std::string num_cells_human = int64_to_human(num_cells);
185 |     const std::string num_dofs_human = int64_to_human(num_dofs);
186 |     std::cout
187 |         << "----------------------------------------------------------------"
188 |         << std::endl;
189 |     std::cout << "Test problem summary" << std::endl;
190 |     std::cout << "  dolfinx version: " << DOLFINX_VERSION_STRING << std::endl;
191 |     std::cout << "  dolfinx hash:    " << DOLFINX_VERSION_GIT << std::endl;
192 |     std::cout << "  ufl hash:        " << UFCX_SIGNATURE << std::endl;
193 |     std::cout << "  petsc version:   " << petsc_version << std::endl;
194 |     std::cout << "  Problem type:    " << problem_type << std::endl;
195 |     std::cout << "  Scaling type:    " << scaling_type << std::endl;
196 |     std::cout << "  Num processes:   " << num_processes << std::endl;
197 |     std::cout << "  Num cells:       " << num_cells << num_cells_human
198 |               << std::endl;
199 |     std::cout << "  Total degrees of freedom:               " << num_dofs
200 |               << num_dofs_human << std::endl;
201 |     std::cout << "  Average degrees of freedom per process: "
202 |               << num_dofs / dolfinx::MPI::size(MPI_COMM_WORLD) << std::endl;
203 |     std::cout
204 |         << "----------------------------------------------------------------"
205 |         << std::endl;
206 |   }
207 | 
208 |   dolfinx::common::Timer t5("ZZZ Solve");
209 |   int num_iter = solver_function(*u, *b);
210 |   t5.stop();
211 |   t5.flush();
212 | 
213 |   if (output)
214 |   {
215 |     dolfinx::common::Timer t6("ZZZ Output");
216 |     std::string filename
217 |         = output_dir + "/solution-" + std::to_string(num_processes) + ".xdmf";
218 |     dolfinx::io::XDMFFile file(MPI_COMM_WORLD, filename, "w");
219 |     file.write_mesh(*mesh);
220 |     file.write_function(*u, 0.0);
221 |     t6.stop();
222 |     t6.flush();
223 |   }
224 | 
225 |   // Display timings
226 |   dolfinx::list_timings(MPI_COMM_WORLD);
227 | 
228 |   // Report number of Krylov iterations
229 |   double norm = dolfinx::la::norm(*(u->x()));
230 |   if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0)
231 |   {
232 |     std::cout << "*** Number of Krylov iterations: " << num_iter << std::endl;
233 |     std::cout << "*** Solution norm:  " << norm << std::endl;
234 |   }
235 | 
236 |   if (mem_profile and mpi_rank == 0)
237 |   {
238 |     quit_flag = true;
239 |     mem_thread.join();
240 |   }
241 | }
242 | 
243 | int main(int argc, char* argv[])
244 | {
245 |   dolfinx::common::Timer t0("Init MPI");
246 |   MPI_Init(&argc, &argv);
247 |   t0.stop();
248 |   t0.flush();
249 | 
250 |   dolfinx::common::Timer t1("Init logging");
251 |   dolfinx::init_logging(argc, argv);
252 |   t1.stop();
253 |   t1.flush();
254 | 
255 |   dolfinx::common::Timer t2("Init PETSc");
256 |   PetscInitialize(&argc, &argv, nullptr, nullptr);
257 |   t2.stop();
258 |   t2.flush();
259 | 
260 |   // Set the logging thread name to show the process rank and enable on
261 |   // rank 0 (add more here if desired)
262 |   const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD);
263 |   std::string thread_name = "RANK: " + std::to_string(mpi_rank);
264 |   std::string fmt = "[%Y-%m-%d %H:%M:%S.%e] [" + thread_name + "] [%l] %v";
265 |   spdlog::set_pattern(fmt);
266 |   if (mpi_rank == 0)
267 |     spdlog::set_level(spdlog::level::info);
268 | 
269 |   solve(argc, argv);
270 | 
271 |   PetscFinalize();
272 |   MPI_Finalize();
273 | 
274 |   return 0;
275 | }
276 | 


--------------------------------------------------------------------------------
/src/mem.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2021 Chris N. Richardson
 2 | //
 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | //
 5 | // SPDX-License-Identifier:    MIT
 6 | 
 7 | #include <chrono>
 8 | #include <dolfinx/common/log.h>
 9 | #include <fstream>
10 | #include <ios>
11 | #include <iostream>
12 | #include <iterator>
13 | #include <string>
14 | #include <thread>
15 | #include <unistd.h>
16 | #include <vector>
17 | 
18 | void process_mem_usage(bool& quit)
19 | {
20 |   std::string fmt = "[%Y-%m-%d %H:%M:%S.%e] [MEM] [%l] %v";
21 |   spdlog::set_pattern(fmt);
22 | 
23 |   const int page_size_bytes = sysconf(_SC_PAGE_SIZE);
24 | 
25 |   while (!quit)
26 |   {
27 |     std::ifstream f("/proc/self/stat", std::ios_base::in);
28 |     std::istream_iterator<std::string> it(f);
29 |     std::advance(it, 21);
30 | 
31 |     std::size_t vsize, rss;
32 |     f >> vsize >> rss;
33 |     f.close();
34 |     spdlog::warn("VSIZE={}, RSS={}", vsize / 1024,
35 |                  rss * page_size_bytes / 1024);
36 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/mem.h:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2021 Chris N. Richardson
2 | //
3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
4 | //
5 | // SPDX-License-Identifier:    MIT
6 | 
7 | /// Thread to output memory usage to logger
8 | void process_mem_usage(bool& quit);
9 | 


--------------------------------------------------------------------------------
/src/mesh.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2019 Chris N. Richardson and Garth N. Wells
  2 | // Licensed under the MIT License. See LICENSE file in the project
  3 | // root for full license information.
  4 | 
  5 | #include "mesh.h"
  6 | #include <dolfinx/common/MPI.h>
  7 | #include <dolfinx/common/log.h>
  8 | #include <dolfinx/fem/CoordinateElement.h>
  9 | #include <dolfinx/fem/ElementDofLayout.h>
 10 | #include <dolfinx/graph/AdjacencyList.h>
 11 | #include <dolfinx/graph/partitioners.h>
 12 | #include <dolfinx/mesh/Mesh.h>
 13 | #include <dolfinx/mesh/MeshTags.h>
 14 | #include <dolfinx/mesh/cell_types.h>
 15 | #include <dolfinx/mesh/generation.h>
 16 | #include <dolfinx/refinement/refine.h>
 17 | #include <memory>
 18 | #include <numbers>
 19 | #include <span>
 20 | 
 21 | namespace
 22 | {
 23 | // The numbers of lower-dimensional cells of the CW complex of the right prism.
 24 | //
 25 | // The right prism with dimensions i x j x k is uniformly decomposed
 26 | // into ijk unit cubes, and each cube is decomposed into 6 tetrahedra;
 27 | // the decomposition procedure is described in Hatcher's "Algebraic
 28 | // Topology", on the proof of Theorem 2.10 [1], although pictures of
 29 | // the decomposition for the particular case of 3 dimensions simply
 30 | // can be viewed online by searching for "tetrahedral decomposition of
 31 | // cube".
 32 | //
 33 | // This decomposition of the right prism leads to a number of
 34 | // vertices, edges, faces, and tetrahedra (cells). The counting of
 35 | // edges and faces is complicated by the fact that many cells might
 36 | // share an edge, and two cells might share a face.
 37 | //
 38 | // The variable @param nrefine controls the dyadic subdivision of the
 39 | // prism; essentially equivalent to scaling up the prism by a factor
 40 | // of 2^nrefine in all directions. It should be a nonnegative small
 41 | // integer.
 42 | //
 43 | // 1. Available at <https://pi.math.cornell.edu/~hatcher/AT/ATpage.html>.
 44 | constexpr std::tuple<std::int64_t, std::int64_t, std::int64_t, std::int64_t>
 45 | num_entities(std::int64_t i, std::int64_t j, std::int64_t k, int nrefine) {
 46 |   i <<= nrefine;
 47 |   j <<= nrefine;
 48 |   k <<= nrefine;
 49 |   std::int64_t vertices = (i + 1) * (j + 1) * (k + 1);
 50 |   std::int64_t edges = 7*i*j*k + 3*(i*j + i*k + j*k) + (i + j + k);
 51 |   std::int64_t faces = 12*i*j*k + 2*(i*j + i*k + j*k);
 52 |   std::int64_t cells = 6 * (i * j * k);
 53 |   return {vertices, edges, faces, cells};
 54 | }
 55 | 
 56 | std::int64_t num_pdofs(std::int64_t i, std::int64_t j, std::int64_t k,
 57 |                        int nrefine, int order)
 58 | {
 59 |   auto [nv, ne, nf, nc] = num_entities(i, j, k, nrefine);
 60 | 
 61 |   switch (order)
 62 |   {
 63 |   case 1:
 64 |     return nv;
 65 |   case 2:
 66 |     return nv + ne;
 67 |   case 3:
 68 |     return nv + 2 * ne + nf;
 69 |   case 4:
 70 |     return nv + 3 * ne + 3 * nf + nc;
 71 |   default:
 72 |     throw std::runtime_error("Order not supported");
 73 |   }
 74 | }
 75 | 
 76 | } // namespace
 77 | 
 78 | dolfinx::mesh::Mesh<double>
 79 | create_cube_mesh(MPI_Comm comm, std::size_t target_dofs, bool target_dofs_total,
 80 |                  std::size_t dofs_per_node, int order, bool use_subcomm)
 81 | {
 82 |   // Get number of processes
 83 |   const std::size_t num_processes = dolfinx::MPI::size(comm);
 84 | 
 85 |   // Target total dofs
 86 |   std::int64_t N = 0;
 87 |   if (target_dofs_total == true)
 88 |     N = target_dofs / dofs_per_node;
 89 |   else
 90 |     N = target_dofs * num_processes / dofs_per_node;
 91 | 
 92 |   std::int64_t Nx, Ny, Nz;
 93 |   int r = 0;
 94 | 
 95 |   // Choose Nx_max carefully. If too large, the base mesh may become too
 96 |   // large for the partitioner; likewise, if too small, it will fail on
 97 |   // large numbers of processes.
 98 |   const std::int64_t Nx_max = 200;
 99 | 
100 |   // Get initial guess for Nx, Ny, Nz, r
101 |   Nx = 1;
102 |   std::int64_t ndofs = 0;
103 |   while (ndofs < N)
104 |   {
105 |     // Increase base mesh size
106 |     ++Nx;
107 |     if (Nx > Nx_max)
108 |     {
109 |       // Base mesh got too big, so add refinement levels
110 |       // Each increase will dramatically (~8x) increase the number of
111 |       // dofs
112 |       while (ndofs < N)
113 |       {
114 |         // Keep on refining until we have overshot
115 |         ++r;
116 |         ndofs = num_pdofs(Nx, Nx, Nx, r, order);
117 |       }
118 |       while (ndofs > N)
119 |       {
120 |         // Shrink base mesh until dofs are back on target
121 |         --Nx;
122 |         ndofs = num_pdofs(Nx, Nx, Nx, r, order);
123 |       }
124 |     }
125 |     ndofs = num_pdofs(Nx, Nx, Nx, r, order);
126 |   }
127 | 
128 |   Ny = Nx;
129 |   Nz = Nx;
130 | 
131 |   // Optimise number of dofs by trying nearby mesh sizes +/- 5 or 10 in
132 |   // each dimension
133 | 
134 |   std::size_t mindiff = 1000000;
135 |   for (std::int64_t i = Nx - 10; i < Nx + 10; ++i)
136 |   {
137 |     for (std::int64_t j = i - 5; j < i + 5; ++j)
138 |     {
139 |       for (std::int64_t k = i - 5; k < i + 5; ++k)
140 |       {
141 |         std::size_t diff = std::abs(num_pdofs(i, j, k, r, order) - N);
142 |         if (diff < mindiff)
143 |         {
144 |           mindiff = diff;
145 |           Nx = i;
146 |           Ny = j;
147 |           Nz = k;
148 |         }
149 |       }
150 |     }
151 |   }
152 | 
153 | #ifdef HAS_PARMETIS
154 |   auto graph_part = dolfinx::graph::parmetis::partitioner();
155 | #elif HAS_PTSCOTCH
156 |   auto graph_part = dolfinx::graph::scotch::partitioner(
157 |       dolfinx::graph::scotch::strategy::scalability);
158 | #elif HAS_KAHIP
159 |   auto graph_part = dolfinx::graph::kahip::partitioner();
160 | #else
161 | #error "No mesh partitioner has been selected"
162 | #endif
163 | 
164 |   MPI_Comm sub_comm;
165 | 
166 |   if (use_subcomm)
167 |   {
168 |     // Create a sub-communicator for mesh partitioning
169 |     MPI_Comm shm_comm;
170 |     // Get a local comm on each node
171 |     MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,
172 |                         &shm_comm);
173 |     int shm_comm_rank = dolfinx::MPI::rank(shm_comm);
174 |     MPI_Comm_free(&shm_comm);
175 |     // Create a comm across nodes, using rank 0 of the local comm on each node
176 |     int color = (shm_comm_rank == 0) ? 0 : MPI_UNDEFINED;
177 |     MPI_Comm_split(comm, color, 0, &sub_comm);
178 |   }
179 |   else
180 |     MPI_Comm_dup(comm, &sub_comm);
181 | 
182 |   auto cell_part = dolfinx::mesh::create_cell_partitioner(
183 |       dolfinx::mesh::GhostMode::none, graph_part);
184 |   auto mesh = dolfinx::mesh::create_box(
185 |       comm, sub_comm, {{{0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}}}, {Nx, Ny, Nz},
186 |       dolfinx::mesh::CellType::tetrahedron, cell_part);
187 | 
188 |   MPI_Comm_free(&sub_comm);
189 | 
190 |   if (dolfinx::MPI::rank(mesh.comm()) == 0)
191 |   {
192 |     std::cout << "UnitCube (" << Nx << "x" << Ny << "x" << Nz
193 |               << ") to be refined " << r << " times" << std::endl;
194 |   }
195 | 
196 |   for (int i = 0; i < r; ++i)
197 |   {
198 |     mesh.topology_mutable()->create_connectivity(3, 1);
199 |     auto [new_mesh, _parent_edges, _parent_facet] = dolfinx::refinement::refine(
200 |       mesh, std::nullopt,
201 |       dolfinx::mesh::create_cell_partitioner(dolfinx::mesh::GhostMode::shared_facet),
202 |       dolfinx::refinement::Option::parent_cell_and_facet);
203 |     mesh = std::move(new_mesh);
204 |   }
205 | 
206 |   return mesh;
207 | }
208 | //-----------------------------------------------------------------------------
209 | std::shared_ptr<dolfinx::mesh::Mesh<double>>
210 | create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs,
211 |                   bool target_dofs_total, std::size_t dofs_per_node)
212 | {
213 |   int target = target_dofs / dofs_per_node;
214 |   int mpi_size = dolfinx::MPI::size(comm);
215 |   if (!target_dofs_total)
216 |     target *= mpi_size;
217 | 
218 |   // Parameters controlling shape
219 |   constexpr int n = 17;       // number of spokes
220 |   constexpr double r0 = 0.25; // inner radius of ring
221 |   constexpr double r1 = 0.5;  // outer radius of ring
222 | 
223 |   constexpr double h0 = 1.2; // height (inner)
224 |   constexpr double h1 = 1.0; // height (outer)
225 | 
226 |   constexpr int lspur = 6;     // number of elements in each spoke
227 |   constexpr double l0 = 0.5;   // length of each element in spoke
228 |   constexpr double dth = 0.15; // curl (angle increment) as spoke goes out
229 |   constexpr double tap
230 |       = 0.9; // taper (fractional height decrease on each element)
231 | 
232 |   // Subdivision of a cube into 6 tetrahedra
233 |   constexpr int cube[6][4] = {{0, 1, 2, 4}, {1, 2, 4, 5}, {2, 4, 5, 6},
234 |                               {0, 2, 3, 4}, {6, 7, 4, 2}, {2, 3, 4, 7}};
235 | 
236 |   // Calculate number of points and cells (only on process 0)
237 |   int npoints = 0;
238 |   int ncells = 0;
239 |   const int mpi_rank = dolfinx::MPI::rank(comm);
240 | 
241 |   if (mpi_rank == 0)
242 |   {
243 |     npoints = n * 4 + n * lspur * 4;
244 |     ncells = n * 6 + n * lspur * 6;
245 |   }
246 | 
247 |   std::vector<double> x(npoints * 3);
248 |   std::vector<std::int64_t> topo(4 * ncells);
249 |   if (mpi_rank == 0)
250 |   {
251 |     int p = 0;
252 |     int c = 0;
253 | 
254 |     // Add n 'cubes' to make a joined up ring.
255 |     for (int i = 0; i < n; ++i)
256 |     {
257 |       std::cout << "Adding cube " << i << std::endl;
258 |       // Get the points for current cube
259 |       std::array<int, 8> pts;
260 |       for (std::size_t j = 0; j < pts.size(); ++j)
261 |         pts[j] = (i * 4 + j) % (n * 4);
262 | 
263 |       // Add to topology
264 |       for (int k = 0; k < 6; ++k)
265 |       {
266 |         for (int j = 0; j < 4; ++j)
267 |           topo[4 * c + j] = pts[cube[k][j]];
268 |         ++c;
269 |       }
270 | 
271 |       // Calculate the position of points
272 |       const double th = 2 * std::numbers::pi * i / n;
273 | 
274 |       std::array p0 = {r0 * std::cos(th), r0 * std::sin(th), h0};
275 |       std::copy(p0.begin(), p0.end(), std::next(x.begin(), 3 * p));
276 | 
277 |       std::array p1 = {r0 * std::cos(th), r0 * std::sin(th), -h0};
278 |       std::copy(p1.begin(), p1.end(), std::next(x.begin(), 3 * (p + 1)));
279 | 
280 |       std::array p2 = {r1 * std::cos(th), r1 * std::sin(th), -h1};
281 |       std::copy(p2.begin(), p2.end(), std::next(x.begin(), 3 * (p + 2)));
282 | 
283 |       std::array p3 = {r1 * std::cos(th), r1 * std::sin(th), h1};
284 |       std::copy(p3.begin(), p3.end(), std::next(x.begin(), 3 * (p + 3)));
285 | 
286 |       p += 4;
287 |     }
288 | 
289 |     // Add spurs to ring
290 |     for (int i = 0; i < n; ++i)
291 |     {
292 |       std::cout << "Adding spur " << i << std::endl;
293 | 
294 |       // Intermediate angle between two faces
295 |       const double th0 = 2 * std::numbers::pi * (i + 0.5) / n;
296 | 
297 |       // Starting points on outer edge of ring
298 |       std::array<int, 8> pts = {(i * 4 + 2) % (n * 4),
299 |                                 (i * 4 + 3) % (n * 4),
300 |                                 (i * 4 + 7) % (n * 4),
301 |                                 (i * 4 + 6) % (n * 4),
302 |                                 0,
303 |                                 0,
304 |                                 0,
305 |                                 0};
306 | 
307 |       // Build each spur outwards
308 |       for (int k = 0; k < lspur; ++k)
309 |       {
310 |         // Add new points
311 |         for (int j = 0; j < 4; ++j)
312 |         {
313 |           pts[j + 4] = p;
314 |           std::span<double, 3> xp(x.data() + 3 * p, 3);
315 |           std::copy_n(std::next(x.begin(), 3 * pts[j]), 3, xp.begin());
316 |           xp[0] += l0 * std::cos(th0 + k * dth);
317 |           xp[1] += l0 * std::sin(th0 + k * dth);
318 |           xp[2] *= std::pow(tap, k);
319 |           ++p;
320 |         }
321 | 
322 |         // Add new cells
323 |         for (int m = 0; m < 6; ++m)
324 |         {
325 |           for (int j = 0; j < 4; ++j)
326 |             topo[4 * c + j] = pts[cube[m][j]];
327 |           ++c;
328 |         }
329 | 
330 |         // Outer face becomes inner face of next cube
331 |         std::span<int, 8> _pts(pts.data(), 8);
332 |         auto pts0 = _pts.first<4>();
333 |         auto pts1 = _pts.last<4>();
334 |         std::copy(pts1.begin(), pts1.end(), pts0.begin());
335 |       }
336 |     }
337 | 
338 |     // Check geometric sizes and rescale
339 |     double x0min(0), x0max(0), x1min(0), x1max(0), x2min(0), x2max(0);
340 |     for (std::size_t i = 0; i < x.size(); i += 3)
341 |     {
342 |       x0min = std::min(std::abs(x[i]), x0min);
343 |       x0max = std::max(std::abs(x[i]), x0max);
344 | 
345 |       x1min = std::min(std::abs(x[i + 1]), x1min);
346 |       x1max = std::max(std::abs(x[i + 1]), x1max);
347 | 
348 |       x2min = std::min(std::abs(x[i + 2]), x2min);
349 |       x2max = std::max(std::abs(x[i + 2]), x2max);
350 |     }
351 | 
352 |     for (std::size_t i = 0; i < x.size(); i += 3)
353 |       x[i] -= 0.9 * x0min;
354 |     std::transform(x.begin(), x.end(), x.begin(),
355 |                    [scale = 0.9 * x0max](auto x) { return x / scale; });
356 | 
357 |     spdlog::info("x range = {} - {}", x0min, x0max);
358 |     spdlog::info("y range = {} - {}", x1min, x1max);
359 |     spdlog::info("z range = {} - {}", x2min, x2max);
360 |   }
361 | 
362 |   // New Mesh
363 |   dolfinx::fem::CoordinateElement<double> element(
364 |       dolfinx::mesh::CellType::tetrahedron, 1);
365 | 
366 |   auto mesh = std::make_shared<dolfinx::mesh::Mesh<double>>(
367 |       dolfinx::mesh::create_mesh(comm, topo, element, x, {x.size() / 3, 3},
368 |                                  dolfinx::mesh::GhostMode::none));
369 | 
370 |   mesh->topology_mutable()->create_entities(1);
371 | 
372 |   while (mesh->topology()->index_map(0)->size_global()
373 |              + mesh->topology()->index_map(1)->size_global()
374 |          < target)
375 |   {
376 |     auto [new_mesh, _parent_edges, _parent_facet] = dolfinx::refinement::refine(
377 |       *mesh, std::nullopt,
378 |       dolfinx::mesh::create_cell_partitioner(dolfinx::mesh::GhostMode::shared_facet),
379 |       dolfinx::refinement::Option::parent_cell_and_facet);
380 |     mesh = std::make_shared<dolfinx::mesh::Mesh<double>>(new_mesh);
381 |     mesh->topology_mutable()->create_entities(1);
382 |   }
383 | 
384 |   double fraction
385 |       = (double)(target - mesh->topology()->index_map(0)->size_global())
386 |         / mesh->topology()->index_map(1)->size_global();
387 | 
388 |   if (mpi_rank == 0)
389 |   {
390 |     std::cout << "Create unstructured mesh: desired fraction=" << fraction
391 |               << std::endl;
392 |   }
393 | 
394 |   // Estimate step needed to get desired refinement fraction
395 |   // using some heuristics and bisection method
396 |   int nmarked = pow(fraction, 1.6) * 2000;
397 | 
398 |   double f_lower = 0.0;
399 |   double f_upper = 1.0;
400 |   int lmark = 0;
401 |   int umark = 2000;
402 | 
403 |   std::shared_ptr<dolfinx::mesh::Mesh<double>> meshi;
404 |   for (int k = 0; k < 5; ++k)
405 |   {
406 |     // Trial step
407 |     mesh->topology_mutable()->create_entities(1);
408 |     std::vector<std::int32_t> marked_edges;
409 |     const std::int32_t num_edges = mesh->topology()->index_map(1)->size_local();
410 |     for (int i = 0; i < num_edges; ++i)
411 |       if (i % 2000 < nmarked)
412 |         marked_edges.push_back(i);
413 | 
414 |     auto [new_mesh, _parent_edges, _parent_facet] = dolfinx::refinement::refine(
415 |       *mesh, marked_edges,
416 |       dolfinx::mesh::create_cell_partitioner(dolfinx::mesh::GhostMode::shared_facet),
417 |       dolfinx::refinement::Option::parent_cell_and_facet);
418 |     meshi = std::make_shared<dolfinx::mesh::Mesh<double>>(new_mesh);
419 | 
420 |     double actual_fraction
421 |         = (double)(meshi->topology()->index_map(0)->size_global()
422 |                    - mesh->topology()->index_map(0)->size_global())
423 |           / mesh->topology()->index_map(1)->size_global();
424 | 
425 |     if (mpi_rank == 0)
426 |     {
427 |       std::cout << "Edges marked = " << nmarked << "/2000" << std::endl;
428 |       std::cout << "Step " << k
429 |                 << " achieved actual fraction = " << actual_fraction
430 |                 << std::endl;
431 |     }
432 | 
433 |     if (actual_fraction > fraction)
434 |     {
435 |       umark = nmarked;
436 |       f_upper = actual_fraction;
437 |     }
438 |     else
439 |     {
440 |       lmark = nmarked;
441 |       f_lower = actual_fraction;
442 |     }
443 |     int new_mark = (lmark * (f_upper - fraction) + umark * (fraction - f_lower))
444 |                    / (f_upper - f_lower);
445 | 
446 |     if (nmarked == new_mark)
447 |       break;
448 |     else
449 |       nmarked = new_mark;
450 |   }
451 | 
452 |   return meshi;
453 | }
454 | 


--------------------------------------------------------------------------------
/src/mesh.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2017 Chris N. Richardson and Garth N. Wells
 2 | // Licensed under the MIT License. See LICENSE file in the project
 3 | // root for full license information.
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <memory>
 8 | #include <mpi.h>
 9 | 
10 | namespace dolfinx::fem
11 | {
12 | template <std::floating_point T>
13 | class CoordinateElement;
14 | }
15 | 
16 | namespace dolfinx::mesh
17 | {
18 | template <std::floating_point T>
19 | class Mesh;
20 | }
21 | 
22 | dolfinx::mesh::Mesh<double>
23 | create_cube_mesh(MPI_Comm comm, std::size_t target_dofs, bool target_dofs_total,
24 |                  std::size_t dofs_per_node, int order, bool use_subcomm);
25 | 
26 | std::shared_ptr<dolfinx::mesh::Mesh<double>>
27 | create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs,
28 |                   bool target_dofs_total, std::size_t dofs_per_node);
29 | 


--------------------------------------------------------------------------------
/src/poisson_problem.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
  2 | //
  3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
  4 | //
  5 | // SPDX-License-Identifier:    MIT
  6 | 
  7 | #include "poisson_problem.h"
  8 | #include "Poisson.h"
  9 | #include <cfloat>
 10 | #include <cmath>
 11 | #include <dolfinx/common/Timer.h>
 12 | #include <dolfinx/fem/DirichletBC.h>
 13 | #include <dolfinx/fem/Function.h>
 14 | #include <dolfinx/fem/FunctionSpace.h>
 15 | #
 16 | #include <dolfinx/fem/assembler.h>
 17 | #include <dolfinx/fem/petsc.h>
 18 | #include <dolfinx/fem/utils.h>
 19 | #include <dolfinx/la/petsc.h>
 20 | #include <dolfinx/mesh/Mesh.h>
 21 | #include <dolfinx/mesh/utils.h>
 22 | #include <memory>
 23 | #include <petscsys.h>
 24 | #include <utility>
 25 | 
 26 | using namespace dolfinx;
 27 | using T = PetscScalar;
 28 | 
 29 | std::tuple<std::shared_ptr<la::Vector<T>>, std::shared_ptr<fem::Function<T>>,
 30 |            std::function<int(fem::Function<T>&, const la::Vector<T>&)>>
 31 | poisson::problem(std::shared_ptr<mesh::Mesh<double>> mesh, int order)
 32 | {
 33 |   common::Timer t0("ZZZ FunctionSpace");
 34 | 
 35 |   auto element = basix::create_element<double>(
 36 |       basix::element::family::P, basix::cell::type::tetrahedron, order,
 37 |       basix::element::lagrange_variant::gll_warped,
 38 |       basix::element::dpc_variant::unset, false);
 39 | 
 40 |   auto dolfinx_element
 41 |       = std::make_shared<const fem::FiniteElement<double>>(element);
 42 | 
 43 |   auto V = std::make_shared<fem::FunctionSpace<double>>(
 44 |       fem::create_functionspace(mesh, dolfinx_element));
 45 | 
 46 |   t0.stop();
 47 |   t0.flush();
 48 | 
 49 |   common::Timer t1("ZZZ Assemble");
 50 | 
 51 |   common::Timer t2("ZZZ Create boundary conditions");
 52 |   // Define boundary condition
 53 |   auto u0 = std::make_shared<fem::Function<T>>(V);
 54 |   u0->x()->set(0);
 55 | 
 56 |   // Find facets with bc applied
 57 |   const int tdim = mesh->topology()->dim();
 58 |   const std::vector<std::int32_t> bc_facets = mesh::locate_entities(
 59 |       *mesh, tdim - 1,
 60 |       [](auto x)
 61 |       {
 62 |         constexpr double eps = 1.0e-8;
 63 |         std::vector<std::int8_t> marker(x.extent(1), false);
 64 |         for (std::size_t p = 0; p < x.extent(1); ++p)
 65 |         {
 66 |           double x0 = x(0, p);
 67 |           if (std::abs(x0) < eps or std::abs(x0 - 1) < eps)
 68 |             marker[p] = true;
 69 |         }
 70 |         return marker;
 71 |       });
 72 | 
 73 |   // Find constrained dofs
 74 |   const std::vector<std::int32_t> bdofs = fem::locate_dofs_topological(
 75 |       *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets);
 76 | 
 77 |   auto bc = std::make_shared<fem::DirichletBC<T>>(u0, bdofs);
 78 |   t2.stop();
 79 |   t2.flush();
 80 | 
 81 |   // Define coefficients
 82 |   common::Timer t3("ZZZ Create RHS function");
 83 |   auto f = std::make_shared<fem::Function<T>>(V);
 84 |   auto g = std::make_shared<fem::Function<T>>(V);
 85 |   f->interpolate(
 86 |       [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
 87 |       {
 88 |         std::vector<T> v(x.extent(1));
 89 |         for (std::size_t p = 0; p < x.extent(1); ++p)
 90 |         {
 91 |           double dx = x(0, p) - 0.5;
 92 |           double dy = x(1, p) - 0.5;
 93 |           double dr = dx * dx + dy * dy;
 94 |           v[p] = 10 * std::exp(-dr / 0.02);
 95 |         }
 96 | 
 97 |         return {std::move(v), {v.size()}};
 98 |       });
 99 |   g->interpolate(
100 |       [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
101 |       {
102 |         std::vector<T> f(x.extent(1));
103 |         for (std::size_t p = 0; p < x.extent(1); ++p)
104 |           f[p] = std::sin(5 * x(0, p));
105 |         return {f, {f.size()}};
106 |       });
107 |   t3.stop();
108 |   t3.flush();
109 | 
110 |   std::vector form_poisson_L
111 |       = {form_Poisson_L1, form_Poisson_L2, form_Poisson_L3};
112 |   std::vector form_poisson_a
113 |       = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3};
114 | 
115 |   // Define variational forms
116 |   auto L = std::make_shared<fem::Form<T>>(fem::create_form<T>(
117 |       *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {}, {}));
118 |   auto a = std::make_shared<fem::Form<T>>(fem::create_form<T>(
119 |       *form_poisson_a.at(order - 1), {V, V}, {}, {}, {}, {}));
120 | 
121 |   // Create matrices and vector, and assemble system
122 |   std::shared_ptr<la::petsc::Matrix> A = std::make_shared<la::petsc::Matrix>(
123 |       fem::petsc::create_matrix(*a), false);
124 | 
125 |   common::Timer t4("ZZZ Assemble matrix");
126 |   const std::vector constants_a = fem::pack_constants(*a);
127 |   auto coeffs_a = fem::allocate_coefficient_storage(*a);
128 |   fem::pack_coefficients(*a, coeffs_a);
129 |   fem::assemble_matrix<T>(la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES),
130 |                           *a, constants_a,
131 |                           fem::make_coefficients_span(coeffs_a), {*bc});
132 |   MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY);
133 |   MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY);
134 |   fem::set_diagonal<T>(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V,
135 |                        {*bc});
136 |   MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY);
137 |   MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY);
138 |   t4.stop();
139 |   t4.flush();
140 | 
141 |   // Create la::Vector
142 |   la::Vector<T> b(L->function_spaces()[0]->dofmap()->index_map,
143 |                   L->function_spaces()[0]->dofmap()->index_map_bs());
144 |   b.set(0);
145 |   common::Timer t5("ZZZ Assemble vector");
146 |   const std::vector constants_L = fem::pack_constants(*L);
147 |   auto coeffs_L = fem::allocate_coefficient_storage(*L);
148 |   fem::pack_coefficients(*L, coeffs_L);
149 |   fem::assemble_vector<T>(b.mutable_array(), *L, constants_L,
150 |                           fem::make_coefficients_span(coeffs_L));
151 |   fem::apply_lifting<T, double>(b.mutable_array(), {*a}, {constants_L},
152 |                                 {fem::make_coefficients_span(coeffs_L)},
153 |                                 {{*bc}}, {}, 1.0);
154 |   b.scatter_rev(std::plus<>());
155 |   bc->set(b.mutable_array(), std::nullopt);
156 |   t5.stop();
157 |   t5.flush();
158 | 
159 |   t1.stop();
160 |   t1.flush();
161 | 
162 |   // Create Function to hold solution
163 |   auto u = std::make_shared<fem::Function<T>>(V);
164 |   std::function<int(fem::Function<T>&, const la::Vector<T>&)> solver_function
165 |       = [A](fem::Function<T>& u, const la::Vector<T>& b)
166 |   {
167 |     // Create solver
168 |     la::petsc::KrylovSolver solver(MPI_COMM_WORLD);
169 |     solver.set_from_options();
170 |     solver.set_operator(A->mat());
171 | 
172 |     // Wrap la::Vector
173 |     la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false);
174 |     la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false);
175 | 
176 |     // Solve
177 |     int num_iter = solver.solve(x.vec(), _b.vec());
178 |     return num_iter;
179 |   };
180 | 
181 |   return {std::make_shared<la::Vector<T>>(std::move(b)), u, solver_function};
182 | }
183 | 


--------------------------------------------------------------------------------
/src/poisson_problem.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells
 2 | //
 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org)
 4 | //
 5 | // SPDX-License-Identifier:    MIT
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <dolfinx/fem/Function.h>
10 | #include <dolfinx/la/Vector.h>
11 | #include <dolfinx/mesh/Mesh.h>
12 | #include <memory>
13 | #include <petscsys.h>
14 | #include <utility>
15 | 
16 | namespace poisson
17 | {
18 | 
19 | std::tuple<std::shared_ptr<dolfinx::la::Vector<PetscScalar>>,
20 |            std::shared_ptr<dolfinx::fem::Function<PetscScalar>>,
21 |            std::function<int(dolfinx::fem::Function<PetscScalar>&,
22 |                              const dolfinx::la::Vector<PetscScalar>&)>>
23 | problem(std::shared_ptr<dolfinx::mesh::Mesh<double>> mesh, int order);
24 | 
25 | } // namespace poisson
26 | 


--------------------------------------------------------------------------------