├── .clang-format ├── .github ├── FUNDING.yml └── workflows │ └── ccpp.yml ├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── performance.md └── src ├── .gitignore ├── CMakeLists.txt ├── Elasticity.py ├── Poisson.py ├── cg.h ├── cgpoisson_problem.cpp ├── cgpoisson_problem.h ├── elasticity_problem.cpp ├── elasticity_problem.h ├── main.cpp ├── mem.cpp ├── mem.h ├── mesh.cpp ├── mesh.h ├── poisson_problem.cpp └── poisson_problem.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlinesLeft: false 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: All 15 | AllowShortIfStatementsOnASingleLine: false 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: false 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | BeforeCatch: false 33 | BeforeElse: false 34 | IndentBraces: false 35 | BreakBeforeBinaryOperators: All 36 | BreakBeforeBraces: Allman 37 | BreakBeforeTernaryOperators: true 38 | BreakConstructorInitializersBeforeComma: false 39 | BreakAfterJavaFieldAnnotations: false 40 | BreakStringLiterals: true 41 | ColumnLimit: 80 42 | CommentPragmas: '^ IWYU pragma:' 43 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 44 | 
ConstructorInitializerIndentWidth: 4 45 | ContinuationIndentWidth: 4 46 | Cpp11BracedListStyle: true 47 | DerivePointerAlignment: false 48 | DisableFormat: false 49 | ExperimentalAutoDetectBinPacking: false 50 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 51 | IncludeCategories: 52 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 53 | Priority: 2 54 | - Regex: '^(<|"(gtest|isl|json)/)' 55 | Priority: 3 56 | - Regex: '.*' 57 | Priority: 1 58 | IncludeIsMainRegex: '$' 59 | IndentCaseLabels: false 60 | IndentWidth: 2 61 | IndentWrappedFunctionNames: false 62 | JavaScriptQuotes: Leave 63 | JavaScriptWrapImports: true 64 | KeepEmptyLinesAtTheStartOfBlocks: true 65 | MacroBlockBegin: '' 66 | MacroBlockEnd: '' 67 | MaxEmptyLinesToKeep: 1 68 | NamespaceIndentation: None 69 | ObjCBlockIndentWidth: 2 70 | ObjCSpaceAfterProperty: false 71 | ObjCSpaceBeforeProtocolList: true 72 | PenaltyBreakBeforeFirstCallParameter: 19 73 | PenaltyBreakComment: 300 74 | PenaltyBreakFirstLessLess: 120 75 | PenaltyBreakString: 1000 76 | PenaltyExcessCharacter: 1000000 77 | PenaltyReturnTypeOnItsOwnLine: 60 78 | PointerAlignment: Left 79 | ReflowComments: true 80 | SortIncludes: true 81 | SpaceAfterCStyleCast: false 82 | SpaceAfterTemplateKeyword: true 83 | SpaceBeforeAssignmentOperators: true 84 | SpaceBeforeParens: ControlStatements 85 | SpaceInEmptyParentheses: false 86 | SpacesBeforeTrailingComments: 1 87 | SpacesInAngles: false 88 | SpacesInContainerLiterals: true 89 | SpacesInCStyleCastParentheses: false 90 | SpacesInParentheses: false 91 | SpacesInSquareBrackets: false 92 | Standard: Cpp11 93 | TabWidth: 8 94 | UseTab: Never 95 | ... 
96 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: FEniCS # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | -------------------------------------------------------------------------------- /.github/workflows/ccpp.yml: -------------------------------------------------------------------------------- 1 | name: FEniCS Performance Test CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - "**" 7 | pull_request: 8 | branches: 9 | - main 10 | merge_group: 11 | branches: 12 | - main 13 | workflow_dispatch: 14 | schedule: 15 | # * is a special character in YAML so you have to quote this string 16 | - cron: "0 3 * * 0,3" 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-latest 21 | container: fenicsproject/test-env:current-openmpi 22 | 23 | env: 24 | PETSC_ARCH: linux-gnu-real64-64 25 | OMPI_ALLOW_RUN_AS_ROOT: 1 26 | OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - name: Get DOLFINx 32 | uses: actions/checkout@v4 33 | with: 34 | path: ./dolfinx 35 | repository: FEniCS/dolfinx 36 | ref: main 37 | 38 | - name: Install FEniCS Python components 39 | run: | 40 | apt-get -qq update 41 | apt-get -y install libboost-program-options-dev 42 | pip3 install --break-system-packages pip --upgrade 43 | pip3 install --break-system-packages git+https://github.com/FEniCS/ufl.git 44 | pip3 install --break-system-packages git+https://github.com/FEniCS/basix.git 45 | pip3 install --break-system-packages git+https://github.com/FEniCS/ffcx 46 | - name: Build dolfinx cpp 47 | run: | 48 | cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build -S dolfinx/cpp/ 49 | cmake --build build 50 | cmake --install build 51 | - name: Build performance test 52 | run: | 53 | cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build-dir -S src 54 | 
cmake --build build-dir 55 | cmake --install build-dir 56 | - name: Run Poisson test (BoomerAMG, serial) 57 | run: | 58 | dolfinx-scaling-test \ 59 | --problem_type poisson \ 60 | --scaling_type weak \ 61 | --ndofs 50000 \ 62 | -log_view \ 63 | -ksp_view \ 64 | -ksp_type cg \ 65 | -ksp_rtol 1.0e-8 \ 66 | -pc_type hypre \ 67 | -pc_hypre_type boomeramg \ 68 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 69 | -pc_hypre_boomeramg_agg_nl 4 \ 70 | -pc_hypre_boomeramg_agg_num_paths 2 71 | - name: Run Poisson test (BoomerAMG, weak) 72 | run: | 73 | mpirun -np 2 dolfinx-scaling-test \ 74 | --problem_type poisson \ 75 | --scaling_type weak \ 76 | --ndofs 50000 \ 77 | -log_view \ 78 | -ksp_view \ 79 | -ksp_type cg \ 80 | -ksp_rtol 1.0e-8 \ 81 | -pc_type hypre \ 82 | -pc_hypre_type boomeramg \ 83 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 84 | -pc_hypre_boomeramg_agg_nl 4 \ 85 | -pc_hypre_boomeramg_agg_num_paths 2 86 | - name: Run Poisson test (BoomerAMG, 3rd order, weak) 87 | run: | 88 | mpirun -np 2 dolfinx-scaling-test \ 89 | --problem_type poisson \ 90 | --scaling_type weak \ 91 | --ndofs 50000 \ 92 | --order 3 \ 93 | -log_view \ 94 | -ksp_view \ 95 | -ksp_type cg \ 96 | -ksp_rtol 1.0e-8 \ 97 | -pc_type hypre \ 98 | -pc_hypre_type boomeramg \ 99 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 100 | -pc_hypre_boomeramg_agg_nl 4 \ 101 | -pc_hypre_boomeramg_agg_num_paths 2 102 | - name: Run Poisson test (BoomerAMG, weak, unstructured mesh) 103 | run: | 104 | mpirun -np 2 dolfinx-scaling-test \ 105 | --problem_type poisson \ 106 | --mesh_type unstructured \ 107 | --scaling_type weak \ 108 | --ndofs 50000 \ 109 | -log_view \ 110 | -ksp_view \ 111 | -ksp_type cg \ 112 | -ksp_rtol 1.0e-8 \ 113 | -pc_type hypre \ 114 | -pc_hypre_type boomeramg \ 115 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 116 | -pc_hypre_boomeramg_agg_nl 4 \ 117 | -pc_hypre_boomeramg_agg_num_paths 2 118 | - name: Run Poisson test (BoomerAMG, strong) 119 | run: | 120 | mpirun -np 2 dolfinx-scaling-test \ 121 | 
--problem_type poisson \ 122 | --scaling_type strong \ 123 | --ndofs 1000000 \ 124 | -log_view \ 125 | -ksp_view \ 126 | -ksp_type cg \ 127 | -ksp_rtol 1.0e-8 \ 128 | -pc_type hypre \ 129 | -pc_hypre_type boomeramg \ 130 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 131 | -pc_hypre_boomeramg_agg_nl 4 \ 132 | -pc_hypre_boomeramg_agg_num_paths 2 133 | - name: Run elasticity test (GAMG, serial) 134 | run: | 135 | dolfinx-scaling-test \ 136 | --problem_type elasticity \ 137 | --scaling_type weak \ 138 | --ndofs 100000 \ 139 | -log_view \ 140 | -ksp_view \ 141 | -ksp_type cg \ 142 | -ksp_rtol 1.0e-8 \ 143 | -pc_type gamg \ 144 | -pc_gamg_coarse_eq_limit 1000 \ 145 | -mg_levels_ksp_type chebyshev \ 146 | -mg_levels_pc_type jacobi \ 147 | -mg_levels_esteig_ksp_type cg \ 148 | -matptap_via scalable 149 | - name: Run elasticity test (GAMG, weak) 150 | run: | 151 | mpirun -np 2 dolfinx-scaling-test \ 152 | --problem_type elasticity \ 153 | --scaling_type weak \ 154 | --ndofs 100000 \ 155 | -log_view \ 156 | -ksp_view \ 157 | -ksp_type cg \ 158 | -ksp_rtol 1.0e-8 \ 159 | -pc_type gamg \ 160 | -pc_gamg_coarse_eq_limit 1000 \ 161 | -mg_levels_ksp_type chebyshev \ 162 | -mg_levels_pc_type jacobi \ 163 | -mg_levels_esteig_ksp_type cg \ 164 | -matptap_via scalable 165 | - name: Run elasticity test (GAMG, 3rd order, weak) 166 | run: | 167 | mpirun -np 2 dolfinx-scaling-test \ 168 | --problem_type elasticity \ 169 | --scaling_type weak \ 170 | --ndofs 100000 \ 171 | --order 3 \ 172 | -log_view \ 173 | -ksp_view \ 174 | -ksp_type cg \ 175 | -ksp_rtol 1.0e-8 \ 176 | -pc_type gamg \ 177 | -pc_gamg_coarse_eq_limit 1000 \ 178 | -mg_levels_ksp_type chebyshev \ 179 | -mg_levels_pc_type jacobi \ 180 | -mg_levels_esteig_ksp_type cg \ 181 | -matptap_via scalable 182 | - name: Run elasticity test (GAMG, strong) 183 | run: | 184 | mpirun -np 2 dolfinx-scaling-test \ 185 | --problem_type elasticity \ 186 | --scaling_type strong \ 187 | --ndofs 500000 \ 188 | -log_view \ 189 | -ksp_view \ 190 | 
-ksp_type cg \ 191 | -ksp_rtol 1.0e-8 \ 192 | -pc_type gamg \ 193 | -pc_gamg_coarse_eq_limit 1000 \ 194 | -mg_levels_ksp_type chebyshev \ 195 | -mg_levels_pc_type jacobi \ 196 | -mg_levels_esteig_ksp_type cg \ 197 | -matptap_via scalable 198 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | .vscode 4 | .devcontainer -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Builds a Docker image with the necessary libraries for compiling 2 | # FEniCS. The image is at 3 | # https://hub.docker.com/r/fenicsproject/performance-tests 4 | # 5 | # Authors: Garth N. Wells 6 | 7 | ARG PETSC_VERSION=3.12.4 8 | 9 | FROM ubuntu:20.04 10 | 11 | WORKDIR /tmp 12 | 13 | # Environment variables 14 | ENV OPENBLAS_NUM_THREADS=1 15 | 16 | # Non-Python utilities and libraries 17 | RUN apt-get -qq update && \ 18 | apt-get -y --with-new-pkgs \ 19 | -o Dpkg::Options::="--force-confold" upgrade && \ 20 | apt-get -y install \ 21 | bison \ 22 | clang \ 23 | cmake \ 24 | flex \ 25 | g++ \ 26 | gfortran \ 27 | git \ 28 | libboost-filesystem-dev \ 29 | libboost-iostreams-dev \ 30 | libboost-math-dev \ 31 | libboost-program-options-dev \ 32 | libboost-system-dev \ 33 | libboost-thread-dev \ 34 | libboost-timer-dev \ 35 | liblapack-dev \ 36 | libmpich-dev \ 37 | libopenblas-dev \ 38 | libhdf5-mpich-dev \ 39 | mpich \ 40 | ninja-build \ 41 | python3 \ 42 | python3-dev \ 43 | pkg-config \ 44 | wget && \ 45 | apt-get clean && \ 46 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 47 | 48 | # Install PETSc from source 49 | ARG PETSC_VERSION 50 | RUN git clone --branch v${PETSC_VERSION} --depth 1 https://gitlab.com/petsc/petsc.git && \ 51 | cd petsc && \ 52 | python3 ./configure --with-64-bit-indices=0 \ 53 | --COPTFLAGS="-O3" \ 54 | 
--CXXOPTFLAGS="-O3" \ 55 | --FOPTFLAGS="-O3" \ 56 | --with-c-support \ 57 | --with-fortran-bindings=no \ 58 | --with-debugging=0 \ 59 | --with-shared-libraries \ 60 | --download-hypre \ 61 | --download-ptscotch \ 62 | --prefix=/usr/local/petsc-32 && \ 63 | make && \ 64 | make install && \ 65 | git clean -fdx . && \ 66 | python3 ./configure --with-64-bit-indices=1 \ 67 | --COPTFLAGS="-O3" \ 68 | --CXXOPTFLAGS="-O3" \ 69 | --FOPTFLAGS="-O3" \ 70 | --with-c-support \ 71 | --with-fortran-bindings=no \ 72 | --with-debugging=0 \ 73 | --with-shared-libraries \ 74 | --download-hypre \ 75 | --download-ptscotch \ 76 | --prefix=/usr/local/petsc-64 && \ 77 | make && \ 78 | make install && \ 79 | rm -rf /tmp/* 80 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2017 Chris N. Richardson and Garth N. Wells 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Performance test codes for FEniCSx/DOLFINx 2 | 3 | This repository contains solvers for testing the parallel performance of 4 | DOLFINx and the underlying linear solvers. It tests elliptic equations 5 | (the Poisson equation and elasticity) in three dimensions. 6 | 7 | Representative performance data is available at 8 | https://fenics.github.io/performance-test-results/. 9 | 10 | [![FEniCS Performance Test CI](https://github.com/FEniCS/performance-test/workflows/FEniCS%20Performance%20Test%20CI/badge.svg)](https://github.com/FEniCS/performance-test/actions?query=branch%3Amain) 11 | 12 | ## Building 13 | 14 | The source of the tests is in the `src/` directory. 15 | 16 | 17 | ### Requirements 18 | 19 | - FEniCSx/DOLFINx installation (development version of DOLFINx 20 | **required**) 21 | - PETSc installation 22 | - Boost Program Options 23 | 24 | 25 | ### Compilation 26 | 27 | In the `src/` directory, build the program: 28 | 29 | cmake . 30 | make 31 | 32 | 33 | ## Running tests 34 | 35 | Options for the test are: 36 | 37 | - Problem type (`--problem_type`): `poisson` or `elasticity` 38 | - Scaling type (`--scaling_type`): `strong` (fixed problem size) or 39 | `weak` (fixed problem size per process) 40 | - Number of degrees-of-freedom (`--ndofs`): total (in case of strong 41 | scaling) or per process (for weak scaling) 42 | - Order (`--order`): polynomial order (1, 2, or 3) - only on cube mesh, 43 | defaults to 1. 
44 | - File output (`--output`): `true` or `false` (IO performance depends 45 | heavily on the underlying filesystem) 46 | - Data output directory (`--output_dir`): directory to write solution 47 | data to 48 | 49 | Linear solver options are configured via PETSc command line options, 50 | (single hyphen) as shown below. 51 | 52 | 53 | ## Recommended test configuration 54 | 55 | Suggested options for running tests are listed below. The options 56 | include PETSc performance logging which is useful for assessing 57 | performance. 58 | 59 | ### Elasticity 60 | 61 | For elasticity, a conjugate gradient (CG) solver with a smoothed 62 | aggregation algebraic multigrid (GAMG) preconditioner is recommended. 63 | For a weak scaling test with 8 MPI processes and 500k degrees-of-freedom 64 | per process: 65 | 66 | ``` 67 | mpirun -np 8 ./dolfinx-scaling-test \ 68 | --problem_type elasticity \ 69 | --scaling_type weak \ 70 | --ndofs 500000 \ 71 | -log_view \ 72 | -ksp_view \ 73 | -ksp_type cg \ 74 | -ksp_rtol 1.0e-8 \ 75 | -pc_type gamg \ 76 | -pc_gamg_coarse_eq_limit 1000 \ 77 | -mg_levels_ksp_type chebyshev \ 78 | -mg_levels_pc_type jacobi \ 79 | -mg_levels_esteig_ksp_type cg \ 80 | -matptap_via scalable \ 81 | -options_left 82 | ``` 83 | 84 | For a strong scaling test, with 8 MPI processes and 10M 85 | degrees-of-freedom in total: 86 | 87 | 88 | ``` 89 | mpirun -np 8 ./dolfinx-scaling-test \ 90 | --problem_type elasticity \ 91 | --scaling_type strong \ 92 | --ndofs 10000000 \ 93 | -log_view \ 94 | -ksp_view \ 95 | -ksp_type cg \ 96 | -ksp_rtol 1.0e-8 \ 97 | -pc_type gamg \ 98 | -pc_gamg_coarse_eq_limit 1000 \ 99 | -mg_levels_ksp_type chebyshev \ 100 | -mg_levels_pc_type jacobi \ 101 | -mg_levels_esteig_ksp_type cg \ 102 | -matptap_via scalable \ 103 | -options_left 104 | ``` 105 | 106 | ### Poisson 107 | 108 | For the Poisson equation, a conjugate gradient (CG) solver with a 109 | classical algebraic multigrid (BoomerAMG) preconditioner is 110 | recommended. 
For a weak scaling test with 8 MPI processes and 500k 111 | degrees-of-freedom per process: 112 | 113 | ``` 114 | mpirun -np 8 ./dolfinx-scaling-test \ 115 | --problem_type poisson \ 116 | --scaling_type weak \ 117 | --ndofs 500000 \ 118 | -log_view \ 119 | -ksp_view \ 120 | -ksp_type cg \ 121 | -ksp_rtol 1.0e-8 \ 122 | -pc_type hypre \ 123 | -pc_hypre_type boomeramg \ 124 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 125 | -pc_hypre_boomeramg_agg_nl 4 \ 126 | -pc_hypre_boomeramg_agg_num_paths 2 \ 127 | -options_left 128 | ``` 129 | For a strong scaling test, with 8 MPI processes and 10M 130 | degrees-of-freedom in total: 131 | ``` 132 | mpirun -np 8 ./dolfinx-scaling-test \ 133 | --problem_type poisson \ 134 | --scaling_type strong \ 135 | --ndofs 10000000 \ 136 | -log_view \ 137 | -ksp_view \ 138 | -ksp_type cg \ 139 | -ksp_rtol 1.0e-8 \ 140 | -pc_type hypre \ 141 | -pc_hypre_type boomeramg \ 142 | -pc_hypre_boomeramg_strong_threshold 0.7 \ 143 | -pc_hypre_boomeramg_agg_nl 4 \ 144 | -pc_hypre_boomeramg_agg_num_paths 2 \ 145 | -options_left 146 | ``` 147 | 148 | ## Interpreting the output 149 | 150 | The default log-level diagnostic messages from DOLFINx will be printed, and if `-log_view` is specified, there will also be a performance profile from PETSc. There is also a "Test problem summary" that records the test parameters and environment to aid reproducibility. Finally, there is a table labeled "Summary of timings" that lists various timings of interest (in seconds); the entries that are specific to this test are labeled `ZZZ`. We elaborate on some of them: 151 | 152 | - `ZZZ Create Mesh`: Create the mesh to be used as the spatial discretisation of the domain in the FE problem. 153 | - `ZZZ Create facets and facet->cell connectivity`: Compute the topological connectivity of the mesh, i.e. create the mesh facets and determine which cells are connected to each facet. 
154 | - `ZZZ FunctionSpace`: Create the function space in which the finite element solution will be sought, along with the index maps for the degrees of freedom and their relationship with the mesh. 155 | - `ZZZ Assemble`: Encompassing timer for: 156 | - `ZZZ Create boundary conditions`: Find the mesh’s topological indices and corresponding degree of freedom indices on which to impose boundary data in a strong Dirichlet sense. 157 | - `ZZZ Create RHS function`: This is the step computing the source function $f$, where $\nabla^2u=-f$ (Poisson) and $\nabla\cdot\sigma(u)=-f$ (elasticity, i.e. elastostatics in this case, with $\sigma(u)$ the stress tensor). 158 | - `ZZZ Assemble matrix`: Assemble the matrix $A$ of the finite element formulation, for which we later solve $A\vec{x}=\vec{b}$. 159 | - `ZZZ Assemble vector`: Assemble the right-hand-side vector $\vec{b}$. 160 | - `ZZZ Solve`: Compute the solution of the linear system. This is typically the dominant stage, taking the greatest computational effort. 161 | - `ZZZ Output`: Postprocess and potentially output (with `--output`) results to disk. 162 | 163 | 164 | ## Reference performance data 165 | 166 | Reference performance data is provided [here](performance.md) to help 167 | in assessing performance on a given system. 168 | 169 | 170 | ## Authors and license 171 | 172 | The tests have been developed by Chris N. Richardson 173 | () and Garth N. Wells (). 174 | 175 | The code is covered by the MIT license. See LICENSE.md. 176 | -------------------------------------------------------------------------------- /performance.md: -------------------------------------------------------------------------------- 1 | # Performance test results 2 | 3 | * [Some test results on CSD3 (University of Cambridge HPC system)](https://fenics.github.io/performance-test-results) up to 512 cores on 16 nodes. 
[Raw data](https://github.com/FEniCS/performance-test-results) 4 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | CMakeCache.txt 2 | CMakeFiles/ 3 | Elasticity.c 4 | Elasticity.h 5 | Makefile 6 | Poisson.c 7 | Poisson.h 8 | cmake_install.cmake 9 | dolfinx-scaling-test 10 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | set(PROJECT_NAME dolfinx-scaling-test) 4 | project(${PROJECT_NAME}) 5 | 6 | include(GNUInstallDirs) 7 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 8 | 9 | # Use C++20 10 | set(CMAKE_CXX_STANDARD 20) 11 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 12 | set(CMAKE_CXX_EXTENSIONS OFF) 13 | 14 | # Get DOLFIN configuration data (DOLFINConfig.cmake must be in 15 | # DOLFIN_CMAKE_CONFIG_PATH) 16 | find_package(DOLFINX REQUIRED) 17 | 18 | # set(CMAKE_BUILD_TYPE "Release") 19 | set(CMAKE_CXX_FLAGS "-Ofast ${CMAKE_CXX_FLAGS} -g -Wall") 20 | set(CMAKE_C_FLAGS "-Ofast ${CMAKE_C_FLAGS} -g -Wall") 21 | 22 | # Compile UFL files 23 | add_custom_command( 24 | OUTPUT Poisson.c 25 | COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Poisson.py 26 | DEPENDS Poisson.py 27 | ) 28 | 29 | add_custom_command( 30 | OUTPUT Elasticity.c 31 | COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Elasticity.py 32 | DEPENDS Elasticity.py 33 | ) 34 | 35 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 36 | 37 | # Executable 38 | add_executable(${PROJECT_NAME} main.cpp mesh.cpp elasticity_problem.cpp cgpoisson_problem.cpp poisson_problem.cpp mem.cpp 39 | ${CMAKE_CURRENT_BINARY_DIR}/Elasticity.c 40 | ${CMAKE_CURRENT_BINARY_DIR}/Poisson.c) 41 | 42 | # Find Boost program_options 43 | if(DEFINED ENV{BOOST_ROOT} OR DEFINED BOOST_ROOT) 44 | set(Boost_NO_SYSTEM_PATHS on) 45 | endif() 46 | # 
set(Boost_USE_MULTITHREADED $ENV{BOOST_USE_MULTITHREADED}) 47 | set(Boost_VERBOSE TRUE) 48 | find_package(Boost 1.70 REQUIRED program_options) 49 | 50 | # Target libraries 51 | target_link_libraries(${PROJECT_NAME} dolfinx Boost::program_options pthread) 52 | 53 | message(STATUS ${CMAKE_CXX_FLAGS}) 54 | install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) 55 | -------------------------------------------------------------------------------- /src/Elasticity.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells 2 | # 3 | # This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | import basix.ufl 8 | from ufl import (Coefficient, Identity, FunctionSpace, Mesh, TestFunction, TrialFunction, 9 | dx, grad, inner, tetrahedron, tr) 10 | 11 | # Elasticity parameters 12 | E = 1.0e6 13 | nu = 0.3 14 | mu = E / (2.0 * (1.0 + nu)) 15 | lmbda = E * nu / ((1.0 + nu) * (1.0 - 2.0 * nu)) 16 | cell = tetrahedron 17 | 18 | # Load namespace 19 | ns = vars() 20 | 21 | forms = [] 22 | for degree in range(1, 4): 23 | element = basix.ufl.element("Lagrange", "tetrahedron", degree, shape=(3, )) 24 | domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3, ))) 25 | space = FunctionSpace(domain, element) 26 | 27 | u, v = TrialFunction(space), TestFunction(space) 28 | f = Coefficient(space) 29 | 30 | def eps(v): 31 | return 0.5*(grad(v) + grad(v).T) 32 | 33 | def sigma(v): 34 | return 2.0*mu*eps(v) + lmbda*tr(eps(v))*Identity(3) 35 | 36 | # Add forms to namespace with names a1, a2, a3 etc. 
37 | aname = 'a' + str(degree) 38 | Lname = 'L' + str(degree) 39 | ns[aname] = inner(sigma(u), eps(v))*dx 40 | ns[Lname] = inner(f, v)*dx 41 | 42 | del u, v, f 43 | forms += [ns[aname], ns[Lname]] 44 | -------------------------------------------------------------------------------- /src/Poisson.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells 2 | # 3 | # This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | # 5 | # SPDX-License-Identifier: MIT 6 | 7 | import basix.ufl 8 | from ufl import (Coefficient, FunctionSpace, TestFunction, TrialFunction, Mesh, action, ds, 9 | dx, grad, inner, tetrahedron) 10 | 11 | # Load namespace 12 | ns = vars() 13 | 14 | forms = [] 15 | for degree in range(1, 4): 16 | element = basix.ufl.element("Lagrange", "tetrahedron", degree) 17 | domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3,))) 18 | space = FunctionSpace(domain, element) 19 | 20 | u = TrialFunction(space) 21 | v = TestFunction(space) 22 | f = Coefficient(space) 23 | g = Coefficient(space) 24 | un = Coefficient(space) 25 | 26 | aname = 'a' + str(degree) 27 | Lname = 'L' + str(degree) 28 | Mname = 'M' + str(degree) 29 | 30 | # Insert into namespace so that the forms will be named a1, a2, a3 etc. 31 | ns[aname] = inner(grad(u), grad(v))*dx 32 | ns[Lname] = f*v*dx + g*v*ds 33 | ns[Mname] = action(ns[aname], un) 34 | 35 | # Delete, so that the forms will get unnamed args and coefficients 36 | # and default to v_0, v_1, w0, w1 etc. 37 | del u, v, f, g, un 38 | 39 | forms += [ns[aname], ns[Lname], ns[Mname]] 40 | -------------------------------------------------------------------------------- /src/cg.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2021 Igor A. 
Baratta, Chris Richardson 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace dolfinx; 10 | 11 | namespace linalg 12 | { 13 | /// Compute vector r = alpha*x + y 14 | /// @param[out] r Result 15 | /// @param[in] alpha Scalar that multiplies `x` 16 | /// @param[in] x Vector that is scaled by `alpha` 17 | /// @param[in] y Vector that is added to `alpha*x` 18 | template 19 | void axpy(la::Vector& r, U alpha, const la::Vector& x, 20 | const la::Vector& y) 21 | { 22 | std::transform(x.array().begin(), x.array().end(), y.array().begin(), 23 | r.mutable_array().begin(), 24 | [alpha](auto x, auto y) { return alpha * x + y; }); 25 | } 26 | 27 | /// Solve problem A.x = b using the Conjugate Gradient method 28 | /// @tparam U The scalar type 29 | /// @tparam ApplyFunction Type of the function object "action" 30 | /// @param[in, out] x Solution vector, may be set to an initial guess 31 | /// @param[in] b RHS Vector 32 | /// @param[in] action Function that provides the action of the linear operator 33 | /// @param[in] kmax Maximum number of iterations 34 | /// @param[in] rtol Relative tolerance for convergence 35 | /// @return The number of iterations 36 | /// @pre It is required that the ghost values of `x` and `b` have been 37 | /// updated before this function is called 38 | template 39 | int cg(la::Vector& x, const la::Vector& b, ApplyFunction&& action, 40 | int kmax = 50, double rtol = 1e-8) 41 | { 42 | // Create working vectors 43 | la::Vector r(b), y(b); 44 | 45 | // Compute initial residual r0 = b - Ax0 46 | action(x, y); 47 | axpy(r, U(-1), y, b); 48 | 49 | // Create p work vector 50 | la::Vector p(r); 51 | 52 | // Iterations of CG 53 | auto rnorm0 = la::squared_norm(r); 54 | const auto rtol2 = rtol * rtol; 55 | auto rnorm = rnorm0; 56 | int k = 0; 57 | while (k < kmax) 58 | { 59 | ++k; 60 | 61 | // Compute y = A p 62 | action(p, y); 63 | 64 | // Compute alpha = r.r/p.y 65 | const U alpha = rnorm / la::inner_product(p, y); 66 | 67 | // Update x (x <- x + alpha*p) 68 | axpy(x, alpha, p, x); 69 | 70 
| // Update r (r <- r - alpha*y) 71 | axpy(r, -alpha, y, r); 72 | 73 | // Update residual norm 74 | const auto rnorm_new = la::squared_norm(r); 75 | const U beta = rnorm_new / rnorm; 76 | rnorm = rnorm_new; 77 | 78 | if (rnorm / rnorm0 < rtol2) 79 | break; 80 | 81 | // Update p (p <- beta*p + r) 82 | axpy(p, beta, p, r); 83 | } 84 | 85 | return k; 86 | } 87 | } // namespace linalg 88 | -------------------------------------------------------------------------------- /src/cgpoisson_problem.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #include "cgpoisson_problem.h" 8 | #include "Poisson.h" 9 | #include "cg.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | using namespace dolfinx; 28 | using T = PetscScalar; 29 | 30 | namespace 31 | { 32 | void pack_fn(std::span in, std::span idx, 33 | std::span out) 34 | { 35 | for (std::size_t i = 0; i < idx.size(); ++i) 36 | out[i] = in[idx[i]]; 37 | } 38 | 39 | void unpack_fn(std::span in, std::span idx, 40 | std::span out, std::function op) 41 | { 42 | for (std::size_t i = 0; i < idx.size(); ++i) 43 | out[idx[i]] = op(out[idx[i]], in[i]); 44 | } 45 | } // namespace 46 | 47 | std::tuple>, std::shared_ptr>, 48 | std::function&, const la::Vector&)>> 49 | cgpoisson::problem(std::shared_ptr> mesh, int order, 50 | std::string scatterer) 51 | { 52 | common::Timer t0("ZZZ FunctionSpace"); 53 | 54 | auto element = basix::create_element( 55 | basix::element::family::P, basix::cell::type::tetrahedron, order, 56 | basix::element::lagrange_variant::gll_warped, 57 | basix::element::dpc_variant::unset, false); 58 | 59 | 
auto dolfinx_element 60 | = std::make_shared>(element); 61 | 62 | auto V = std::make_shared>( 63 | fem::create_functionspace(mesh, dolfinx_element)); 64 | 65 | t0.stop(); 66 | t0.flush(); 67 | 68 | common::Timer t1("ZZZ Assemble"); 69 | 70 | common::Timer t2("ZZZ Create boundary conditions"); 71 | // Define boundary condition 72 | auto u0 = std::make_shared>(V); 73 | u0->x()->set(0); 74 | 75 | // Find facets with bc applied 76 | const int tdim = mesh->topology()->dim(); 77 | const std::vector bc_facets = mesh::locate_entities( 78 | *mesh, tdim - 1, 79 | [](auto x) 80 | { 81 | constexpr double eps = 1.0e-8; 82 | std::vector marker(x.extent(1), false); 83 | for (std::size_t p = 0; p < x.extent(1); ++p) 84 | { 85 | double x0 = x(0, p); 86 | if (std::abs(x0) < eps or std::abs(x0 - 1) < eps) 87 | marker[p] = true; 88 | } 89 | return marker; 90 | }); 91 | 92 | // Find constrained dofs 93 | const std::vector bdofs = fem::locate_dofs_topological( 94 | *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets); 95 | 96 | auto bc = std::make_shared>(u0, bdofs); 97 | t2.stop(); 98 | t2.flush(); 99 | 100 | // Define coefficients 101 | common::Timer t3("ZZZ Create RHS function"); 102 | auto f = std::make_shared>(V); 103 | auto g = std::make_shared>(V); 104 | f->interpolate( 105 | [](auto x) -> std::pair, std::vector> 106 | { 107 | std::vector v(x.extent(1)); 108 | for (std::size_t p = 0; p < x.extent(1); ++p) 109 | { 110 | double dx = x(0, p) - 0.5; 111 | double dy = x(1, p) - 0.5; 112 | double dr = dx * dx + dy * dy; 113 | v[p] = 10 * std::exp(-dr / 0.02); 114 | } 115 | 116 | return {std::move(v), {v.size()}}; 117 | }); 118 | g->interpolate( 119 | [](auto x) -> std::pair, std::vector> 120 | { 121 | std::vector f(x.extent(1)); 122 | for (std::size_t p = 0; p < x.extent(1); ++p) 123 | f[p] = std::sin(5 * x(0, p)); 124 | return {f, {f.size()}}; 125 | }); 126 | t3.stop(); 127 | t3.flush(); 128 | 129 | std::vector form_poisson_L 130 | = {form_Poisson_L1, form_Poisson_L2, 
form_Poisson_L3}; 131 | std::vector form_poisson_a 132 | = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3}; 133 | std::vector form_poisson_M 134 | = {form_Poisson_M1, form_Poisson_M2, form_Poisson_M3}; 135 | 136 | // Define variational forms 137 | auto L = std::make_shared>(fem::create_form( 138 | *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {}, {})); 139 | // auto a = std::make_shared>(fem::create_form( 140 | // *form_poisson_a.at(order - 1), {V, V}, 141 | // std::vector>>{}, {}, {})); 142 | 143 | auto un = std::make_shared>(V); 144 | auto M = std::make_shared>(fem::create_form( 145 | *form_poisson_M.at(order - 1), {V}, {{"w0", un}}, {{}}, {}, {})); 146 | 147 | // Create la::Vector 148 | la::Vector b(L->function_spaces()[0]->dofmap()->index_map, 149 | L->function_spaces()[0]->dofmap()->index_map_bs()); 150 | b.set(0); 151 | common::Timer t5("ZZZ Assemble vector"); 152 | const std::vector constants_L = fem::pack_constants(*L); 153 | auto coeffs_L = fem::allocate_coefficient_storage(*L); 154 | fem::pack_coefficients(*L, coeffs_L); 155 | fem::assemble_vector(b.mutable_array(), *L, constants_L, 156 | fem::make_coefficients_span(coeffs_L)); 157 | 158 | // Apply lifting to account for Dirichlet boundary condition 159 | // b <- b - A * x_bc 160 | bc->set(un->x()->mutable_array(), std::nullopt, -1.0); 161 | fem::assemble_vector(b.mutable_array(), *M); 162 | 163 | // Communicate ghost values 164 | b.scatter_rev(std::plus()); 165 | 166 | // Set BC dofs to zero (effectively zeroes columns of A) 167 | bc->set(b.mutable_array(), std::nullopt, 0.0); 168 | b.scatter_fwd(); 169 | 170 | // Pack coefficients and constants 171 | 172 | if (un->x()->array().size() != b.array().size()) 173 | throw std::runtime_error("error"); 174 | // Create Function to hold solution 175 | auto u = std::make_shared>(V); 176 | 177 | std::function&, const la::Vector&)> solver_function 178 | = [M, un, bc, scatterer](fem::Function& u, const la::Vector& b) 179 | { 180 | const 
std::vector constants; 181 | auto coeff = fem::allocate_coefficient_storage(*M); 182 | 183 | auto V = M->function_spaces()[0]; 184 | auto idx_map = V->dofmap()->index_map; 185 | int bs = V->dofmap()->bs(); 186 | common::Scatterer sct(*idx_map, bs); 187 | 188 | std::vector local_buffer(sct.local_buffer_size(), 0); 189 | std::vector remote_buffer(sct.remote_buffer_size(), 0); 190 | 191 | common::Scatterer<>::type type; 192 | if (scatterer == "neighbor") 193 | type = common::Scatterer<>::type::neighbor; 194 | if (scatterer == "p2p") 195 | type = common::Scatterer<>::type::p2p; 196 | 197 | std::vector request = sct.create_request_vector(type); 198 | 199 | // Create function for computing the action of A on x (y = Ax) 200 | auto action = [&](la::Vector& x, la::Vector& y) 201 | { 202 | // Zero y 203 | y.set(0.0); 204 | 205 | // Update coefficient un (just copy data from x to un) 206 | std::copy(x.array().begin(), x.array().end(), 207 | un->x()->mutable_array().begin()); 208 | 209 | // Compute action of A on x 210 | fem::pack_coefficients(*M, coeff); 211 | fem::assemble_vector(y.mutable_array(), *M, std::span(constants), 212 | fem::make_coefficients_span(coeff)); 213 | 214 | // Set BC dofs to zero (effectively zeroes rows of A) 215 | bc->set(y.mutable_array(), std::nullopt, 0.0); 216 | 217 | // Accumuate ghost values 218 | // y.scatter_rev(std::plus()); 219 | 220 | const std::int32_t local_size = bs * idx_map->size_local(); 221 | const std::int32_t num_ghosts = bs * idx_map->num_ghosts(); 222 | std::span remote_data(y.mutable_array().data() + local_size, 223 | num_ghosts); 224 | std::span local_data(y.mutable_array().data(), local_size); 225 | sct.scatter_rev_begin(remote_data, remote_buffer, local_buffer, 226 | pack_fn, request, type); 227 | sct.scatter_rev_end(local_buffer, local_data, unpack_fn, 228 | std::plus(), request); 229 | 230 | // Update ghost values 231 | sct.scatter_fwd_begin(local_data, local_buffer, remote_buffer, pack_fn, 232 | request, type); 233 | 
sct.scatter_fwd_end(remote_buffer, remote_data, unpack_fn, request); 234 | }; 235 | 236 | common::Timer tcg; 237 | int num_it = linalg::cg(*u.x(), b, action, 100, 1e-6); 238 | tcg.stop(); 239 | tcg.flush(); 240 | double time = std::chrono::duration(tcg.elapsed()).count(); 241 | double ndofs_global 242 | = static_cast(V->dofmap()->index_map->size_global()); 243 | double gdofs = (num_it * ndofs_global) / time / 1e9; 244 | 245 | std::cout << "CG matrix-free action processed: " << gdofs << " Gdof/s\n"; 246 | 247 | return num_it; 248 | }; 249 | 250 | return {std::make_shared>(std::move(b)), u, solver_function}; 251 | } 252 | -------------------------------------------------------------------------------- /src/cgpoisson_problem.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace cgpoisson 17 | { 18 | 19 | std::tuple>, 20 | std::shared_ptr>, 21 | std::function&, 22 | const dolfinx::la::Vector&)>> 23 | problem(std::shared_ptr> mesh, int order, std::string scatterer); 24 | 25 | } // namespace poisson 26 | -------------------------------------------------------------------------------- /src/elasticity_problem.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. 
Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #include "elasticity_problem.h" 8 | #include "Elasticity.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | using namespace dolfinx; 30 | using T = PetscScalar; 31 | 32 | namespace 33 | { 34 | // Function to compute the near nullspace for elasticity - it is made up 35 | // of the six rigid body modes 36 | MatNullSpace build_near_nullspace(const fem::FunctionSpace& V) 37 | { 38 | // Create vectors for nullspace basis 39 | auto map = V.dofmap()->index_map; 40 | int bs = V.dofmap()->index_map_bs(); 41 | std::vector> basis(6, la::Vector(map, bs)); 42 | 43 | // x0, x1, x2 translations 44 | std::int32_t length_block = map->size_local() + map->num_ghosts(); 45 | for (int k = 0; k < 3; ++k) 46 | { 47 | std::span x = basis[k].mutable_array(); 48 | for (std::int32_t i = 0; i < length_block; ++i) 49 | x[bs * i + k] = 1.0; 50 | } 51 | 52 | // Rotations 53 | auto x3 = basis[3].mutable_array(); 54 | auto x4 = basis[4].mutable_array(); 55 | auto x5 = basis[5].mutable_array(); 56 | 57 | const std::vector x = V.tabulate_dof_coordinates(false); 58 | const std::int32_t* dofs = V.dofmap()->map().data_handle(); 59 | for (std::size_t i = 0; i < V.dofmap()->map().size(); ++i) 60 | { 61 | std::span xd(x.data() + 3 * dofs[i], 3); 62 | 63 | x3[bs * dofs[i] + 0] = -xd[1]; 64 | x3[bs * dofs[i] + 1] = xd[0]; 65 | 66 | x4[bs * dofs[i] + 0] = xd[2]; 67 | x4[bs * dofs[i] + 2] = -xd[0]; 68 | 69 | x5[bs * dofs[i] + 2] = xd[1]; 70 | x5[bs * dofs[i] + 1] = -xd[2]; 71 | } 72 | 73 | // Orthonormalize basis 74 | la::orthonormalize(std::vector>>( 75 | basis.begin(), basis.end())); 76 | if (!la::is_orthonormal( 77 | std::vector>>( 78 | 
basis.begin(), basis.end()))) 79 | { 80 | throw std::runtime_error("Space not orthonormal"); 81 | } 82 | 83 | // Build PETSc nullspace object 84 | std::int32_t length = bs * map->size_local(); 85 | std::vector> basis_local; 86 | std::transform(basis.cbegin(), basis.cend(), std::back_inserter(basis_local), 87 | [length](auto& x) 88 | { return std::span(x.array().data(), length); }); 89 | MPI_Comm comm = V.mesh()->comm(); 90 | std::vector v = la::petsc::create_vectors(comm, basis_local); 91 | MatNullSpace ns = la::petsc::create_nullspace(comm, v); 92 | std::for_each(v.begin(), v.end(), [](auto v) { VecDestroy(&v); }); 93 | return ns; 94 | } 95 | } // namespace 96 | 97 | std::tuple>, std::shared_ptr>, 98 | std::function&, const la::Vector&)>> 99 | elastic::problem(std::shared_ptr> mesh, int order) 100 | { 101 | common::Timer t0("ZZZ FunctionSpace"); 102 | 103 | auto element = basix::create_element( 104 | basix::element::family::P, basix::cell::type::tetrahedron, order, 105 | basix::element::lagrange_variant::gll_warped, 106 | basix::element::dpc_variant::unset, false); 107 | 108 | auto dolfinx_element = std::make_shared>( 109 | element, std::vector{3}); 110 | auto V = std::make_shared>( 111 | fem::create_functionspace(mesh, dolfinx_element)); 112 | 113 | t0.stop(); 114 | t0.flush(); 115 | 116 | common::Timer t0a("ZZZ Create boundary conditions"); 117 | 118 | // Define boundary condition 119 | auto u0 = std::make_shared>(V); 120 | u0->x()->set(0); 121 | 122 | const int tdim = mesh->topology()->dim(); 123 | 124 | // Find facets with bc applied 125 | const std::vector bc_facets = mesh::locate_entities( 126 | *mesh, tdim - 1, 127 | [](auto x) 128 | { 129 | constexpr double eps = 1.0e-8; 130 | std::vector marker(x.extent(1), false); 131 | for (std::size_t p = 0; p < x.extent(1); ++p) 132 | { 133 | double x1 = x(1, p); 134 | if (std::abs(x1) < eps) 135 | marker[p] = true; 136 | } 137 | return marker; 138 | }); 139 | 140 | // Find constrained dofs 141 | const std::vector 
bdofs = fem::locate_dofs_topological( 142 | *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets); 143 | 144 | // Bottom (x[1] = 0) surface 145 | auto bc = std::make_shared>(u0, bdofs); 146 | 147 | t0a.stop(); 148 | t0a.flush(); 149 | 150 | common::Timer t0b("ZZZ Create RHS function"); 151 | 152 | // Define coefficients 153 | auto f = std::make_shared>(V); 154 | f->interpolate( 155 | [](auto x) -> std::pair, std::vector> 156 | { 157 | std::vector vdata(x.extent(0) * x.extent(1)); 158 | namespace stdex 159 | = MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; 160 | MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< 161 | T, 162 | MDSPAN_IMPL_STANDARD_NAMESPACE::extents< 163 | std::size_t, 3, MDSPAN_IMPL_STANDARD_NAMESPACE::dynamic_extent>> 164 | v(vdata.data(), x.extent(0), x.extent(1)); 165 | for (std::size_t p = 0; p < x.extent(1); ++p) 166 | { 167 | double dx = x(0, p) - 0.5; 168 | double dz = x(2, p) - 0.5; 169 | double r = std::sqrt(dx * dx + dz * dz); 170 | v(0, p) = -dz * r * x(1, p); 171 | v(1, p) = 1.0; 172 | v(2, p) = dx * r * x(1, p); 173 | } 174 | 175 | return {vdata, {v.extent(0), v.extent(1)}}; 176 | }); 177 | 178 | t0b.stop(); 179 | t0b.flush(); 180 | 181 | common::Timer t0c("ZZZ Create forms"); 182 | 183 | // Define variational forms 184 | std::vector form_elasticity_L 185 | = {form_Elasticity_L1, form_Elasticity_L2, form_Elasticity_L3}; 186 | std::vector form_elasticity_a 187 | = {form_Elasticity_a1, form_Elasticity_a2, form_Elasticity_a3}; 188 | auto L = std::make_shared>(fem::create_form( 189 | *form_elasticity_L.at(order - 1), {V}, {{"w0", f}}, {}, {}, {})); 190 | auto a = std::make_shared>(fem::create_form( 191 | *form_elasticity_a.at(order - 1), {V, V}, {}, {}, {}, {})); 192 | t0c.stop(); 193 | t0c.flush(); 194 | 195 | // Create matrices and vector, and assemble system 196 | std::shared_ptr A = std::make_shared( 197 | fem::petsc::create_matrix(*a), false); 198 | 199 | common::Timer t2("ZZZ Assemble matrix"); 200 | const 
std::vector constants_a = fem::pack_constants(*a); 201 | auto coeffs_a = fem::allocate_coefficient_storage(*a); 202 | fem::pack_coefficients(*a, coeffs_a); 203 | fem::assemble_matrix(la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES), 204 | *a, std::span(constants_a), 205 | fem::make_coefficients_span(coeffs_a), {*bc}); 206 | MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY); 207 | MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY); 208 | fem::set_diagonal(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V, 209 | {*bc}); 210 | MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY); 211 | MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY); 212 | t2.stop(); 213 | t2.flush(); 214 | 215 | // Wrap la::Vector with Petsc Vec 216 | la::Vector b(L->function_spaces()[0]->dofmap()->index_map, 217 | L->function_spaces()[0]->dofmap()->index_map_bs()); 218 | b.set(0); 219 | common::Timer t3("ZZZ Assemble vector"); 220 | const std::vector constants_L = fem::pack_constants(*L); 221 | auto coeffs_L = fem::allocate_coefficient_storage(*L); 222 | fem::pack_coefficients(*L, coeffs_L); 223 | fem::assemble_vector(b.mutable_array(), *L, constants_L, 224 | fem::make_coefficients_span(coeffs_L)); 225 | fem::apply_lifting(b.mutable_array(), {*a}, {constants_L}, 226 | {fem::make_coefficients_span(coeffs_L)}, 227 | {{*bc}}, {}, 1.0); 228 | b.scatter_rev(std::plus<>()); 229 | bc->set(b.mutable_array(), std::nullopt); 230 | t3.stop(); 231 | t3.flush(); 232 | 233 | common::Timer t4("ZZZ Create near-nullspace"); 234 | 235 | // Create Function to hold solution 236 | auto u = std::make_shared>(V); 237 | 238 | // Build near-nullspace and attach to matrix 239 | MatNullSpace ns = build_near_nullspace(*V); 240 | MatSetNearNullSpace(A->mat(), ns); 241 | MatNullSpaceDestroy(&ns); 242 | 243 | t4.stop(); 244 | t4.flush(); 245 | 246 | std::function&, const la::Vector&)> solver_function 247 | = [A](fem::Function& u, const la::Vector& b) 248 | { 249 | // Create solver 250 | la::petsc::KrylovSolver solver(MPI_COMM_WORLD); 251 
| solver.set_from_options(); 252 | solver.set_operator(A->mat()); 253 | 254 | // Wrap la::Vector 255 | la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false); 256 | la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false); 257 | 258 | // Solve 259 | int num_iter = solver.solve(x.vec(), _b.vec()); 260 | return num_iter; 261 | }; 262 | 263 | return {std::make_shared>(std::move(b)), u, solver_function}; 264 | } 265 | -------------------------------------------------------------------------------- /src/elasticity_problem.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace dolfinx::mesh 16 | { 17 | template 18 | class Mesh; 19 | } 20 | 21 | namespace elastic 22 | { 23 | 24 | std::tuple>, 25 | std::shared_ptr>, 26 | std::function&, 27 | const dolfinx::la::Vector&)>> 28 | problem(std::shared_ptr> mesh, int order); 29 | 30 | } // namespace elastic 31 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2022 Chris N. Richardson and Garth N. 
Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #include "cgpoisson_problem.h" 8 | #include "elasticity_problem.h" 9 | #include "mem.h" 10 | #include "mesh.h" 11 | #include "poisson_problem.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace po = boost::program_options; 30 | 31 | std::string int64_to_human(std::int64_t n) 32 | { 33 | double r = static_cast(n); 34 | const std::string name[] = {"", "thousand", "million", "billion", "trillion"}; 35 | 36 | int i = 0; 37 | while (r > 1000.0) 38 | { 39 | r /= 1000.0; 40 | i++; 41 | } 42 | if (i > 4) 43 | throw std::runtime_error("number too big"); 44 | 45 | std::stringstream s; 46 | if (i == 0) 47 | return s.str(); 48 | s << " (" << std::setprecision(3) << r << " " << name[i] << ")"; 49 | return s.str(); 50 | } 51 | 52 | void solve(int argc, char* argv[]) 53 | { 54 | po::options_description desc("Allowed options"); 55 | bool mem_profile; 56 | bool use_subcomm; 57 | desc.add_options()("help,h", "print usage message")( 58 | "problem_type", po::value()->default_value("poisson"), 59 | "problem (poisson, cgpoisson, or elasticity)")( 60 | "mesh_type", po::value()->default_value("cube"), 61 | "mesh (cube or unstructured)")( 62 | "memory_profiling", po::bool_switch(&mem_profile)->default_value(false), 63 | "turn on memory logging")( 64 | "subcomm_partition", po::bool_switch(&use_subcomm)->default_value(false), 65 | "Use sub-communicator for partitioning")( 66 | "scaling_type", po::value()->default_value("weak"), 67 | "scaling (weak or strong)")( 68 | "output", po::value()->default_value(""), 69 | "output directory (no output unless this is set)")( 70 | "ndofs", po::value()->default_value(50000), 71 | "number of degrees of freedom")( 72 | "order", 
po::value()->default_value(1), "polynomial order")( 73 | "scatterer", po::value()->default_value("neighbor"), 74 | "scatterer for CG (neighbor or p2p)"); 75 | 76 | po::variables_map vm; 77 | po::store(po::command_line_parser(argc, argv) 78 | .options(desc) 79 | .allow_unregistered() 80 | .run(), 81 | vm); 82 | po::notify(vm); 83 | 84 | if (vm.count("help")) 85 | { 86 | std::cout << desc << std::endl; 87 | ; 88 | return; 89 | } 90 | 91 | const std::string problem_type = vm["problem_type"].as(); 92 | const std::string mesh_type = vm["mesh_type"].as(); 93 | const std::string scaling_type = vm["scaling_type"].as(); 94 | const std::size_t ndofs = vm["ndofs"].as(); 95 | const int order = vm["order"].as(); 96 | const std::string scatterer = vm["scatterer"].as(); 97 | const std::string output_dir = vm["output"].as(); 98 | const bool output = (output_dir.size() > 0); 99 | const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD); 100 | 101 | bool quit_flag = false; 102 | std::thread mem_thread; 103 | 104 | if (mem_profile and mpi_rank == 0) 105 | { 106 | mem_thread = std::thread(process_mem_usage, std::ref(quit_flag)); 107 | } 108 | 109 | bool strong_scaling; 110 | if (scaling_type == "strong") 111 | strong_scaling = true; 112 | else if (scaling_type == "weak") 113 | strong_scaling = false; 114 | else 115 | throw std::runtime_error("Scaling type '" + scaling_type + "` unknown"); 116 | 117 | // Get number of processes 118 | const std::size_t num_processes = dolfinx::MPI::size(MPI_COMM_WORLD); 119 | 120 | // Assemble problem 121 | std::shared_ptr> mesh; 122 | std::shared_ptr> b; 123 | std::shared_ptr> u; 124 | std::function&, 125 | const dolfinx::la::Vector&)> 126 | solver_function; 127 | 128 | const int ndofs_per_node = (problem_type == "elasticity") ? 
3 : 1; 129 | 130 | dolfinx::common::Timer t0("ZZZ Create Mesh"); 131 | if (mesh_type == "cube") 132 | { 133 | mesh = std::make_shared>( 134 | create_cube_mesh(MPI_COMM_WORLD, ndofs, strong_scaling, ndofs_per_node, 135 | order, use_subcomm)); 136 | } 137 | else 138 | { 139 | mesh = create_spoke_mesh(MPI_COMM_WORLD, ndofs, strong_scaling, 140 | ndofs_per_node); 141 | } 142 | t0.stop(); 143 | t0.flush(); 144 | 145 | dolfinx::common::Timer t_ent( 146 | "ZZZ Create facets and facet->cell connectivity"); 147 | mesh->topology_mutable()->create_entities(2); 148 | mesh->topology_mutable()->create_connectivity(2, 3); 149 | t_ent.stop(); 150 | t_ent.flush(); 151 | 152 | if (problem_type == "poisson") 153 | { 154 | // Create Poisson problem 155 | std::tie(b, u, solver_function) = poisson::problem(mesh, order); 156 | } 157 | else if (problem_type == "cgpoisson") 158 | { 159 | // Create Poisson problem 160 | std::tie(b, u, solver_function) 161 | = cgpoisson::problem(mesh, order, scatterer); 162 | } 163 | else if (problem_type == "elasticity") 164 | { 165 | // Create elasticity problem. Near-nullspace will be attached to the 166 | // linear operator (matrix). 
167 | std::tie(b, u, solver_function) = elastic::problem(mesh, order); 168 | } 169 | else 170 | throw std::runtime_error("Unknown problem type: " + problem_type); 171 | 172 | // Print simulation summary 173 | if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0) 174 | { 175 | char petsc_version[256]; 176 | PetscGetVersion(petsc_version, 256); 177 | 178 | const std::int64_t num_dofs 179 | = u->function_space()->dofmap()->index_map->size_global() 180 | * u->function_space()->dofmap()->index_map_bs(); 181 | const int tdim = mesh->topology()->dim(); 182 | const std::int64_t num_cells 183 | = mesh->topology()->index_map(tdim)->size_global(); 184 | const std::string num_cells_human = int64_to_human(num_cells); 185 | const std::string num_dofs_human = int64_to_human(num_dofs); 186 | std::cout 187 | << "----------------------------------------------------------------" 188 | << std::endl; 189 | std::cout << "Test problem summary" << std::endl; 190 | std::cout << " dolfinx version: " << DOLFINX_VERSION_STRING << std::endl; 191 | std::cout << " dolfinx hash: " << DOLFINX_VERSION_GIT << std::endl; 192 | std::cout << " ufl hash: " << UFCX_SIGNATURE << std::endl; 193 | std::cout << " petsc version: " << petsc_version << std::endl; 194 | std::cout << " Problem type: " << problem_type << std::endl; 195 | std::cout << " Scaling type: " << scaling_type << std::endl; 196 | std::cout << " Num processes: " << num_processes << std::endl; 197 | std::cout << " Num cells: " << num_cells << num_cells_human 198 | << std::endl; 199 | std::cout << " Total degrees of freedom: " << num_dofs 200 | << num_dofs_human << std::endl; 201 | std::cout << " Average degrees of freedom per process: " 202 | << num_dofs / dolfinx::MPI::size(MPI_COMM_WORLD) << std::endl; 203 | std::cout 204 | << "----------------------------------------------------------------" 205 | << std::endl; 206 | } 207 | 208 | dolfinx::common::Timer t5("ZZZ Solve"); 209 | int num_iter = solver_function(*u, *b); 210 | t5.stop(); 211 | 
t5.flush(); 212 | 213 | if (output) 214 | { 215 | dolfinx::common::Timer t6("ZZZ Output"); 216 | std::string filename 217 | = output_dir + "/solution-" + std::to_string(num_processes) + ".xdmf"; 218 | dolfinx::io::XDMFFile file(MPI_COMM_WORLD, filename, "w"); 219 | file.write_mesh(*mesh); 220 | file.write_function(*u, 0.0); 221 | t6.stop(); 222 | t6.flush(); 223 | } 224 | 225 | // Display timings 226 | dolfinx::list_timings(MPI_COMM_WORLD); 227 | 228 | // Report number of Krylov iterations 229 | double norm = dolfinx::la::norm(*(u->x())); 230 | if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0) 231 | { 232 | std::cout << "*** Number of Krylov iterations: " << num_iter << std::endl; 233 | std::cout << "*** Solution norm: " << norm << std::endl; 234 | } 235 | 236 | if (mem_profile and mpi_rank == 0) 237 | { 238 | quit_flag = true; 239 | mem_thread.join(); 240 | } 241 | } 242 | 243 | int main(int argc, char* argv[]) 244 | { 245 | dolfinx::common::Timer t0("Init MPI"); 246 | MPI_Init(&argc, &argv); 247 | t0.stop(); 248 | t0.flush(); 249 | 250 | dolfinx::common::Timer t1("Init logging"); 251 | dolfinx::init_logging(argc, argv); 252 | t1.stop(); 253 | t1.flush(); 254 | 255 | dolfinx::common::Timer t2("Init PETSc"); 256 | PetscInitialize(&argc, &argv, nullptr, nullptr); 257 | t2.stop(); 258 | t2.flush(); 259 | 260 | // Set the logging thread name to show the process rank and enable on 261 | // rank 0 (add more here if desired) 262 | const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD); 263 | std::string thread_name = "RANK: " + std::to_string(mpi_rank); 264 | std::string fmt = "[%Y-%m-%d %H:%M:%S.%e] [" + thread_name + "] [%l] %v"; 265 | spdlog::set_pattern(fmt); 266 | if (mpi_rank == 0) 267 | spdlog::set_level(spdlog::level::info); 268 | 269 | solve(argc, argv); 270 | 271 | PetscFinalize(); 272 | MPI_Finalize(); 273 | 274 | return 0; 275 | } 276 | -------------------------------------------------------------------------------- /src/mem.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2021 Chris N. Richardson 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | void process_mem_usage(bool& quit) 19 | { 20 | std::string fmt = "[%Y-%m-%d %H:%M:%S.%e] [MEM] [%l] %v"; 21 | spdlog::set_pattern(fmt); 22 | 23 | const int page_size_bytes = sysconf(_SC_PAGE_SIZE); 24 | 25 | while (!quit) 26 | { 27 | std::ifstream f("/proc/self/stat", std::ios_base::in); 28 | std::istream_iterator it(f); 29 | std::advance(it, 21); 30 | 31 | std::size_t vsize, rss; 32 | f >> vsize >> rss; 33 | f.close(); 34 | spdlog::warn("VSIZE={}, RSS={}", vsize / 1024, 35 | rss * page_size_bytes / 1024); 36 | std::this_thread::sleep_for(std::chrono::milliseconds(100)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/mem.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2021 Chris N. Richardson 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | /// Thread to output memory usage to logger 8 | void process_mem_usage(bool& quit); 9 | -------------------------------------------------------------------------------- /src/mesh.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Chris N. Richardson and Garth N. Wells 2 | // Licensed under the MIT License. See LICENSE file in the project 3 | // root for full license information. 
4 | 5 | #include "mesh.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace 22 | { 23 | // The numbers of lower-dimensional cells of the CW complex of the right prism. 24 | // 25 | // The right prism with dimensions i x j x k is uniformly decomposed 26 | // into ijk unit cubes, and each cube is decomposed into 6 tetrahedra; 27 | // the decomposition procedure is described in Hatcher's "Algebraic 28 | // Topology", on the proof of Theorem 2.10 [1], although pictures of 29 | // the decomposition for the particular case of 3 dimensions simply 30 | // can be viewed online by searching for "tetrahedral decomposition of 31 | // cube". 32 | // 33 | // This decomposition of the right prism leads to a number of 34 | // vertices, edges, faces, and tetrahedra (cells). The counting of 35 | // edges and faces is complicated by the fact that many cells might 36 | // share an edge, and two cells might share a face. 37 | // 38 | // The variable @param nrefine controls the dyadic subdivision of the 39 | // prism; essentially equivalent to scaling up the prism by a factor 40 | // of 2^nrefine in all directions. It should be a nonnegative small 41 | // integer. 42 | // 43 | // 1. Available at . 
44 | constexpr std::tuple 45 | num_entities(std::int64_t i, std::int64_t j, std::int64_t k, int nrefine) { 46 | i <<= nrefine; 47 | j <<= nrefine; 48 | k <<= nrefine; 49 | std::int64_t vertices = (i + 1) * (j + 1) * (k + 1); 50 | std::int64_t edges = 7*i*j*k + 3*(i*j + i*k + j*k) + (i + j + k); 51 | std::int64_t faces = 12*i*j*k + 2*(i*j + i*k + j*k); 52 | std::int64_t cells = 6 * (i * j * k); 53 | return {vertices, edges, faces, cells}; 54 | } 55 | 56 | std::int64_t num_pdofs(std::int64_t i, std::int64_t j, std::int64_t k, 57 | int nrefine, int order) 58 | { 59 | auto [nv, ne, nf, nc] = num_entities(i, j, k, nrefine); 60 | 61 | switch (order) 62 | { 63 | case 1: 64 | return nv; 65 | case 2: 66 | return nv + ne; 67 | case 3: 68 | return nv + 2 * ne + nf; 69 | case 4: 70 | return nv + 3 * ne + 3 * nf + nc; 71 | default: 72 | throw std::runtime_error("Order not supported"); 73 | } 74 | } 75 | 76 | } // namespace 77 | 78 | dolfinx::mesh::Mesh 79 | create_cube_mesh(MPI_Comm comm, std::size_t target_dofs, bool target_dofs_total, 80 | std::size_t dofs_per_node, int order, bool use_subcomm) 81 | { 82 | // Get number of processes 83 | const std::size_t num_processes = dolfinx::MPI::size(comm); 84 | 85 | // Target total dofs 86 | std::int64_t N = 0; 87 | if (target_dofs_total == true) 88 | N = target_dofs / dofs_per_node; 89 | else 90 | N = target_dofs * num_processes / dofs_per_node; 91 | 92 | std::int64_t Nx, Ny, Nz; 93 | int r = 0; 94 | 95 | // Choose Nx_max carefully. If too large, the base mesh may become too 96 | // large for the partitioner; likewise, if too small, it will fail on 97 | // large numbers of processes. 
98 | const std::int64_t Nx_max = 200; 99 | 100 | // Get initial guess for Nx, Ny, Nz, r 101 | Nx = 1; 102 | std::int64_t ndofs = 0; 103 | while (ndofs < N) 104 | { 105 | // Increase base mesh size 106 | ++Nx; 107 | if (Nx > Nx_max) 108 | { 109 | // Base mesh got too big, so add refinement levels 110 | // Each increase will dramatically (~8x) increase the number of 111 | // dofs 112 | while (ndofs < N) 113 | { 114 | // Keep on refining until we have overshot 115 | ++r; 116 | ndofs = num_pdofs(Nx, Nx, Nx, r, order); 117 | } 118 | while (ndofs > N) 119 | { 120 | // Shrink base mesh until dofs are back on target 121 | --Nx; 122 | ndofs = num_pdofs(Nx, Nx, Nx, r, order); 123 | } 124 | } 125 | ndofs = num_pdofs(Nx, Nx, Nx, r, order); 126 | } 127 | 128 | Ny = Nx; 129 | Nz = Nx; 130 | 131 | // Optimise number of dofs by trying nearby mesh sizes +/- 5 or 10 in 132 | // each dimension 133 | 134 | std::size_t mindiff = 1000000; 135 | for (std::int64_t i = Nx - 10; i < Nx + 10; ++i) 136 | { 137 | for (std::int64_t j = i - 5; j < i + 5; ++j) 138 | { 139 | for (std::int64_t k = i - 5; k < i + 5; ++k) 140 | { 141 | std::size_t diff = std::abs(num_pdofs(i, j, k, r, order) - N); 142 | if (diff < mindiff) 143 | { 144 | mindiff = diff; 145 | Nx = i; 146 | Ny = j; 147 | Nz = k; 148 | } 149 | } 150 | } 151 | } 152 | 153 | #ifdef HAS_PARMETIS 154 | auto graph_part = dolfinx::graph::parmetis::partitioner(); 155 | #elif HAS_PTSCOTCH 156 | auto graph_part = dolfinx::graph::scotch::partitioner( 157 | dolfinx::graph::scotch::strategy::scalability); 158 | #elif HAS_KAHIP 159 | auto graph_part = dolfinx::graph::kahip::partitioner(); 160 | #else 161 | #error "No mesh partitioner has been selected" 162 | #endif 163 | 164 | MPI_Comm sub_comm; 165 | 166 | if (use_subcomm) 167 | { 168 | // Create a sub-communicator for mesh partitioning 169 | MPI_Comm shm_comm; 170 | // Get a local comm on each node 171 | MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, 172 | &shm_comm); 173 | int 
shm_comm_rank = dolfinx::MPI::rank(shm_comm); 174 | MPI_Comm_free(&shm_comm); 175 | // Create a comm across nodes, using rank 0 of the local comm on each node 176 | int color = (shm_comm_rank == 0) ? 0 : MPI_UNDEFINED; 177 | MPI_Comm_split(comm, color, 0, &sub_comm); 178 | } 179 | else 180 | MPI_Comm_dup(comm, &sub_comm); 181 | 182 | auto cell_part = dolfinx::mesh::create_cell_partitioner( 183 | dolfinx::mesh::GhostMode::none, graph_part); 184 | auto mesh = dolfinx::mesh::create_box( 185 | comm, sub_comm, {{{0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}}}, {Nx, Ny, Nz}, 186 | dolfinx::mesh::CellType::tetrahedron, cell_part); 187 | 188 | MPI_Comm_free(&sub_comm); 189 | 190 | if (dolfinx::MPI::rank(mesh.comm()) == 0) 191 | { 192 | std::cout << "UnitCube (" << Nx << "x" << Ny << "x" << Nz 193 | << ") to be refined " << r << " times" << std::endl; 194 | } 195 | 196 | for (int i = 0; i < r; ++i) 197 | { 198 | mesh.topology_mutable()->create_connectivity(3, 1); 199 | auto [new_mesh, _parent_edges, _parent_facet] = dolfinx::refinement::refine( 200 | mesh, std::nullopt, 201 | dolfinx::mesh::create_cell_partitioner(dolfinx::mesh::GhostMode::shared_facet), 202 | dolfinx::refinement::Option::parent_cell_and_facet); 203 | mesh = std::move(new_mesh); 204 | } 205 | 206 | return mesh; 207 | } 208 | //-----------------------------------------------------------------------------
/// Create an unstructured tetrahedral "spoke" mesh.
///
/// The coarse geometry is a closed ring of `n` blocks with a curling,
/// tapering spur of `lspur` blocks attached to each ring segment; every
/// block is split into six tetrahedra. It is generated on rank 0 only
/// (all other ranks pass empty arrays) and handed to
/// dolfinx::mesh::create_mesh.
///
/// The mesh is then refined uniformly while the global number of
/// vertices plus edges is below the target. Finally up to five trial
/// refinements of a subset of the edges are made, adjusting the number of
/// marked edges by linear interpolation between a lower and an upper
/// bracket, so that the vertex count approaches the target.
///
/// @param comm MPI communicator on which the mesh is created
/// @param target_dofs Requested problem size
/// @param target_dofs_total If true `target_dofs` is taken as a global
/// figure, otherwise it is multiplied by the size of `comm`
/// @param dofs_per_node Divisor applied to `target_dofs` to obtain the
/// targeted entity count
/// @return The mesh produced by the last trial refinement
std::shared_ptr<dolfinx::mesh::Mesh<double>>
create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs,
                  bool target_dofs_total, std::size_t dofs_per_node)
{
  // Held in 64 bits: a per-process target multiplied by the number of
  // ranks exceeds the range of int on large runs
  std::int64_t target
      = static_cast<std::int64_t>(target_dofs / dofs_per_node);
  const int mpi_size = dolfinx::MPI::size(comm);
  if (!target_dofs_total)
    target *= mpi_size;

  // Parameters controlling shape
  constexpr int n = 17;       // number of spokes
  constexpr double r0 = 0.25; // inner radius of ring
  constexpr double r1 = 0.5;  // outer radius of ring

  constexpr double h0 = 1.2; // height (inner)
  constexpr double h1 = 1.0; // height (outer)

  constexpr int lspur = 6;     // number of elements in each spoke
  constexpr double l0 = 0.5;   // length of each element in spoke
  constexpr double dth = 0.15; // curl (angle increment) as spoke goes out
  constexpr double tap
      = 0.9; // taper (fractional height decrease on each element)

  // Subdivision of a cube into 6 tetrahedra
  constexpr int cube[6][4] = {{0, 1, 2, 4}, {1, 2, 4, 5}, {2, 4, 5, 6},
                              {0, 2, 3, 4}, {6, 7, 4, 2}, {2, 3, 4, 7}};

  // Only process 0 holds points and cells; the other ranks keep the
  // arrays empty
  const int mpi_rank = dolfinx::MPI::rank(comm);
  const bool is_root = (mpi_rank == 0);
  const int npoints = is_root ? n * 4 + n * lspur * 4 : 0;
  const int ncells = is_root ? n * 6 + n * lspur * 6 : 0;

  std::vector<double> x(npoints * 3);
  std::vector<std::int64_t> topo(4 * ncells);
  if (is_root)
  {
    int p = 0; // next unused point index
    int c = 0; // next unused cell index

    // Append the six tetrahedra of one block given its eight corners
    auto add_block_cells = [&](const std::array<int, 8>& corners)
    {
      for (int k = 0; k < 6; ++k)
      {
        for (int j = 0; j < 4; ++j)
          topo[4 * c + j] = corners[cube[k][j]];
        ++c;
      }
    };

    // Store the coordinates of point `idx`
    auto set_point = [&x](int idx, double px, double py, double pz)
    {
      x[3 * idx] = px;
      x[3 * idx + 1] = py;
      x[3 * idx + 2] = pz;
    };

    // Add n 'cubes' to make a joined up ring.
    for (int i = 0; i < n; ++i)
    {
      std::cout << "Adding cube " << i << std::endl;

      // Corners of the current cube; the modulus wraps the last cube
      // back onto the points of the first one to close the ring
      std::array<int, 8> pts;
      for (int j = 0; j < 8; ++j)
        pts[j] = (i * 4 + j) % (n * 4);
      add_block_cells(pts);

      // Four new points on the radial cross-section at angle th
      const double th = 2 * std::numbers::pi * i / n;
      set_point(p, r0 * std::cos(th), r0 * std::sin(th), h0);
      set_point(p + 1, r0 * std::cos(th), r0 * std::sin(th), -h0);
      set_point(p + 2, r1 * std::cos(th), r1 * std::sin(th), -h1);
      set_point(p + 3, r1 * std::cos(th), r1 * std::sin(th), h1);
      p += 4;
    }

    // Add spurs to ring
    for (int i = 0; i < n; ++i)
    {
      std::cout << "Adding spur " << i << std::endl;

      // Intermediate angle between two faces
      const double th0 = 2 * std::numbers::pi * (i + 0.5) / n;

      // Starting points on outer edge of ring; the last four entries
      // are filled in as each spur element is created
      std::array<int, 8> pts = {(i * 4 + 2) % (n * 4),
                                (i * 4 + 3) % (n * 4),
                                (i * 4 + 7) % (n * 4),
                                (i * 4 + 6) % (n * 4),
                                0,
                                0,
                                0,
                                0};

      // Build each spur outwards
      for (int k = 0; k < lspur; ++k)
      {
        // New outer face: copy of the inner face, pushed outwards along
        // the curled direction and tapered in height
        for (int j = 0; j < 4; ++j)
        {
          pts[j + 4] = p;
          std::span<double> xp(x.data() + 3 * p, 3);
          std::copy_n(std::next(x.begin(), 3 * pts[j]), 3, xp.begin());
          xp[0] += l0 * std::cos(th0 + k * dth);
          xp[1] += l0 * std::sin(th0 + k * dth);
          xp[2] *= std::pow(tap, k);
          ++p;
        }

        add_block_cells(pts);

        // Outer face becomes inner face of next cube
        std::copy_n(pts.begin() + 4, 4, pts.begin());
      }
    }

    // Check geometric sizes and rescale.
    // NOTE(review): the minima start at 0 and are compared against
    // absolute values, so they remain 0; the shift below therefore does
    // not move any point. Kept as is so the geometry is unchanged.
    double x0min(0), x0max(0), x1min(0), x1max(0), x2min(0), x2max(0);
    for (std::size_t i = 0; i < x.size(); i += 3)
    {
      x0min = std::min(std::abs(x[i]), x0min);
      x0max = std::max(std::abs(x[i]), x0max);

      x1min = std::min(std::abs(x[i + 1]), x1min);
      x1max = std::max(std::abs(x[i + 1]), x1max);

      x2min = std::min(std::abs(x[i + 2]), x2min);
      x2max = std::max(std::abs(x[i + 2]), x2max);
    }

    for (std::size_t i = 0; i < x.size(); i += 3)
      x[i] -= 0.9 * x0min;
    std::transform(x.begin(), x.end(), x.begin(),
                   [scale = 0.9 * x0max](auto v) { return v / scale; });

    spdlog::info("x range = {} - {}", x0min, x0max);
    spdlog::info("y range = {} - {}", x1min, x1max);
    spdlog::info("z range = {} - {}", x2min, x2max);
  }

  // New Mesh
  dolfinx::fem::CoordinateElement<double> element(
      dolfinx::mesh::CellType::tetrahedron, 1);

  auto mesh = std::make_shared<dolfinx::mesh::Mesh<double>>(
      dolfinx::mesh::create_mesh(comm, topo, element, x, {x.size() / 3, 3},
                                 dolfinx::mesh::GhostMode::none));

  mesh->topology_mutable()->create_entities(1);

  // Refine all cells while vertices + edges is still below the target
  while (mesh->topology()->index_map(0)->size_global()
             + mesh->topology()->index_map(1)->size_global()
         < target)
  {
    auto [new_mesh, parent_edges, parent_facets]
        = dolfinx::refinement::refine(
            *mesh, std::nullopt,
            dolfinx::mesh::create_cell_partitioner(
                dolfinx::mesh::GhostMode::shared_facet),
            dolfinx::refinement::Option::parent_cell_and_facet);
    // Move rather than copy the refined mesh into shared ownership
    mesh = std::make_shared<dolfinx::mesh::Mesh<double>>(
        std::move(new_mesh));
    mesh->topology_mutable()->create_entities(1);
  }

  // `mesh` is not modified below, so its global sizes are fixed
  const std::int64_t num_vertices_global
      = mesh->topology()->index_map(0)->size_global();
  const std::int64_t num_edges_global
      = mesh->topology()->index_map(1)->size_global();

  // Missing vertices relative to the number of edges
  const double fraction
      = static_cast<double>(target - num_vertices_global) / num_edges_global;

  if (mpi_rank == 0)
  {
    std::cout << "Create unstructured mesh: desired fraction=" << fraction
              << std::endl;
  }

  // Estimate step needed to get desired refinement fraction
  // using some heuristics and bisection method.
  // Edges are marked in a repeating pattern of period 2000: the first
  // `nmarked` edges of every 2000 are refined.
  constexpr int mark_period = 2000;

  // Clamp for the initial guess only: pow of a negative base with a
  // fractional exponent is NaN, and converting NaN to int is undefined
  const double guess_fraction = std::min(std::max(fraction, 0.0), 1.0);
  int nmarked = static_cast<int>(std::pow(guess_fraction, 1.6) * mark_period);

  double f_lower = 0.0;
  double f_upper = 1.0;
  int lmark = 0;
  int umark = mark_period;

  std::shared_ptr<dolfinx::mesh::Mesh<double>> meshi;
  for (int k = 0; k < 5; ++k)
  {
    // Trial step
    mesh->topology_mutable()->create_entities(1);
    std::vector<std::int32_t> marked_edges;
    const std::int32_t num_edges
        = mesh->topology()->index_map(1)->size_local();
    for (std::int32_t e = 0; e < num_edges; ++e)
    {
      if (e % mark_period < nmarked)
        marked_edges.push_back(e);
    }

    auto [new_mesh, parent_edges, parent_facets]
        = dolfinx::refinement::refine(
            *mesh, marked_edges,
            dolfinx::mesh::create_cell_partitioner(
                dolfinx::mesh::GhostMode::shared_facet),
            dolfinx::refinement::Option::parent_cell_and_facet);
    meshi = std::make_shared<dolfinx::mesh::Mesh<double>>(
        std::move(new_mesh));

    // Vertices gained by the trial step relative to the number of edges
    const double actual_fraction
        = static_cast<double>(
              meshi->topology()->index_map(0)->size_global()
              - num_vertices_global)
          / num_edges_global;

    if (mpi_rank == 0)
    {
      std::cout << "Edges marked = " << nmarked << "/2000" << std::endl;
      std::cout << "Step " << k
                << " achieved actual fraction = " << actual_fraction
                << std::endl;
    }

    // Tighten whichever side of the bracket the trial landed on
    if (actual_fraction > fraction)
    {
      umark = nmarked;
      f_upper = actual_fraction;
    }
    else
    {
      lmark = nmarked;
      f_lower = actual_fraction;
    }

    // A collapsed (or NaN) bracket cannot be interpolated; keep the
    // current trial mesh instead of dividing by zero
    const double width = f_upper - f_lower;
    if (!(width > 0.0))
      break;

    const int new_mark = static_cast<int>(
        (lmark * (f_upper - fraction) + umark * (fraction - f_lower))
        / width);

    if (nmarked == new_mark)
      break;
    nmarked = new_mark;
  }

  return meshi;
}
454 | -------------------------------------------------------------------------------- /src/mesh.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017 Chris N. Richardson and Garth N. Wells 2 | // Licensed under the MIT License. See LICENSE file in the project 3 | // root for full license information. 
4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace dolfinx::fem 11 | { 12 | template 13 | class CoordinateElement; 14 | } 15 | 16 | namespace dolfinx::mesh 17 | { 18 | template 19 | class Mesh; 20 | } 21 | 22 | dolfinx::mesh::Mesh 23 | create_cube_mesh(MPI_Comm comm, std::size_t target_dofs, bool target_dofs_total, 24 | std::size_t dofs_per_node, int order, bool use_subcomm); 25 | 26 | std::shared_ptr> 27 | create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs, 28 | bool target_dofs_total, std::size_t dofs_per_node); 29 | -------------------------------------------------------------------------------- /src/poisson_problem.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #include "poisson_problem.h" 8 | #include "Poisson.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | # 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace dolfinx; 27 | using T = PetscScalar; 28 |
/// Assemble the Poisson system on a tetrahedral mesh and return what is
/// needed to solve it.
///
/// A Lagrange space of the given degree is built on `mesh`. The value 0
/// is imposed on the facets whose first coordinate lies within 1e-8 of 0
/// or 1. The two coefficients of the linear form are interpolated from
/// closed-form expressions, the PETSc matrix and the right-hand side
/// vector are assembled, and the boundary condition is applied to both.
///
/// @param mesh Mesh on which the problem is posed
/// @param order Polynomial degree; it selects the generated form with
/// index `order - 1` out of three, so a value outside 1..3 makes
/// `std::vector::at` throw `std::out_of_range`
/// @return Tuple of (assembled right-hand side vector, function to hold
/// the solution, callable that solves with a PETSc Krylov solver
/// configured from the options database and returns the value reported
/// by the solver's `solve` call)
std::tuple<std::shared_ptr<la::Vector<T>>, std::shared_ptr<fem::Function<T>>,
           std::function<int(fem::Function<T>&, const la::Vector<T>&)>>
poisson::problem(std::shared_ptr<mesh::Mesh<double>> mesh, int order)
{
  common::Timer t0("ZZZ FunctionSpace");

  auto element = basix::create_element<double>(
      basix::element::family::P, basix::cell::type::tetrahedron, order,
      basix::element::lagrange_variant::gll_warped,
      basix::element::dpc_variant::unset, false);

  auto dolfinx_element
      = std::make_shared<fem::FiniteElement<double>>(element);

  auto V = std::make_shared<fem::FunctionSpace<double>>(
      fem::create_functionspace<double>(mesh, dolfinx_element));

  t0.stop();
  t0.flush();

  common::Timer t1("ZZZ Assemble");

  common::Timer t2("ZZZ Create boundary conditions");
  // Define boundary condition: prescribed value is identically zero
  auto u0 = std::make_shared<fem::Function<T>>(V);
  u0->x()->set(0);

  // Find facets with bc applied
  const int tdim = mesh->topology()->dim();
  const std::vector<std::int32_t> bc_facets = mesh::locate_entities(
      *mesh, tdim - 1,
      [](auto x)
      {
        constexpr double eps = 1.0e-8;
        const std::size_t num_points = x.extent(1);
        std::vector<std::int8_t> marker(num_points, false);
        for (std::size_t p = 0; p < num_points; ++p)
        {
          const double x0 = x(0, p);
          if (std::abs(x0) < eps || std::abs(x0 - 1) < eps)
            marker[p] = true;
        }
        return marker;
      });

  // Find constrained dofs
  const std::vector<std::int32_t> bdofs = fem::locate_dofs_topological(
      *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets);

  auto bc = std::make_shared<fem::DirichletBC<T>>(u0, bdofs);
  t2.stop();
  t2.flush();

  // Define coefficients
  common::Timer t3("ZZZ Create RHS function");
  auto f = std::make_shared<fem::Function<T>>(V);
  auto g = std::make_shared<fem::Function<T>>(V);

  // f: peak of height 10 centred at (0.5, 0.5) in the first two
  // coordinates
  f->interpolate(
      [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
      {
        std::vector<T> values(x.extent(1));
        for (std::size_t p = 0; p < x.extent(1); ++p)
        {
          const double dx = x(0, p) - 0.5;
          const double dy = x(1, p) - 0.5;
          const double dr = dx * dx + dy * dy;
          values[p] = 10 * std::exp(-dr / 0.02);
        }

        // Take the shape before the values are moved out
        std::vector<std::size_t> shape = {values.size()};
        return {std::move(values), std::move(shape)};
      });

  // g: sin(5 x0)
  g->interpolate(
      [](auto x) -> std::pair<std::vector<T>, std::vector<std::size_t>>
      {
        std::vector<T> values(x.extent(1));
        for (std::size_t p = 0; p < x.extent(1); ++p)
          values[p] = std::sin(5 * x(0, p));

        std::vector<std::size_t> shape = {values.size()};
        return {std::move(values), std::move(shape)};
      });
  t3.stop();
  t3.flush();

  // Generated forms, indexed by degree - 1
  std::vector form_poisson_L
      = {form_Poisson_L1, form_Poisson_L2, form_Poisson_L3};
  std::vector form_poisson_a
      = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3};

  // Define variational forms
  auto L = std::make_shared<fem::Form<T>>(fem::create_form<T>(
      *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {}, {}));
  auto a = std::make_shared<fem::Form<T>>(fem::create_form<T>(
      *form_poisson_a.at(order - 1), {V, V}, {}, {}, {}, {}));

  // Create matrices and vector, and assemble system
  std::shared_ptr<la::petsc::Matrix> A = std::make_shared<la::petsc::Matrix>(
      fem::petsc::create_matrix(*a), false);

  common::Timer t4("ZZZ Assemble matrix");
  const std::vector<T> constants_a = fem::pack_constants(*a);
  auto coeffs_a = fem::allocate_coefficient_storage(*a);
  fem::pack_coefficients(*a, coeffs_a);
  fem::assemble_matrix<T>(
      la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES), *a, constants_a,
      fem::make_coefficients_span(coeffs_a), {*bc});
  // Flush the added values before the diagonal entries are inserted
  MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY);
  MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY);
  fem::set_diagonal<T>(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V,
                       {*bc});
  MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY);
  t4.stop();
  t4.flush();

  // Create la::Vector
  la::Vector<T> b(L->function_spaces()[0]->dofmap()->index_map,
                  L->function_spaces()[0]->dofmap()->index_map_bs());
  b.set(0);
  common::Timer t5("ZZZ Assemble vector");
  const std::vector<T> constants_L = fem::pack_constants(*L);
  auto coeffs_L = fem::allocate_coefficient_storage(*L);
  fem::pack_coefficients(*L, coeffs_L);
  fem::assemble_vector<T>(b.mutable_array(), *L, constants_L,
                          fem::make_coefficients_span(coeffs_L));
  // Lifting evaluates the bilinear form `a`, so it must be given the
  // constants and coefficients packed for `a`, not those of `L`
  fem::apply_lifting<T, double>(b.mutable_array(), {*a}, {constants_a},
                                {fem::make_coefficients_span(coeffs_a)},
                                {{*bc}}, {}, 1.0);
  b.scatter_rev(std::plus<>());
  bc->set(b.mutable_array(), std::nullopt);
  t5.stop();
  t5.flush();

  t1.stop();
  t1.flush();

  // Create Function to hold solution
  auto u = std::make_shared<fem::Function<T>>(V);

  // The callable shares ownership of the matrix, so the operator stays
  // alive for as long as the callable does
  std::function<int(fem::Function<T>&, const la::Vector<T>&)> solver_function
      = [A](fem::Function<T>& u, const la::Vector<T>& b)
  {
    // Create solver
    la::petsc::KrylovSolver solver(MPI_COMM_WORLD);
    solver.set_from_options();
    solver.set_operator(A->mat());

    // Wrap la::Vector
    la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false);
    la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false);

    // Solve
    return solver.solve(x.vec(), _b.vec());
  };

  return {std::make_shared<la::Vector<T>>(std::move(b)), u, solver_function};
}
183 | -------------------------------------------------------------------------------- /src/poisson_problem.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells 2 | // 3 | // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) 4 | // 5 | // SPDX-License-Identifier: MIT 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace poisson 17 | { 18 | 19 | std::tuple>, 20 | std::shared_ptr>, 21 | std::function&, 22 | const dolfinx::la::Vector&)>> 23 | problem(std::shared_ptr> mesh, int order); 24 | 25 | } // namespace poisson 26 | --------------------------------------------------------------------------------