├── ReleaseVersion.txt ├── doc └── AMGX_Reference.pdf ├── src ├── configs │ ├── eigen_configs │ │ ├── ARNOLDI │ │ ├── POWER_ITERATION │ │ ├── PAGERANK │ │ ├── SUBSPACE_ITERATION │ │ ├── LANCZOS │ │ ├── JACOBI_DAVIDSON │ │ ├── LOBPCG │ │ └── INVERSE_FGMRES │ ├── CG_DILU.json │ ├── PCG_DILU.json │ ├── GMRES.json │ ├── PCG_NOPREC.json │ ├── JACOBI.json │ ├── FGMRES_NOPREC.json │ ├── PBICGSTAB_NOPREC.json │ ├── F.json │ ├── V.json │ ├── W.json │ ├── AMG_CLASSICAL_CG.json │ ├── AMG_CLASSICAL_CGF.json │ ├── CLASSICAL_CG_CYCLE.json │ ├── CLASSICAL_F_CYCLE.json │ ├── CLASSICAL_V_CYCLE.json │ ├── CLASSICAL_W_CYCLE.json │ ├── CLASSICAL_CGF_CYCLE.json │ ├── IDRMSYNC_DILU.json │ ├── AGGREGATION_THRUST_BJ.json │ ├── IDR_DILU.json │ ├── AMG_AGGRREGATION_CG.json │ ├── AGGREGATION_DILU.json │ ├── AGGREGATION_JACOBI.json │ ├── AGGREGATION_LOW_DEG_DILU.json │ ├── AGGREGATION_LOW_DEG_BJ.json │ ├── AGGREGATION_THRUST_DILU.json │ ├── AGGREGATION_GS.json │ ├── AGGREGATION_LOW_DEG_GS.json │ ├── AGGREGATION_THRUST_GS.json │ ├── FGMRES.json │ ├── PBICGSTAB_W.json │ ├── PBICGSTAB_AGGREGATION_W_JACOBI.json │ ├── AGGREGATION_MULTI_PAIRWISE.json │ ├── V-cheby_poly-smoother.json │ ├── FGMRES_AGGREGATION_JACOBI.json │ ├── PBICGSTAB.json │ ├── PBICGSTAB_CLASSICAL_JACOBI.json │ ├── V-cheby-aggres-L1-trunc.json │ ├── PCG_F.json │ ├── PCG_V.json │ ├── PCG_W.json │ ├── agg_cheb4.json │ ├── PCGF_CLASSICAL_F_JACOBI.json │ ├── PCGF_CLASSICAL_V_JACOBI.json │ ├── PCGF_CLASSICAL_W_JACOBI.json │ ├── PCG_CLASSICAL_F_JACOBI.json │ ├── PCG_CLASSICAL_W_JACOBI.json │ ├── V-cheby-smoother.json │ ├── GMRES_AMG_D2.json │ ├── AMG_CLASSICAL_L1_TRUNC.json │ ├── PCG_CLASSICAL_V_JACOBI.json │ ├── FGMRES_AGGREGATION.json │ ├── FGMRES_AGGREGATION_DILU.json │ ├── V-cheby-aggres-L1-trunc-userLambda.json │ ├── AMG_CLASSICAL_AGGRESSIVE_L1.json │ ├── AMG_CLASSICAL_L1_AGGRESSIVE_HMIS.json │ ├── PCG_AGGREGATION_JACOBI.json │ ├── AMG_CLASSICAL_PMIS.json │ ├── AMG_CLASSICAL_AGGRESSIVE_L1_TRUNC.json │ ├── 
FGMRES_CLASSICAL_AGGRESSIVE_HMIS.json │ ├── FGMRES_CLASSICAL_AGGRESSIVE_PMIS.json │ └── AMG_CLASSICAL_AGGRESSIVE_CHEB_L1_TRUNC.json ├── version.cu ├── memory_info.cu ├── api_version.cu ├── amgx_types │ └── io.cu ├── operators │ ├── solver_operator.cu │ ├── shifted_operator.cu │ ├── deflated_multiply_operator.cu │ └── solve_operator.cu ├── device_properties.cu ├── distributed │ └── distributed_comms.cu ├── solvers │ ├── user_solver.cu │ └── dummy_solver.cu ├── amgx_c_common.cu ├── tests │ ├── version_test.cu │ └── truncate_count_test.cu ├── misc.cu ├── convergence │ └── absolute.cu ├── cycles │ ├── v_cycle.cu │ ├── w_cycle.cu │ └── f_cycle.cu ├── thread_manager.cu └── classical │ └── interpolators │ └── common.cu ├── .gitignore ├── include ├── memory_space.h ├── device_properties.h ├── marker.h ├── cusp │ ├── detail │ │ ├── device │ │ │ ├── common.h │ │ │ ├── dereference.h │ │ │ ├── generalized_spmv │ │ │ │ ├── coo.h │ │ │ │ ├── csr.h │ │ │ │ ├── coo_serial.h │ │ │ │ └── hyb.h │ │ │ ├── spmv │ │ │ │ ├── hyb.h │ │ │ │ └── coo_serial.h │ │ │ ├── elementwise.h │ │ │ └── arch.h │ │ ├── utils.h │ │ ├── random.h │ │ ├── host │ │ │ ├── elementwise.h │ │ │ ├── update.sh │ │ │ └── reference │ │ │ │ ├── ell.h │ │ │ │ └── dia.h │ │ ├── forward_definitions.h │ │ ├── matrix_shape.h │ │ ├── format_utils.h │ │ ├── functional.h │ │ ├── csr_matrix.inl │ │ ├── hyb_matrix.inl │ │ ├── config.h │ │ ├── dispatch │ │ │ ├── transpose.h │ │ │ └── multiply.h │ │ └── convert.inl │ ├── verify.h │ ├── version.h │ ├── convert.h │ ├── copy.h │ ├── format.h │ ├── gallery │ │ └── stencil.h │ ├── precond │ │ ├── strength.h │ │ ├── aggregate.h │ │ ├── smooth.h │ │ └── detail │ │ │ └── diagonal.inl │ ├── memory.h │ ├── elementwise.h │ ├── transpose.h │ ├── relaxation │ │ ├── jacobi.h │ │ └── polynomial.h │ ├── graph │ │ └── maximal_independent_set.h │ └── linear_operator.h ├── sort.h ├── amgx_types │ ├── io.h │ ├── rand.h │ └── pod_types.h ├── eigensolvers.h ├── amg_signal.h ├── distributed │ └── 
amgx_mpi.h ├── transpose.h ├── version.h ├── texture.h ├── numerical_zero.h ├── core.h ├── profile.h ├── misc.h ├── stream.h ├── determinism_checker.h ├── eigensolvers │ ├── multivector_operations.h │ ├── qr.h │ ├── eigenvector_solver.h │ ├── subspace_iteration_eigensolver.h │ └── arnoldi_eigensolver.h ├── async_event.h ├── multiply.h ├── amgx_eig_c.h ├── classical │ ├── strength │ │ ├── all.h │ │ └── ahat.h │ └── interpolators │ │ └── common.h ├── miscmath.h ├── memory_info.h ├── norm.h ├── aggregation │ └── selectors │ │ ├── dummy.h │ │ ├── serial_greedy.h │ │ └── serial_bfs_selector.h ├── cycles │ └── fixed_cycle.h └── convergence │ ├── relative_ini.h │ ├── absolute.h │ └── relative_max.h ├── ci ├── test.sh ├── containers │ ├── x86_64-ubuntu18.04-gnu7-cuda10.2.py │ ├── x86_64-ubuntu18.04-gnu8-cuda11.0.py │ └── x86_64-ubuntu18.04-llvm9-cuda11.0.py ├── README.md └── run.sh ├── external └── rapidjson │ ├── include │ └── rapidjson │ │ ├── internal │ │ └── strfunc.h │ │ ├── filestream.h │ │ └── stringbuffer.h │ ├── license.txt │ └── readme.txt ├── examples ├── install_makefiles_nompi │ └── Makefile ├── matrix.mtx ├── Makefile.cray ├── generate_poisson.cu ├── convert.c └── install_makefiles_mpi │ └── Makefile ├── .github └── ISSUE_TEMPLATE │ ├── compilation-issue-report.md │ └── bug-report.md └── LICENSES └── BSD-3-Clause.txt /ReleaseVersion.txt: -------------------------------------------------------------------------------- 1 | 2.5.0 2 | -------------------------------------------------------------------------------- /doc/AMGX_Reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/AMGX/main/doc/AMGX_Reference.pdf -------------------------------------------------------------------------------- /src/configs/eigen_configs/ARNOLDI: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=ARNOLDI 3 | 
default:eig_max_iters=128 4 | default:eig_tolerance=1e-4 5 | default:eig_which=largest -------------------------------------------------------------------------------- /src/configs/eigen_configs/POWER_ITERATION: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=POWER_ITERATION 3 | default:eig_max_iters=40000 4 | default:eig_tolerance=1e-4 5 | default:eig_which=largest -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /build*/ 3 | .logamgx 4 | *.cmake 5 | plugin_config.cu 6 | *.sublime-project 7 | *.sublime-workspace 8 | core/src/version.cu 9 | ci/docker/ 10 | install 11 | Release 12 | -------------------------------------------------------------------------------- /src/configs/eigen_configs/PAGERANK: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=PAGERANK 3 | default:eig_damping_factor=0.85 4 | default:eig_max_iters=50 5 | default:eig_tolerance=1e-3 6 | default:eig_which=pagerank -------------------------------------------------------------------------------- /src/version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | namespace amgx{ 3 | const char __AMGX_BUILD_DATE__ [] = __DATE__; 4 | const char __AMGX_BUILD_TIME__ [] = __TIME__; 5 | const char __AMGX_BUILD_ID__ [] = "2.5.0"; 6 | } 7 | -------------------------------------------------------------------------------- /src/configs/eigen_configs/SUBSPACE_ITERATION: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=SUBSPACE_ITERATION 3 | default:eig_max_iters=256 4 | default:eig_tolerance=1e-2 5 | default:eig_which=largest 6 | default:eig_wanted_count=4 
-------------------------------------------------------------------------------- /src/configs/eigen_configs/LANCZOS: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=LANCZOS 3 | default:eig_max_iters=128 4 | default:eig_tolerance=1e-4 5 | default:eig_which=largest 6 | default:eig_eigenvector=0 7 | default:eig_eigenvector_solver=default -------------------------------------------------------------------------------- /include/memory_space.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | 8 | using cusp::host_memory; 9 | using cusp::device_memory; 10 | -------------------------------------------------------------------------------- /src/memory_info.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | namespace amgx 8 | { 9 | 10 | size_t MemoryInfo::max_allocated = 0; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /include/device_properties.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | 10 | cudaDeviceProp getDeviceProperties(); 11 | 12 | int getSMCount(); 13 | 14 | } 15 | -------------------------------------------------------------------------------- /include/marker.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. 
All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | template __global__ void marker_kernel() {} 6 | 7 | template void marker() 8 | { 9 | marker_kernel <<< 1, 1>>>(); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/api_version.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | namespace amgx 7 | { 8 | 9 | const int __AMGX_API_VERSION_MAJOR = 1; 10 | const int __AMGX_API_VERSION_MINOR = 0; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /include/cusp/detail/device/common.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | // maximum number of co-resident threads 8 | const int MAX_THREADS = (30 * 1024); 9 | const int WARP_SIZE = 32; 10 | 11 | -------------------------------------------------------------------------------- /include/sort.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | // sort an array 13 | template 14 | void sort(Vector &v); 15 | 16 | } // namespace amgx 17 | -------------------------------------------------------------------------------- /include/amgx_types/io.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | std::ostream &operator<<(std::ostream &os, const cuComplex &x); 11 | std::ostream &operator<<(std::ostream &os, const cuDoubleComplex &x); -------------------------------------------------------------------------------- /include/eigensolvers.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | namespace eigensolvers 12 | { 13 | AMGX_ERROR initialize(); 14 | void finalize(); 15 | } //namespace eigensolvers 16 | } // namespace amgx 17 | -------------------------------------------------------------------------------- /include/amg_signal.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | 10 | class SignalHandler 11 | { 12 | static bool hooked; 13 | public: 14 | static void hook(); 15 | static void unhook(); 16 | }; 17 | 18 | } // namespace amgx 19 | -------------------------------------------------------------------------------- /include/distributed/amgx_mpi.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | 8 | #ifdef AMGX_WITH_MPI 9 | #include 10 | 11 | namespace amgx 12 | { 13 | void installMPIErrorHandler(MPI_Comm comm); 14 | void uninstallMPIErrorHandler(MPI_Comm comm); 15 | } 16 | #else 17 | 18 | #endif 19 | 20 | 21 | -------------------------------------------------------------------------------- /include/transpose.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | 10 | //computes B=A^T 11 | template 12 | void transpose(const Matrix &A, Matrix &B); 13 | 14 | template 15 | void transpose(const Matrix &A, Matrix &B, int num_rows); 16 | 17 | } // namespace amgx 18 | -------------------------------------------------------------------------------- /include/version.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | namespace amgx 7 | { 8 | extern const char __AMGX_BUILD_ID__ []; 9 | extern const char __AMGX_BUILD_TIME__ []; 10 | extern const char __AMGX_BUILD_DATE__ []; 11 | 12 | extern const int __AMGX_API_VERSION_MAJOR; 13 | extern const int __AMGX_API_VERSION_MINOR; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /include/texture.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "cuda_runtime.h" 11 | 12 | namespace amgx 13 | { 14 | 15 | 16 | template __inline__ __device__ T_ELEM __cachingLoad(const T_ELEM *addr) 17 | { 18 | return __ldg(addr); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /include/cusp/detail/utils.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | namespace detail 10 | { 11 | 12 | template 13 | IntegralType round_up(IntegralType n, IntegralType k) 14 | { 15 | return k * ((n + k - 1) / k); 16 | } 17 | 18 | } // end namespace detail 19 | } // end namespace cusp 20 | 21 | -------------------------------------------------------------------------------- /include/numerical_zero.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #if !defined(AMGX_NUMERICAL_ZERO_H_) 6 | #define AMGX_NUMERICAL_ZERO_H_ 7 | 8 | #define AMGX_NUMERICAL_AZERO 0.0 //"absolute" zero 9 | #define AMGX_NUMERICAL_SZERO 1e-10 //single precision zero 10 | #define AMGX_NUMERICAL_DZERO 1e-20 //double precision zero 11 | 12 | #endif /* AMGX_NUMERICAL_ZERO_H_ */ 13 | -------------------------------------------------------------------------------- /src/configs/CG_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "precond", 6 | "solver": "MULTICOLOR_DILU" 7 | }, 8 | "solver": "CG", 9 | "print_solve_stats": 1, 10 | "obtain_timings": 1, 11 | "max_iters": 100, 12 | "monitor_residual": 1, 13 | "scope": "main", 14 | "tolerance" : 1e-06, 15 | "convergence": "RELATIVE_INI", 16 | "norm": "L2" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/configs/PCG_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "precond", 6 | "solver": "MULTICOLOR_DILU" 7 | }, 8 | "solver": "PCG", 9 | "print_solve_stats": 1, 10 | "obtain_timings": 1, 11 | "max_iters": 100, 12 | "monitor_residual": 1, 13 | "scope": "main", 14 | "tolerance" : 1e-06, 15 | "convergence": "RELATIVE_INI", 16 | "norm": "L2" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/amgx_types/io.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | std::ostream &operator<<(std::ostream &os, const cuComplex &x) 8 | { 9 | os << amgx::types::get_re(x) << " " << amgx::types::get_im(x); 10 | return os; 11 | } 12 | 13 | std::ostream &operator<<(std::ostream &os, const cuDoubleComplex &x) 14 | { 15 | os << amgx::types::get_re(x) << " " << amgx::types::get_im(x); 16 | return os; 17 | } -------------------------------------------------------------------------------- /src/configs/GMRES.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "amg", 6 | "solver": "NOSOLVER" 7 | }, 8 | "use_scalar_norm": 1, 9 | "solver": "GMRES", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "monitor_residual": 1, 13 | "convergence": "RELATIVE_INI", 14 | "scope": "main", 15 | "max_iters": 100, 16 | "tolerance" : 1e-06, 17 | "norm": "L2" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/configs/PCG_NOPREC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "amg", 6 | "solver": "NOSOLVER" 7 | }, 8 | "use_scalar_norm": 1, 9 | "solver": "PCG", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "monitor_residual": 1, 13 | "convergence": "RELATIVE_INI", 14 | "scope": "main", 15 | "max_iters": 100, 16 | "tolerance" : 1e-06, 17 | "norm": "L2" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /include/cusp/detail/device/dereference.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #if THRUST_VERSION >= 100600 10 | #include 11 | #define CUSP_DEREFERENCE(x) amgx::thrust::raw_reference_cast(*x) 12 | #else 13 | #include 14 | #define CUSP_DEREFERENCE(x) amgx::thrust::detail::backend::dereference(x) 15 | #endif 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/configs/JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "amg", 6 | "solver": "NOSOLVER" 7 | }, 8 | "use_scalar_norm": 1, 9 | "solver": "BLOCK_JACOBI", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "monitor_residual": 1, 13 | "convergence": "RELATIVE_INI", 14 | "scope": "main", 15 | "max_iters": 100, 16 | "tolerance" : 1e-06, 17 | "norm": "L2" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/configs/FGMRES_NOPREC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "amg", 6 | "solver": "NOSOLVER" 7 | }, 8 | "use_scalar_norm": 1, 9 | "solver": "FGMRES", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "monitor_residual": 1, 13 | "convergence": "RELATIVE_INI", 14 | "scope": "main", 15 | "max_iters": 100, 16 | "tolerance" : 1e-06, 17 | "norm": "L2" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/configs/PBICGSTAB_NOPREC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "scope": "amg_solver", 6 | "solver": "NOSOLVER" 7 | }, 8 | "use_scalar_norm": 1, 9 | "solver": "PBICGSTAB", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "monitor_residual": 1, 13 | 
"convergence": "RELATIVE_INI", 14 | "scope": "main", 15 | "max_iters": 100, 16 | "tolerance" : 1e-06, 17 | "norm": "L2" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /include/cusp/detail/random.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | namespace detail 10 | { 11 | 12 | // array view containing random integers 13 | template 14 | class random_integers; 15 | 16 | // array view containing random real numbers in [0,1) 17 | template 18 | class random_reals; 19 | 20 | } // end namespace detail 21 | } // end namespace cusp 22 | 23 | #include 24 | 25 | -------------------------------------------------------------------------------- /src/operators/solver_operator.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | namespace amgx 6 | { 7 | 8 | template class Operator; 9 | 10 | } 11 | 12 | #include 13 | #include 14 | 15 | namespace amgx 16 | { 17 | 18 | #define AMGX_CASE_LINE(CASE) template class SolverOperator::Type>; 19 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 20 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 21 | #undef AMGX_CASE_LINE 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/configs/F.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "interpolator": "D2", 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "F", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/V.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "obtain_timings": 1, 9 | "max_iters": 100, 10 | "interpolator": "D2", 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "V", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/W.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "obtain_timings": 1, 9 | "max_iters": 100, 10 | 
"monitor_residual": 1, 11 | "interpolator": "D2", 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "W", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /ci/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | set -ex 8 | 9 | BUILD_DIR=$1 10 | if [ -z "${1}" ]; then 11 | BUILD_DIR=build 12 | fi 13 | 14 | if [ "${AMGX_CI_KEEP_BUILD}" = "0" ]; then 15 | rm -rf "${BUILD_DIR}" || true 16 | fi 17 | mkdir -p "${BUILD_DIR}" 18 | 19 | ( 20 | cd "${BUILD_DIR}" 21 | cmake .. 22 | make -j 8 all 23 | # WIP: test_launcher is allowed to fail; not all tests pass 24 | set +e 25 | ./tests/amgx_tests_launcher 26 | ) 27 | -------------------------------------------------------------------------------- /include/core.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | void allocate_resources(size_t pool_size, 13 | size_t max_alloc_size, 14 | size_t scaling_factor, 15 | size_t scaling_threshold, 16 | size_t max_size); 17 | void free_resources(); 18 | 19 | AMGX_ERROR initialize(); 20 | void finalize(); 21 | 22 | } // namespace amgx 23 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_CG.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "interpolator": "D2", 8 | "presweeps": 1, 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "CG", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_CGF.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "interpolator": "D2", 8 | "presweeps": 1, 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "CGF", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/CLASSICAL_CG_CYCLE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | 
"presweeps": 1, 8 | "interpolator": "D2", 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "CG", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/CLASSICAL_F_CYCLE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "obtain_timings": 1, 9 | "interpolator": "D2", 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "F", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/CLASSICAL_V_CYCLE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "interpolator": "D2", 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "V", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/CLASSICAL_W_CYCLE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "presweeps": 1, 8 | "interpolator": "D2", 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | 
"monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "W", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/CLASSICAL_CGF_CYCLE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "print_solve_stats": 1, 7 | "interpolator": "D2", 8 | "presweeps": 1, 9 | "obtain_timings": 1, 10 | "max_iters": 100, 11 | "monitor_residual": 1, 12 | "convergence": "RELATIVE_INI", 13 | "scope": "main", 14 | "max_levels": 50, 15 | "cycle": "CGF", 16 | "tolerance" : 1e-06, 17 | "norm": "L2", 18 | "postsweeps": 1 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/configs/eigen_configs/JACOBI_DAVIDSON: -------------------------------------------------------------------------------- 1 | default:config_version=2 2 | default:eig_solver=JACOBI_DAVIDSON 3 | default:eig_max_iters=128 4 | default:eig_tolerance=1e-4 5 | default:eig_which=largest 6 | 7 | #gmres accelerator config 8 | default:solver(main)=FGMRES 9 | main:gmres_n_restart=10 10 | main:preconditioner(amg)=NOSOLVER 11 | 12 | #outer solver setup 13 | main:convergence=RELATIVE_INI 14 | main:norm=L2 15 | main:use_scalar_norm=1 16 | main:max_iters=20 17 | main:tolerance=1e-2 18 | 19 | #printing options 20 | #main:print_solve_stats=1 21 | #main:monitor_residual=1 22 | #main:obtain_timings=1 23 | -------------------------------------------------------------------------------- /include/profile.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | void profileLevelUp(); 8 | void profileLevelDown(); 9 | void profileLevelZero(); 10 | void profilePhaseSetup(); 11 | void profilePhaseSolve(); 12 | void profilePhaseNone(); 13 | void profileSubphaseMatrixColoring(); 14 | void profileSubphaseSmootherSetup(); 15 | void profileSubphaseFindAggregates(); 16 | void profileSubphaseComputeRestriction(); 17 | void profileSubphaseComputeCoarseA(); 18 | void profileSubphaseNone(); 19 | void profileSubphaseTruncateP(); 20 | -------------------------------------------------------------------------------- /src/configs/IDRMSYNC_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "max_iters": 100, 5 | "preconditioner": { 6 | "scope": "precond", 7 | "max_iters": 1, 8 | "solver": "MULTICOLOR_DILU" 9 | }, 10 | "use_scalar_norm": 1, 11 | "solver": "IDRMSYNC", 12 | "print_solve_stats": 1, 13 | "obtain_timings": 1, 14 | "subspace_dim_s": 1, 15 | "monitor_residual": 1, 16 | "convergence": "RELATIVE_INI", 17 | "scope": "main", 18 | "tolerance" : 1e-06, 19 | "norm": "L2" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_THRUST_BJ.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "algorithm": "AGGREGATION", 6 | "coarseAgenerator": "THRUST", 7 | "solver": "AMG", 8 | "smoother": "BLOCK_JACOBI", 9 | "print_solve_stats": 1, 10 | "presweeps": 1, 11 | "selector": "SIZE_2", 12 | "obtain_timings": 1, 13 | "max_iters": 100, 14 | "monitor_residual": 1, 15 | "scope": "main", 16 | "postsweeps": 1, 17 | "tolerance" : 1e-06, 18 | "convergence": "RELATIVE_INI", 19 | "cycle": "V" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- 
/src/configs/IDR_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "max_iters": 100, 5 | "print_grid_stats": 1, 6 | "preconditioner": { 7 | "scope": "precond", 8 | "max_iters": 1, 9 | "solver": "MULTICOLOR_DILU" 10 | }, 11 | "use_scalar_norm": 1, 12 | "solver": "IDR", 13 | "print_solve_stats": 1, 14 | "obtain_timings": 1, 15 | "subspace_dim_s": 1, 16 | "monitor_residual": 1, 17 | "convergence": "RELATIVE_INI", 18 | "scope": "main", 19 | "tolerance" : 1e-06, 20 | "norm": "L2" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/configs/AMG_AGGRREGATION_CG.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "algorithm":"AGGREGATION", 7 | "selector": "SIZE_4", 8 | "print_solve_stats": 1, 9 | "smoother": "JACOBI_L1", 10 | "presweeps": 0, 11 | "postsweeps": 3, 12 | "obtain_timings": 1, 13 | "max_iters": 100, 14 | "monitor_residual": 1, 15 | "convergence": "RELATIVE_INI", 16 | "scope": "main", 17 | "max_levels": 50, 18 | "cycle": "CG", 19 | "tolerance" : 1e-06, 20 | "norm": "L2" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /include/cusp/verify.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! 
\file verify.h 6 | * \brief Validate matrix format 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | template 17 | bool is_valid_matrix(const MatrixType& A); 18 | 19 | template 20 | bool is_valid_matrix(const MatrixType& A, OutputStream& ostream); 21 | 22 | template 23 | void assert_is_valid_matrix(const MatrixType& A); 24 | 25 | } // end namespace cusp 26 | 27 | #include 28 | 29 | -------------------------------------------------------------------------------- /include/misc.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | 10 | typedef void (*AMGX_output_callback)(const char *msg, int length); 11 | extern AMGX_output_callback amgx_output; 12 | extern AMGX_output_callback error_output; 13 | extern AMGX_output_callback amgx_distributed_output; 14 | int amgx_printf(const char *fmt, ...); 15 | 16 | #ifdef NDEBUG 17 | #define amgx_printf_debug(fmt,...) 18 | #define device_printf(fmt,...) 19 | #else 20 | #define amgx_printf_debug(fmt,...) amgx_printf(fmt,##__VA_ARGS__) 21 | #define device_printf(fmt,...) printf(fmt,##__VA_ARGS__) 22 | #endif 23 | 24 | } // namespace amgx 25 | 26 | -------------------------------------------------------------------------------- /include/stream.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | // Owns a CUDA stream: created in the constructor, destroyed in the destructor. 13 | // NOTE(review): the class is implicitly copyable; a copy would share the raw handle and both destructors would call cudaStreamDestroy on it - confirm no caller copies instances. 14 | class Stream 15 | { 16 | cudaStream_t s; 17 | 18 | public: 19 | 20 | // Creates the stream with the given flags; s starts as 0 so the destructor never sees an uninitialized handle. 21 | inline 22 | Stream(unsigned flags = cudaStreamNonBlocking) : s(0) 23 | { 24 | cudaStreamCreateWithFlags(&s, flags); 25 | cudaCheckError(); 26 | } 27 | 28 | // Destroys the stream if one was created. 29 | inline 30 | ~Stream() 31 | { 32 | if (s != 0) 33 | { 34 | cudaStreamDestroy(s); 35 | } 36 | } 37 | 38 | // Returns the underlying stream handle; const so it can be called on a const Stream. 39 | inline 40 | cudaStream_t get() const { return s; } 41 | }; 42 | 43 | } // namespace amgx 44 | -------------------------------------------------------------------------------- /include/cusp/version.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file version.h 6 | * \brief Cusp version 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | // This is the only cusp header that is guaranteed to 14 | // change with every cusp release. 15 | // 16 | // CUSP_VERSION % 100 is the sub-minor version 17 | // CUSP_VERSION / 100 % 1000 is the minor version 18 | // CUSP_VERSION / 100000 is the major version 19 | 20 | #define CUSP_VERSION 300 21 | #define CUSP_MAJOR_VERSION (CUSP_VERSION / 100000) 22 | #define CUSP_MINOR_VERSION (CUSP_VERSION / 100 % 1000) 23 | #define CUSP_SUBMINOR_VERSION (CUSP_VERSION % 100) 24 | 25 | -------------------------------------------------------------------------------- /include/cusp/detail/host/elementwise.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | namespace detail 10 | { 11 | namespace host 12 | { 13 | 14 | template 18 | void transform_elementwise(const Matrix1& A, 19 | const Matrix2& B, 20 | Matrix3& C, 21 | BinaryFunction op); 22 | 23 | } // end namespace host 24 | } // end namespace detail 25 | } // end namespace cusp 26 | 27 | #include 28 | 29 | -------------------------------------------------------------------------------- /include/cusp/detail/host/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/bsr.h 4 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/coo.h 5 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/csc.h 6 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/csr.h 7 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/dense.h 8 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/dia.h 9 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/fixed_size.h 10 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/scratch.h 11 | 12 | -------------------------------------------------------------------------------- /external/rapidjson/include/rapidjson/internal/strfunc.h: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ 2 | #define RAPIDJSON_INTERNAL_STRFUNC_H_ 3 | 4 | namespace rapidjson { 5 | namespace internal { 6 | 7 | //! Custom strlen() which works on different character types. 8 | /*! \tparam Ch Character type (e.g. char, wchar_t, short) 9 | \param s Null-terminated input string. 10 | \return Number of characters in the string. 
11 | \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. 12 | */ 13 | template 14 | inline SizeType StrLen(const Ch* s) { 15 | const Ch* p = s; 16 | while (*p != '\0') 17 | ++p; 18 | return SizeType(p - s); 19 | } 20 | 21 | } // namespace internal 22 | } // namespace rapidjson 23 | 24 | #endif // RAPIDJSON_INTERNAL_STRFUNC_H_ 25 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "matrix_coloring_scheme": "MIN_MAX", 5 | "max_uncolored_percentage": 0.15, 6 | "algorithm": "AGGREGATION", 7 | "obtain_timings": 1, 8 | "solver": "AMG", 9 | "smoother": "MULTICOLOR_DILU", 10 | "print_solve_stats": 1, 11 | "presweeps": 1, 12 | "selector": "SIZE_2", 13 | "coarsest_sweeps": 2, 14 | "max_iters": 100, 15 | "monitor_residual": 1, 16 | "scope": "main", 17 | "max_levels": 50, 18 | "postsweeps": 1, 19 | "tolerance" : 1e-06, 20 | "convergence": "RELATIVE_INI", 21 | "print_grid_stats": 1, 22 | "norm": "L1", 23 | "cycle": "V" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "print_grid_stats": 1, 6 | "algorithm": "AGGREGATION", 7 | "obtain_timings": 1, 8 | "solver": "AMG", 9 | "smoother": "BLOCK_JACOBI", 10 | "print_solve_stats": 1, 11 | "presweeps": 2, 12 | "selector": "SIZE_2", 13 | "convergence": "RELATIVE_INI", 14 | "coarsest_sweeps": 2, 15 | "max_iters": 100, 16 | "monitor_residual": 1, 17 | "min_coarse_rows": 2, 18 | "relaxation_factor": 0.75, 19 | "scope": "main", 20 | "max_levels": 50, 21 | "postsweeps": 2, 22 | "tolerance" : 1e-06, 23 | "norm": "L1", 24 | "cycle": "V" 25 | } 26 | } 27 | 
-------------------------------------------------------------------------------- /src/configs/AGGREGATION_LOW_DEG_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "matrix_coloring_scheme": "MIN_MAX", 5 | "max_uncolored_percentage": 0.15, 6 | "algorithm": "AGGREGATION", 7 | "obtain_timings": 1, 8 | "solver": "AMG", 9 | "smoother": "MULTICOLOR_DILU", 10 | "print_solve_stats": 1, 11 | "presweeps": 1, 12 | "selector": "SIZE_2", 13 | "coarsest_sweeps": 2, 14 | "max_iters": 100, 15 | "monitor_residual": 1, 16 | "scope": "main", 17 | "max_levels": 50, 18 | "postsweeps": 1, 19 | "tolerance" : 1e-06, 20 | "convergence": "RELATIVE_INI", 21 | "print_grid_stats": 1, 22 | "norm": "L1", 23 | "cycle": "V" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /include/cusp/convert.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file convert.h 6 | * \brief Matrix format conversion 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | /*! \addtogroup algorithms Algorithms 17 | * \ingroup algorithms 18 | * \{ 19 | */ 20 | 21 | /*! \p convert : Convert between matrix formats 22 | * 23 | * \note DestinationType will be resized as necessary 24 | * 25 | * \see \p cusp::copy 26 | */ 27 | template 28 | void convert(const SourceType& src, DestinationType& dst); 29 | 30 | /*!
\} 31 | */ 32 | 33 | } // end namespace cusp 34 | 35 | #include 36 | 37 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_LOW_DEG_BJ.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "print_grid_stats": 1, 6 | "algorithm": "AGGREGATION", 7 | "obtain_timings": 1, 8 | "solver": "AMG", 9 | "smoother": "BLOCK_JACOBI", 10 | "print_solve_stats": 1, 11 | "presweeps": 2, 12 | "selector": "SIZE_2", 13 | "convergence": "RELATIVE_INI", 14 | "coarsest_sweeps": 2, 15 | "max_iters": 100, 16 | "monitor_residual": 1, 17 | "min_coarse_rows": 2, 18 | "relaxation_factor": 0.75, 19 | "scope": "main", 20 | "max_levels": 50, 21 | "postsweeps": 2, 22 | "tolerance" : 1e-06, 23 | "norm": "L1", 24 | "cycle": "V" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /include/determinism_checker.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | namespace testing_tools 12 | { 13 | 14 | struct hash_path_determinism_checker_private; 15 | 16 | struct hash_path_determinism_checker 17 | { 18 | static hash_path_determinism_checker *singleton(); 19 | hash_path_determinism_checker(); 20 | ~hash_path_determinism_checker(); 21 | 22 | hash_path_determinism_checker_private *priv; 23 | void checkpoint(const std::string &name, void *data, long long int size_in_bytes, bool no_permute = true); 24 | unsigned long long int checksum( void *data, long long int size_in_bytes, bool no_permute = true ); 25 | }; 26 | 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /examples/install_makefiles_nompi/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2011 - 2024 NVIDIA CORPORATION. All Rights Reserved. 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | 6 | # CUDA Toolkit location 7 | INC_D = -I/usr/local/cuda/include 8 | LIB_D = -L/usr/local/cuda/lib64 9 | LIB_L = -lcudart -ldl 10 | 11 | # Examples 12 | amgx_capi: 13 | gcc -O2 -std=c99 amgx_capi.c -c $(INC_D) 14 | g++ -O2 amgx_capi.o -o amgx_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -Wl,-rpath=../lib 15 | 16 | amgx_capi_dynamic: 17 | gcc -O2 -std=c99 amgx_capi.c -c -o amgx_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING 18 | g++ -O2 amgx_capi_dynamic.o -o amgx_capi_dynamic $(LIB_D) $(LIB_L) -Wl,-rpath=../lib 19 | 20 | # All 21 | all: amgx_capi amgx_capi_dynamic 22 | 23 | # Clean 24 | clean: 25 | rm -f amgx_capi 26 | rm -f amgx_capi_dynamic 27 | rm -f *.o 28 | -------------------------------------------------------------------------------- /include/cusp/copy.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file copy.h 6 | * \brief Performs (deep) copy operations between containers and views. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | /*! \addtogroup algorithms Algorithms 17 | * \ingroup algorithms 18 | * \{ 19 | */ 20 | 21 | /*! \p copy : Copy one array or matrix to another 22 | * 23 | * \note T1 and T2 must have the same format type 24 | * \note T2 will be resized as necessary 25 | * 26 | * \see \p convert 27 | */ 28 | template 29 | void copy(const T1& src, T2& dst); 30 | 31 | /*! \} 32 | */ 33 | 34 | } // end namespace cusp 35 | 36 | #include 37 | 38 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_THRUST_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "obtain_timings": 1, 5 | "print_grid_stats": 1, 6 | "max_uncolored_percentage": 0.02, 7 | "algorithm": "AGGREGATION", 8 | "coarseAgenerator": "THRUST", 9 | "solver": "AMG", 10 | "smoother": "MULTICOLOR_DILU", 11 | "print_solve_stats": 1, 12 | "presweeps": 1, 13 | "selector": "SIZE_2", 14 | "coarsest_sweeps": 2, 15 | "max_iters": 100, 16 | "monitor_residual": 1, 17 | "postsweeps": 1, 18 | "scope": "main", 19 | "max_levels": 50, 20 | "matrix_coloring_scheme": "MIN_MAX", 21 | "tolerance" : 1e-06, 22 | "convergence": "RELATIVE_INI", 23 | "norm": "L1", 24 | "cycle": "V" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/device_properties.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | namespace amgx 8 | { 9 | static cudaDeviceProp deviceProps; 10 | static bool initialized=false; 11 | // Returns the properties of the device that was current on the first call; the result is cached in the file-scope statics above and later calls do not re-query (no locking is done around the initialized flag). 12 | cudaDeviceProp getDeviceProperties() 13 | { 14 | if(!initialized) { 15 | int dev; 16 | cudaGetDevice(&dev); 17 | cudaCheckError(); 18 | cudaGetDeviceProperties(&deviceProps, dev); 19 | cudaCheckError(); 20 | initialized=true; 21 | } 22 | return deviceProps; 23 | } 24 | 25 | // Return the number of Streaming Multiprocessors reported by getDeviceProperties(), i.e. of the device whose properties were cached on its first call 26 | int getSMCount() 27 | { 28 | auto devProp = getDeviceProperties(); 29 | return devProp.multiProcessorCount; 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_GS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "print_grid_stats": 1, 6 | "max_uncolored_percentage": 0.15, 7 | "algorithm": "AGGREGATION", 8 | "obtain_timings": 1, 9 | "solver": "AMG", 10 | "smoother": "MULTICOLOR_GS", 11 | "print_solve_stats": 1, 12 | "presweeps": 1, 13 | "symmetric_GS": 1, 14 | "selector": "SIZE_2", 15 | "coarsest_sweeps": 2, 16 | "max_iters": 100, 17 | "monitor_residual": 1, 18 | "postsweeps": 1, 19 | "scope": "main", 20 | "max_levels": 50, 21 | "matrix_coloring_scheme": "MIN_MAX", 22 | "tolerance" : 1e-06, 23 | "convergence": "RELATIVE_INI", 24 | "norm": "L1", 25 | "cycle": "V" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/cusp/format.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*!
\file format.h 6 | * \brief Format types 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | struct known_format {}; 17 | struct unknown_format {}; 18 | 19 | struct dense_format : public known_format {}; 20 | struct array1d_format : public dense_format {}; 21 | struct array2d_format : public dense_format {}; 22 | 23 | struct sparse_format : public known_format {}; 24 | struct coo_format : public sparse_format {}; 25 | struct csr_format : public sparse_format {}; 26 | struct dia_format : public sparse_format {}; 27 | struct ell_format : public sparse_format {}; 28 | struct hyb_format : public sparse_format {}; 29 | 30 | } // end namespace cusp 31 | 32 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_LOW_DEG_GS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "print_grid_stats": 1, 6 | "max_uncolored_percentage": 0.15, 7 | "algorithm": "AGGREGATION", 8 | "obtain_timings": 1, 9 | "solver": "AMG", 10 | "smoother": "MULTICOLOR_GS", 11 | "print_solve_stats": 1, 12 | "presweeps": 1, 13 | "symmetric_GS": 1, 14 | "selector": "SIZE_2", 15 | "coarsest_sweeps": 2, 16 | "max_iters": 100, 17 | "monitor_residual": 1, 18 | "postsweeps": 1, 19 | "scope": "main", 20 | "max_levels": 50, 21 | "matrix_coloring_scheme": "MIN_MAX", 22 | "tolerance" : 1e-06, 23 | "convergence": "RELATIVE_INI", 24 | "norm": "L1", 25 | "cycle": "V" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_THRUST_GS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "coarsest_sweeps": 2, 5 | "matrix_coloring_scheme": "MIN_MAX", 6 | "smoother": "MULTICOLOR_GS", 7 | "print_solve_stats": 1, 8 | "obtain_timings": 1, 9 | "relaxation_factor": 0.75, 10 | 
"scope": "main", 11 | "tolerance" : 1e-06, 12 | "norm": "L1", 13 | "postsweeps": 2, 14 | "max_uncolored_percentage": 0.15, 15 | "presweeps": 2, 16 | "selector": "SIZE_2", 17 | "convergence": "RELATIVE_INI", 18 | "cycle": "V", 19 | "print_grid_stats": 1, 20 | "algorithm": "AGGREGATION", 21 | "coarseAgenerator": "THRUST", 22 | "solver": "AMG", 23 | "max_iters": 100, 24 | "monitor_residual": 1, 25 | "max_levels": 50 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/cusp/gallery/stencil.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file stencil.h 6 | * \brief Generate sparse matrix from grid stencil 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace cusp 16 | { 17 | namespace gallery 18 | { 19 | 20 | template 23 | void generate_matrix_from_stencil( MatrixType& matrix, 24 | const cusp::array1d& stencil, 25 | const GridDimension& grid); 26 | 27 | } // end namespace gallery 28 | } // end namespace cusp 29 | 30 | #include 31 | 32 | -------------------------------------------------------------------------------- /include/cusp/detail/forward_definitions.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | 10 | template class array1d; 11 | template class array2d; 12 | template class coo_matrix; 13 | template class csr_matrix; 14 | template class dia_matrix; 15 | template class ell_matrix; 16 | template class hyb_matrix; 17 | 18 | } // end namespace cusp 19 | 20 | -------------------------------------------------------------------------------- /src/configs/FGMRES.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "print_solve_stats": 0, 9 | "interpolator": "D2", 10 | "presweeps": 1, 11 | "max_iters": 1, 12 | "monitor_residual": 0, 13 | "store_res_history": 0, 14 | "scope": "amg", 15 | "cycle": "V", 16 | "postsweeps": 1 17 | }, 18 | "solver": "FGMRES", 19 | "print_solve_stats": 1, 20 | "obtain_timings": 1, 21 | "max_iters": 100, 22 | "monitor_residual": 1, 23 | "gmres_n_restart": 20, 24 | "convergence": "RELATIVE_INI", 25 | "scope": "main", 26 | "tolerance" : 1e-06, 27 | "norm": "L2" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /include/eigensolvers/multivector_operations.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace amgx 11 | { 12 | 13 | 14 | // res = alpha * lhs * rhs + beta * res. 
15 | template 16 | void 17 | distributed_gemm_TN(typename TConfig::VecPrec alpha, const Vector &lhs, 18 | const Vector &rhs, 19 | typename TConfig::VecPrec beta, Vector &res, 20 | const Operator &A); 21 | 22 | template 23 | void 24 | multivector_column_norms(const Vector &v, 25 | Vector::Type> &results, 26 | const Operator &A); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/distributed/distributed_comms.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace amgx 12 | { 13 | 14 | /*************************************** 15 | * Source Definitions 16 | ***************************************/ 17 | template 18 | DistributedComms::~DistributedComms() 19 | { 20 | }; 21 | 22 | /**************************************** 23 | * Explicit instantiations 24 | ***************************************/ 25 | #define AMGX_CASE_LINE(CASE) template class DistributedComms::Type >; 26 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 27 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 28 | #undef AMGX_CASE_LINE 29 | 30 | } // namespace amgx 31 | -------------------------------------------------------------------------------- /include/cusp/detail/matrix_shape.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | 10 | template 11 | class matrix_shape 12 | { 13 | public: 14 | typedef IndexType index_type; 15 | 16 | index_type num_rows; 17 | index_type num_cols; 18 | 19 | matrix_shape() 20 | : num_rows(0), num_cols(0) {} 21 | 22 | matrix_shape(IndexType rows, IndexType cols) 23 | : num_rows(rows), num_cols(cols) {} 24 | 25 | void swap(matrix_shape& shape) 26 | { 27 | amgx::thrust::swap(num_rows, shape.num_rows); 28 | amgx::thrust::swap(num_cols, shape.num_cols); 29 | } 30 | }; 31 | 32 | } // end namespace cusp 33 | -------------------------------------------------------------------------------- /src/operators/shifted_operator.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | namespace amgx 6 | { 7 | 8 | template class Operator; 9 | 10 | } 11 | 12 | #include 13 | #include 14 | 15 | namespace amgx 16 | { 17 | 18 | template 19 | void ShiftedOperator::apply(const Vector &v, Vector &res, ViewType view) 20 | { 21 | Operator &A = *m_A; 22 | int offset, size; 23 | A.getOffsetAndSizeForView(view, &offset, &size); 24 | A.apply(v, res, OWNED); 25 | axpy(v, res, m_shift, offset, size); 26 | } 27 | 28 | #define AMGX_CASE_LINE(CASE) template class ShiftedOperator::Type>; 29 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 30 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 31 | #undef AMGX_CASE_LINE 32 | 33 | } 34 | -------------------------------------------------------------------------------- /include/cusp/detail/device/generalized_spmv/coo.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace cusp 10 | { 11 | namespace detail 12 | { 13 | namespace device 14 | { 15 | 16 | template 17 | void spmv(const coo_matrix& coo, 18 | const ValueType * x, 19 | ValueType * y) 20 | { 21 | spmv_coo_flat(coo, x, y); 22 | } 23 | 24 | template 25 | void spmv_tex(const coo_matrix& coo, 26 | const ValueType * x, 27 | ValueType * y) 28 | { 29 | spmv_coo_flat_tex(coo, x, y); 30 | } 31 | 32 | } // end namespace device 33 | } // end namespace detail 34 | } // end namespace cusp 35 | 36 | -------------------------------------------------------------------------------- /src/solvers/user_solver.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | namespace amgx 9 | { 10 | 11 | template 12 | void 13 | User_Solver::solver_setup(bool reuse_matrix_structure) 14 | { 15 | } 16 | 17 | //launches a single standard cycle 18 | template 19 | AMGX_STATUS 20 | User_Solver::solve_iteration( VVector &b, VVector &x, bool xIsZero ) 21 | { 22 | assert( callback ); 23 | callback( *this->m_A, b, x ); 24 | return this->converged( b, x ); 25 | }; 26 | 27 | /**************************************** 28 | * Explicit instantiations 29 | ***************************************/ 30 | #define AMGX_CASE_LINE(CASE) template class User_Solver::Type>; 31 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 32 | #undef AMGX_CASE_LINE 33 | 34 | } // namespace amgx 35 | -------------------------------------------------------------------------------- /examples/matrix.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | 12 12 61 3 | 1 1 1.0 4 | 1 2 2.0 5 | 1 4 3.0 6 | 1 9 4.0 7 | 2 1 5.0 8 | 2 2 6.0 9 | 2 3 7.0 10 | 2 4 8.0 11 | 3 2 9.0 12 |
3 3 10.0 13 | 3 4 11.0 14 | 3 5 12.0 15 | 3 6 13.0 16 | 4 1 14.0 17 | 4 2 15.0 18 | 4 3 16.0 19 | 4 4 17.0 20 | 4 5 18.0 21 | 4 6 19.0 22 | 4 9 20.0 23 | 4 11 21.0 24 | 5 3 22.0 25 | 5 5 23.0 26 | 5 6 24.0 27 | 5 7 25.0 28 | 6 3 26.0 29 | 6 4 27.0 30 | 6 5 28.0 31 | 6 6 29.0 32 | 6 7 30.0 33 | 6 8 31.0 34 | 6 11 32.0 35 | 7 5 33.0 36 | 7 6 34.0 37 | 7 7 35.0 38 | 7 8 36.0 39 | 8 6 37.0 40 | 8 7 38.0 41 | 8 8 39.0 42 | 8 10 40.0 43 | 8 11 41.0 44 | 9 1 42.0 45 | 9 4 43.0 46 | 9 9 44.0 47 | 9 11 45.0 48 | 9 12 46.0 49 | 10 8 47.0 50 | 10 10 48.0 51 | 10 11 49.0 52 | 10 12 50.0 53 | 11 4 51.0 54 | 11 6 52.0 55 | 11 8 53.0 56 | 11 9 54.0 57 | 11 10 55.0 58 | 11 11 56.0 59 | 11 12 57.0 60 | 12 9 58.0 61 | 12 10 59.0 62 | 12 11 60.0 63 | 12 12 61.0 64 | -------------------------------------------------------------------------------- /include/cusp/precond/strength.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace cusp 10 | { 11 | namespace precond 12 | { 13 | namespace detail 14 | { 15 | 16 | /* Compute a strength of connection matrix using the standard symmetric measure. 17 | * An off-diagonal connection A[i,j] is strong iff:: 18 | * 19 | * abs(A[i,j]) >= theta * sqrt( abs(A[i,i]) * abs(A[j,j]) ) 20 | * 21 | * With the default threshold (theta = 0.0) all connections are strong. 22 | * 23 | * Note: explicit diagonal entries are always considered strong. 
24 | */ 25 | template 26 | void symmetric_strength_of_connection(const Matrix1& A, Matrix2& S, const double theta = 0.0); 27 | 28 | } // end namespace detail 29 | } // end namespace precond 30 | } // end namespace cusp 31 | 32 | #include 33 | 34 | -------------------------------------------------------------------------------- /include/amgx_types/rand.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace amgx 11 | { 12 | namespace types 13 | { 14 | 15 | template 16 | __inline__ __host__ T get_rand(); 17 | 18 | template <> 19 | __inline__ __host__ float get_rand() 20 | { 21 | return 1.f * rand() / RAND_MAX; 22 | } 23 | 24 | template <> 25 | __inline__ __host__ double get_rand() 26 | { 27 | return 1.*rand() / RAND_MAX; 28 | } 29 | 30 | template <> 31 | __inline__ __host__ cuComplex get_rand() 32 | { 33 | return make_cuComplex (1.f * rand() / RAND_MAX, 1.f * rand() / RAND_MAX); 34 | } 35 | 36 | template <> 37 | __inline__ __host__ cuDoubleComplex get_rand() 38 | { 39 | return make_cuDoubleComplex (1.*rand() / RAND_MAX, 1.*rand() / RAND_MAX); 40 | } 41 | 42 | } // namespace types 43 | } // namespace amgx 44 | -------------------------------------------------------------------------------- /include/async_event.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | 10 | class AsyncEvent 11 | { 12 | public: 13 | AsyncEvent() : async_event(NULL) { } 14 | AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); } 15 | ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); } 16 | 17 | void create() { cudaEventCreate(&async_event); } 18 | void record(cudaStream_t s = 0) 19 | { 20 | if (async_event == NULL) 21 | { 22 | cudaEventCreate(&async_event); // check if we haven't created the event yet 23 | } 24 | 25 | cudaEventRecord(async_event, s); 26 | } 27 | void sync() 28 | { 29 | cudaEventSynchronize(async_event); 30 | } 31 | private: 32 | cudaEvent_t async_event; 33 | }; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /include/cusp/detail/device/generalized_spmv/csr.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace cusp 11 | { 12 | namespace detail 13 | { 14 | namespace device 15 | { 16 | 17 | template 18 | void spmv(const csr_matrix& csr, 19 | const ValueType * x, 20 | ValueType * y) 21 | { 22 | spmv_csr_vector(csr, x, y); 23 | } 24 | 25 | template 26 | void spmv_tex(const csr_matrix& csr, 27 | const ValueType * x, 28 | ValueType * y) 29 | { 30 | spmv_csr_vector_tex(csr, x, y); 31 | } 32 | 33 | } // end namespace device 34 | } // end namespace detail 35 | } // end namespace cusp 36 | 37 | -------------------------------------------------------------------------------- /src/configs/PBICGSTAB_W.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "error_scaling": 2, 6 | "algorithm": "AGGREGATION", 7 | "solver": "AMG", 8 | "smoother": { 9 | "relaxation_factor": 0.9, 10 | "scope": "amg_smoother", 11 | "solver": "BLOCK_JACOBI" 12 | }, 13 | "presweeps": 1, 14 | "selector": "SIZE_2", 15 | "max_iters": 1, 16 | "monitor_residual": 1, 17 | "convergence": "RELATIVE_INI", 18 | "scope": "amg", 19 | "cycle": "W", 20 | "norm": "L1", 21 | "postsweeps": 2 22 | }, 23 | "solver": "PBICGSTAB", 24 | "print_solve_stats": 1, 25 | "obtain_timings": 1, 26 | "max_iters": 100, 27 | "monitor_residual": 1, 28 | "scope": "main", 29 | "tolerance" : 1e-06, 30 | "norm": "L2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/configs/PBICGSTAB_AGGREGATION_W_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "error_scaling": 2, 6 | "algorithm": "AGGREGATION", 7 | "solver": "AMG", 8 | "smoother": { 9 | "relaxation_factor": 0.9, 10 | "scope": "amg_smoother", 11 | "solver": "BLOCK_JACOBI" 12 | }, 13 | "presweeps": 1, 
14 | "selector": "SIZE_2", 15 | "max_iters": 1, 16 | "scope": "amg", 17 | "cycle": "W", 18 | "max_levels": 50, 19 | "norm": "L1", 20 | "postsweeps": 2 21 | }, 22 | "solver": "PBICGSTAB", 23 | "print_solve_stats": 1, 24 | "obtain_timings": 1, 25 | "max_iters": 100, 26 | "monitor_residual": 1, 27 | "convergence": "RELATIVE_INI", 28 | "scope": "main", 29 | "tolerance" : 1e-06, 30 | "norm": "L2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /include/multiply.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | 10 | namespace amgx 11 | { 12 | 13 | //computes C=A*B 14 | template 15 | void multiply(Matrix &A, Vector &B, Vector &C, ViewType view = OWNED); 16 | 17 | template 18 | void multiply_masked(Matrix &A, Vector &B, Vector &C, typename Matrix::IVector &mask, ViewType view = OWNED); 19 | 20 | template 21 | void multiply_with_mask(MatrixA &A, Vector &B, Vector &C); 22 | 23 | template 24 | void multiply_with_mask_restriction(MatrixA &A, Vector &B, Vector &C, MatrixA &P); 25 | 26 | 27 | //computes C=A*B 28 | template 29 | void multiplyMM(const Matrix &A, const Matrix &B, Matrix &C); 30 | 31 | } // namespace amgx 32 | -------------------------------------------------------------------------------- /include/cusp/detail/device/spmv/hyb.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace cusp 11 | { 12 | namespace detail 13 | { 14 | namespace device 15 | { 16 | 17 | template 19 | void spmv_hyb(const Matrix& A, 20 | const ValueType* x, 21 | ValueType* y) 22 | { 23 | spmv_ell(A.ell, x, y); 24 | __spmv_coo_flat(A.coo, x, y); 25 | } 26 | 27 | template 29 | void spmv_hyb_tex(const Matrix& A, 30 | const ValueType* x, 31 | ValueType* y) 32 | { 33 | spmv_ell_tex(A.ell, x, y); 34 | __spmv_coo_flat(A.coo, x, y); 35 | } 36 | 37 | } // end namespace device 38 | } // end namespace detail 39 | } // end namespace cusp 40 | 41 | -------------------------------------------------------------------------------- /src/configs/AGGREGATION_MULTI_PAIRWISE.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "use_scalar_norm": 1, 5 | "min_coarse_rows": 32, 6 | "matrix_coloring_scheme": "PARALLEL_GREEDY", 7 | "smoother": "BLOCK_JACOBI", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "relaxation_factor": 0.5, 11 | "weight_formula": 1, 12 | "tolerance" : 1e-06, 13 | "norm": "L2", 14 | "postsweeps": 3, 15 | "merge_singletons": 2, 16 | "presweeps": 0, 17 | "selector": "MULTI_PAIRWISE", 18 | "scope": "main", 19 | "convergence": "RELATIVE_INI", 20 | "cycle": "F", 21 | "print_grid_stats": 1, 22 | "algorithm": "AGGREGATION", 23 | "solver": "AMG", 24 | "aggregation_passes": 1, 25 | "max_uncolored_percentage": 0.05, 26 | "coarse_solver": "DENSE_LU_SOLVER", 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "max_levels": 50 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/configs/eigen_configs/LOBPCG: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=LOBPCG 3 | default:eig_max_iters=1000 4 | default:eig_tolerance=1e-4 5 | 
default:eig_which=largest 6 | 7 | #gmres accelerator config 8 | default:solver(main)=FGMRES 9 | main:gmres_n_restart=10 10 | main:preconditioner(amg)=AMG 11 | 12 | #outer solver setup 13 | main:convergence=RELATIVE_INI 14 | main:norm=L2 15 | main:use_scalar_norm=1 16 | main:max_iters=2 17 | main:tolerance=1e-4 18 | 19 | #amg specific 20 | amg:max_iters=1 21 | amg:algorithm=AGGREGATION 22 | amg:selector=SIZE_2 23 | amg:cycle=V 24 | amg:smoother=MULTICOLOR_DILU 25 | amg:presweeps=0 26 | amg:postsweeps=3 27 | amg:error_scaling=0 28 | amg:max_levels=100 29 | amg:coarseAgenerator=LOW_DEG 30 | amg:matrix_coloring_scheme=PARALLEL_GREEDY 31 | amg:max_uncolored_percentage=0.05 32 | amg:relaxation_factor=0.75 33 | amg:coarse_solver=DENSE_LU_SOLVER 34 | amg:min_coarse_rows=32 35 | 36 | 37 | 38 | #printing obtions 39 | #main:print_solve_stats=1 40 | main:monitor_residual=1 41 | #main:obtain_timings=1 42 | #amg:print_grid_stats=1 43 | -------------------------------------------------------------------------------- /include/cusp/detail/format_utils.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | namespace detail 10 | { 11 | 12 | template 13 | void offsets_to_indices(const OffsetArray& offsets, IndexArray& indices); 14 | 15 | template 16 | void indices_to_offsets(const IndexArray& indices, OffsetArray& offsets); 17 | 18 | template 19 | void extract_diagonal(const MatrixType& A, ArrayType& output); 20 | 21 | template 22 | void sort_by_row(Array1& rows, Array2& columns, Array3& values); 23 | 24 | template 25 | void sort_by_row_and_column(Array1& rows, Array2& columns, Array3& values); 26 | 27 | } // end namespace detail 28 | } // end namespace cusp 29 | 30 | #include 31 | 32 | -------------------------------------------------------------------------------- /src/configs/eigen_configs/INVERSE_FGMRES: -------------------------------------------------------------------------------- 1 | config_version=2 2 | default:eig_solver=INVERSE_ITERATION 3 | default:eig_max_iters=400 4 | default:eig_tolerance=1e-4 5 | default:eig_which=smallest 6 | 7 | #gmres accelerator config 8 | default:solver(main)=FGMRES 9 | main:gmres_n_restart=10 10 | main:preconditioner(amg)=AMG 11 | 12 | #outer solver setup 13 | main:convergence=RELATIVE_INI 14 | main:norm=L2 15 | main:use_scalar_norm=1 16 | main:max_iters=100 17 | main:tolerance=1e-4 18 | 19 | #amg specific 20 | amg:max_iters=1 21 | amg:algorithm=AGGREGATION 22 | amg:selector=SIZE_2 23 | amg:cycle=V 24 | amg:smoother=MULTICOLOR_DILU 25 | amg:presweeps=0 26 | amg:postsweeps=3 27 | amg:error_scaling=0 28 | amg:max_levels=100 29 | amg:coarseAgenerator=LOW_DEG 30 | amg:matrix_coloring_scheme=PARALLEL_GREEDY 31 | amg:max_uncolored_percentage=0.05 32 | amg:relaxation_factor=0.75 33 | amg:coarse_solver=DENSE_LU_SOLVER 34 | amg:min_coarse_rows=32 35 | 36 | 37 | 38 | #printing obtions 39 | #main:print_solve_stats=1 40 | #main:monitor_residual=1 41 | #main:obtain_timings=1 42 | #amg:print_grid_stats=1 43 | 
-------------------------------------------------------------------------------- /src/configs/V-cheby_poly-smoother.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag":1, 4 | "solver": { 5 | "scope": "main", 6 | "print_grid_stats": 1, 7 | "solver": "AMG", 8 | "scaling":"NONE", 9 | "interpolator": "D2", 10 | "aggressive_levels": 0, 11 | "interp_max_elements": 4, 12 | "max_row_sum": 0.9, 13 | "print_solve_stats": 1, 14 | "obtain_timings": 1, 15 | "max_iters": 100, 16 | "monitor_residual": 1, 17 | "convergence": "RELATIVE_INI", 18 | "max_levels": 50, 19 | "cycle": "V", 20 | "smoother": { 21 | "scope": "cheb_smoother", 22 | "solver": "CHEBYSHEV_POLY", 23 | "chebyshev_polynomial_order":2, 24 | "max_iters":1, 25 | "preconditioner": { 26 | "solver": "JACOBI_L1" 27 | }, 28 | "tolerance": 1e-4 29 | }, 30 | "tolerance" : 1e-06, 31 | "norm": "L2", 32 | "presweeps": 0, 33 | "postsweeps":3 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/FGMRES_AGGREGATION_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "error_scaling": 0, 6 | "print_grid_stats": 1, 7 | "algorithm": "AGGREGATION", 8 | "solver": "AMG", 9 | "smoother": "BLOCK_JACOBI", 10 | "presweeps": 0, 11 | "selector": "SIZE_2", 12 | "coarse_solver": "NOSOLVER", 13 | "max_iters": 1, 14 | "min_coarse_rows": 32, 15 | "relaxation_factor": 0.75, 16 | "scope": "amg", 17 | "max_levels": 50, 18 | "postsweeps": 3, 19 | "cycle": "V" 20 | }, 21 | "use_scalar_norm": 1, 22 | "solver": "FGMRES", 23 | "print_solve_stats": 1, 24 | "obtain_timings": 1, 25 | "max_iters": 100, 26 | "monitor_residual": 1, 27 | "gmres_n_restart": 32, 28 | "convergence": "RELATIVE_INI", 29 | "scope": "main", 30 | "tolerance" : 1e-06, 31 | "norm": "L2" 32 | } 33 | } 34 | 
-------------------------------------------------------------------------------- /src/configs/PBICGSTAB.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_vis_data": 0, 6 | "solver": "AMG", 7 | "smoother": { 8 | "scope": "jacobi", 9 | "solver": "BLOCK_JACOBI", 10 | "monitor_residual": 0, 11 | "print_solve_stats": 0 12 | }, 13 | "print_solve_stats": 0, 14 | "interpolator": "D2", 15 | "presweeps": 1, 16 | "max_iters": 1, 17 | "monitor_residual": 0, 18 | "store_res_history": 0, 19 | "scope": "amg", 20 | "max_levels": 50, 21 | "cycle": "V", 22 | "postsweeps": 1 23 | }, 24 | "solver": "PBICGSTAB", 25 | "print_solve_stats": 1, 26 | "obtain_timings": 1, 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "convergence": "RELATIVE_INI", 30 | "scope": "main", 31 | "tolerance" : 1e-06, 32 | "norm": "L2" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /include/amgx_eig_c.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include "amgx_config.h" 10 | #include "amgx_c.h" 11 | 12 | #if defined(__cplusplus) 13 | extern "C" { 14 | #endif 15 | 16 | typedef struct AMGX_eigensolver_handle_struct {char AMGX_eigensolver_handle_dummy;} *AMGX_eigensolver_handle; 17 | 18 | AMGX_RC AMGX_API AMGX_eigensolver_create(AMGX_eigensolver_handle *ret, AMGX_resources_handle rsc, AMGX_Mode mode, const AMGX_config_handle config_eigensolver); 19 | 20 | AMGX_RC AMGX_API AMGX_eigensolver_setup(AMGX_eigensolver_handle eigensolver, AMGX_matrix_handle mtx); 21 | 22 | AMGX_RC AMGX_API AMGX_eigensolver_pagerank_setup(AMGX_eigensolver_handle eigensolver, AMGX_vector_handle a); 23 | 24 | AMGX_RC AMGX_API AMGX_eigensolver_solve(AMGX_eigensolver_handle eigensolver, AMGX_vector_handle x); 25 | 26 | AMGX_RC AMGX_API AMGX_eigensolver_destroy(AMGX_eigensolver_handle obj); 27 | 28 | #if defined(__cplusplus) 29 | }//extern "C" 30 | #endif 31 | -------------------------------------------------------------------------------- /include/cusp/detail/functional.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file functional.h 6 | * \brief Defines templated functors and traits analogous to what 7 | * is found in stl and boost's functional. 
8 | */ 9 | 10 | #pragma once 11 | 12 | #include 13 | 14 | #include 15 | 16 | namespace cusp 17 | { 18 | namespace detail 19 | { 20 | 21 | template 22 | struct zero_function 23 | { 24 | __host__ __device__ T operator()(const T &x) const {return T(0);} 25 | }; // end minus 26 | 27 | template 28 | struct identity_function 29 | { 30 | __host__ __device__ const T& operator()(const T &x) const {return x;} 31 | }; // end identity 32 | 33 | } // end namespace detail 34 | } // end namespace cusp 35 | 36 | // Add identity to amgx::thrust namespace for backward compatibility 37 | namespace amgx 38 | { 39 | namespace thrust 40 | { 41 | template 42 | using identity = cusp::detail::identity_function; 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/configs/PBICGSTAB_CLASSICAL_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_vis_data": 0, 6 | "solver": "AMG", 7 | "smoother": { 8 | "scope": "jacobi", 9 | "solver": "BLOCK_JACOBI", 10 | "monitor_residual": 0, 11 | "print_solve_stats": 0 12 | }, 13 | "print_solve_stats": 0, 14 | "presweeps": 1, 15 | "interpolator": "D2", 16 | "max_iters": 1, 17 | "monitor_residual": 0, 18 | "store_res_history": 0, 19 | "scope": "amg", 20 | "max_levels": 50, 21 | "cycle": "V", 22 | "postsweeps": 1 23 | }, 24 | "solver": "PBICGSTAB", 25 | "print_solve_stats": 1, 26 | "obtain_timings": 1, 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "convergence": "RELATIVE_INI", 30 | "scope": "main", 31 | "tolerance" : 1e-06, 32 | "norm": "L2" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/configs/V-cheby-aggres-L1-trunc.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | 
"interpolator":"D2", 7 | "interp_max_elements": 4, 8 | "aggressive_levels": 1, 9 | "print_solve_stats": 1, 10 | "obtain_timings": 1, 11 | "max_iters": 100, 12 | "coarse_solver": "NOSOLVER", 13 | "monitor_residual": 1, 14 | "convergence": "RELATIVE_INI", 15 | "scope": "main", 16 | "max_levels": 50, 17 | "error_scaling":3, 18 | "cycle": "V", 19 | "smoother": 20 | { 21 | "solver": "CHEBYSHEV", 22 | "preconditioner" : 23 | { 24 | "solver": "JACOBI_L1", 25 | "max_iters": 1 26 | }, 27 | "max_iters": 1, 28 | "chebyshev_polynomial_order" : 2, 29 | "chebyshev_lambda_estimate_mode" : 2 30 | }, 31 | "tolerance" : 1e-06, 32 | "norm": "L2", 33 | "presweeps": 0, 34 | "postsweeps": 1 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/configs/PCG_F.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "F", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCG", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/PCG_V.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | 
"smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "V", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCG", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/PCG_W.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "W", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCG", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/agg_cheb4.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "print_grid_stats": 1, 6 | "algorithm": "AGGREGATION", 7 | "obtain_timings": 1, 8 | "error_scaling": 3, 9 | "solver": "AMG", 10 | 
"smoother": 11 | { 12 | "solver": "CHEBYSHEV", 13 | "preconditioner" : 14 | { 15 | "solver": "JACOBI_L1", 16 | "max_iters": 1 17 | }, 18 | "max_iters": 1, 19 | "chebyshev_polynomial_order" : 4, 20 | "chebyshev_lambda_estimate_mode" : 2 21 | }, 22 | "presweeps": 0, 23 | "postsweeps": 1, 24 | "print_solve_stats": 1, 25 | "selector": "SIZE_8", 26 | "coarsest_sweeps": 1, 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "min_coarse_rows": 2, 30 | "scope": "main", 31 | "max_levels": 50, 32 | "convergence": "RELATIVE_INI", 33 | "tolerance" : 1e-06, 34 | "norm": "L2", 35 | "cycle": "V" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /include/classical/strength/all.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class Strength_All : public Strength_Base 14 | { 15 | typedef T_Config TConfig; 16 | typedef typename TConfig::MatPrec ValueType; 17 | typedef typename TConfig::IndPrec IndexType; 18 | public: 19 | Strength_All(AMG_Config &cfg, const std::string &cfg_scope) : Strength_Base(cfg, cfg_scope) {} 20 | __host__ __device__ 21 | bool strongly_connected(ValueType val, ValueType threshold, ValueType diagonal) 22 | { 23 | return true; 24 | } 25 | }; 26 | 27 | template 28 | class Strength_All_StrengthFactory : public StrengthFactory 29 | { 30 | public: 31 | Strength *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Strength_All(cfg, cfg_scope); } 32 | }; 33 | 34 | } // namespace amgx 35 | -------------------------------------------------------------------------------- /external/rapidjson/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2011 Milo Yip 2 | 3 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /include/miscmath.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | template typename Matrix::value_type estimate_largest_eigen_value(Matrix &A); 10 | } 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace amgx 17 | { 18 | 19 | template 20 | typename Matrix::value_type estimate_largest_eigen_value(Matrix &A) 21 | { 22 | typedef typename Matrix::TConfig TConfig; 23 | typedef typename Matrix::value_type ValueTypeA; 24 | typedef typename TConfig::VecPrec ValueTypeB; 25 | typedef Vector VVector; 26 | VVector x(A.get_num_rows()), y(A.get_num_rows()); 27 | fill(x, 1); 28 | 29 | for (int i = 0; i < 20; i++) 30 | { 31 | ValueTypeB Lmax = get_norm(A, x, LMAX); 32 | scal(x, ValueTypeB(1) / Lmax); 33 | multiply(A, x, y); 34 | x.swap(y); 35 | } 36 | 37 | ValueTypeB retval = get_norm(A, x, L2) / get_norm(A, y, L2); 38 | return retval; 39 | } 40 | 41 | } // namespace amgx 42 | -------------------------------------------------------------------------------- /src/configs/PCGF_CLASSICAL_F_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "F", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCGF", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
/src/configs/PCGF_CLASSICAL_V_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "V", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCGF", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/PCGF_CLASSICAL_W_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "W", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCGF", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
/src/configs/PCG_CLASSICAL_F_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "max_iters": 1, 17 | "interpolator": "D2", 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "F", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCG", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/PCG_CLASSICAL_W_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "presweeps": 1, 16 | "interpolator": "D2", 17 | "max_iters": 1, 18 | "monitor_residual": 0, 19 | "store_res_history": 0, 20 | "scope": "amg", 21 | "max_levels": 50, 22 | "cycle": "W", 23 | "postsweeps": 1 24 | }, 25 | "solver": "PCG", 26 | "print_solve_stats": 1, 27 | "obtain_timings": 1, 28 | "max_iters": 100, 29 | "monitor_residual": 1, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /include/cusp/detail/csr_matrix.inl: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | namespace cusp 8 | { 9 | 10 | ////////////////// 11 | // Constructors // 12 | ////////////////// 13 | 14 | // construct from a different matrix 15 | template 16 | template 17 | csr_matrix 18 | ::csr_matrix(const MatrixType& matrix) 19 | { 20 | cusp::convert(matrix, *this); 21 | } 22 | 23 | ////////////////////// 24 | // Member Functions // 25 | ////////////////////// 26 | 27 | // assignment from another matrix 28 | template 29 | template 30 | csr_matrix& 31 | csr_matrix 32 | ::operator=(const MatrixType& matrix) 33 | { 34 | cusp::convert(matrix, *this); 35 | 36 | return *this; 37 | } 38 | 39 | } // end namespace cusp 40 | 41 | -------------------------------------------------------------------------------- /src/configs/V-cheby-smoother.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag":1, 4 | "solver": { 5 | "scope": "main", 6 | "print_grid_stats": 1, 7 | "solver": "AMG", 8 | "scaling":"DIAGONAL_SYMMETRIC", 9 | "interpolator": "D2", 10 | "aggressive_levels": 0, 11 | "interp_max_elements": 4, 12 | "coarse_solver": "NOSOLVER", 13 | "print_solve_stats": 1, 14 | "obtain_timings": 1, 15 | "max_iters": 100, 16 | "monitor_residual": 1, 17 | "convergence": "RELATIVE_INI", 18 | "max_levels": 50, 19 | "cycle": "V", 20 | "smoother": 21 | { 22 | "solver": "CHEBYSHEV", 23 | "preconditioner" : 24 | { 25 | "solver": "NOSOLVER", 26 | "max_iters": 1 27 | }, 28 | "max_iters": 1, 29 | "chebyshev_polynomial_order" : 4, 30 | "chebyshev_lambda_estimate_mode" : 2 31 | }, 32 | "tolerance" : 1e-06, 33 | "norm": "L2", 34 | "presweeps": 0, 35 | "postsweeps":1 36 | } 37 | } 38 | -------------------------------------------------------------------------------- 
/external/rapidjson/include/rapidjson/filestream.h: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDJSON_FILESTREAM_H_ 2 | #define RAPIDJSON_FILESTREAM_H_ 3 | 4 | #include 5 | 6 | namespace rapidjson { 7 | 8 | //! Wrapper of C file stream for input or output. 9 | /*! 10 | This simple wrapper does not check the validity of the stream. 11 | \implements Stream 12 | */ 13 | class FileStream { 14 | public: 15 | typedef char Ch; //!< Character type. Only support char. 16 | 17 | FileStream(FILE* fp) : fp_(fp), count_(0) { Read(); } 18 | char Peek() const { return current_; } 19 | char Take() { char c = current_; Read(); return c; } 20 | size_t Tell() const { return count_; } 21 | void Put(char c) { fputc(c, fp_); } 22 | 23 | // Not implemented 24 | char* PutBegin() { return 0; } 25 | size_t PutEnd(char*) { return 0; } 26 | 27 | private: 28 | void Read() { 29 | RAPIDJSON_ASSERT(fp_ != 0); 30 | int c = fgetc(fp_); 31 | if (c != EOF) { 32 | current_ = (char)c; 33 | count_++; 34 | } 35 | else 36 | current_ = '\0'; 37 | } 38 | 39 | FILE* fp_; 40 | char current_; 41 | size_t count_; 42 | }; 43 | 44 | } // namespace rapidjson 45 | 46 | #endif // RAPIDJSON_FILESTREAM_H_ 47 | -------------------------------------------------------------------------------- /src/configs/GMRES_AMG_D2.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "exception_handling" : 1, 5 | "solver": { 6 | "scope": "main", 7 | "print_grid_stats": 1, 8 | "store_res_history": 1, 9 | "solver": "GMRES", 10 | "print_solve_stats": 1, 11 | "obtain_timings": 1, 12 | "preconditioner": { 13 | "interpolator": "D2", 14 | "print_grid_stats": 1, 15 | "solver": "AMG", 16 | "smoother": "JACOBI_L1", 17 | "presweeps": 2, 18 | "selector": "PMIS", 19 | "coarsest_sweeps": 2, 20 | "coarse_solver": "NOSOLVER", 21 | "max_iters": 1, 22 | "interp_max_elements": 4, 23 | "min_coarse_rows": 2, 
24 | "scope": "amg_solver", 25 | "max_levels": 50, 26 | "cycle": "V", 27 | "postsweeps": 2 28 | }, 29 | "max_iters": 100, 30 | "monitor_residual": 1, 31 | "gmres_n_restart": 10, 32 | "convergence": "RELATIVE_INI", 33 | "tolerance" : 1e-06, 34 | "norm": "L2" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /include/cusp/detail/hyb_matrix.inl: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace cusp 10 | { 11 | 12 | ////////////////// 13 | // Constructors // 14 | ////////////////// 15 | 16 | // construct from another matrix 17 | template 18 | template 19 | hyb_matrix 20 | ::hyb_matrix(const MatrixType& matrix) 21 | { 22 | cusp::convert(matrix, *this); 23 | } 24 | 25 | ////////////////////// 26 | // Member Functions // 27 | ////////////////////// 28 | 29 | template 30 | template 31 | hyb_matrix& 32 | hyb_matrix 33 | ::operator=(const MatrixType& matrix) 34 | { 35 | cusp::convert(matrix, *this); 36 | 37 | return *this; 38 | } 39 | 40 | } // end namespace cusp 41 | 42 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_L1_TRUNC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "print_grid_stats": 1, 4 | "store_res_history": 1, 5 | "solver": { 6 | "scope": "main", 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "print_grid_stats": 1, 13 | "solver": "AMG", 14 | "interp_max_elements": 4, 15 | "smoother": { 16 | "relaxation_factor": 1, 17 | "scope": "jacobi", 18 | "solver": "JACOBI_L1" 19 | }, 20 | "presweeps": 1, 21 | "coarsest_sweeps": 1, 22 | "coarse_solver": "NOSOLVER", 23 | 
"max_iters": 1, 24 | "max_row_sum": 0.9, 25 | "scope": "amg_solver", 26 | "max_levels": 50, 27 | "postsweeps": 1, 28 | "cycle": "V" 29 | }, 30 | "max_iters": 100, 31 | "monitor_residual": 1, 32 | "gmres_n_restart": 10, 33 | "convergence": "RELATIVE_INI", 34 | "tolerance" : 1e-06, 35 | "norm": "L2" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/configs/PCG_CLASSICAL_V_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "print_grid_stats": 1, 6 | "print_vis_data": 0, 7 | "solver": "AMG", 8 | "smoother": { 9 | "scope": "jacobi", 10 | "solver": "BLOCK_JACOBI", 11 | "monitor_residual": 0, 12 | "print_solve_stats": 0 13 | }, 14 | "print_solve_stats": 0, 15 | "aggressive_levels": 2, 16 | "presweeps": 1, 17 | "interpolator": "D2", 18 | "max_iters": 1, 19 | "monitor_residual": 0, 20 | "store_res_history": 0, 21 | "scope": "amg", 22 | "max_levels": 50, 23 | "cycle": "V", 24 | "postsweeps": 1 25 | }, 26 | "solver": "PCG", 27 | "print_solve_stats": 1, 28 | "obtain_timings": 1, 29 | "max_iters": 100, 30 | "monitor_residual": 1, 31 | "convergence": "RELATIVE_INI", 32 | "scope": "main", 33 | "tolerance" : 1e-06, 34 | "norm": "L2" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/solvers/dummy_solver.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | #include "amgx_types/util.h" 9 | 10 | namespace amgx 11 | { 12 | 13 | template 14 | void 15 | Dummy_Solver::solver_setup(bool reuse_matrix_structure) 16 | { 17 | } 18 | 19 | //launches a single standard cycle 20 | template 21 | AMGX_STATUS 22 | Dummy_Solver::solve_iteration( VVector &b, VVector &x, bool xIsZero ) 23 | { 24 | if (xIsZero) 25 | { 26 | thrust_wrapper::fill(x.begin(), x.end(), types::util::get_zero()); 27 | cudaCheckError(); 28 | } 29 | 30 | return (this->converged(b, x)); 31 | }; 32 | 33 | /**************************************** 34 | * Explicit instantiations 35 | ***************************************/ 36 | #define AMGX_CASE_LINE(CASE) template class Dummy_Solver::Type>; 37 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 38 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 39 | #undef AMGX_CASE_LINE 40 | 41 | } // namespace amgx 42 | -------------------------------------------------------------------------------- /include/cusp/detail/device/elementwise.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace cusp 8 | { 9 | namespace detail 10 | { 11 | namespace device 12 | { 13 | 14 | //template 18 | //void transform_elementwise(const Matrix1& A, 19 | // const Matrix2& B, 20 | // Matrix3& C, 21 | // BinaryFunction op); 22 | 23 | template 26 | void add(const Matrix1& A, 27 | const Matrix2& B, 28 | Matrix3& C); 29 | 30 | template 33 | void subtract(const Matrix1& A, 34 | const Matrix2& B, 35 | Matrix3& C); 36 | 37 | } // end namespace device 38 | } // end namespace detail 39 | } // end namespace cusp 40 | 41 | #include 42 | 43 | -------------------------------------------------------------------------------- /include/cusp/detail/config.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file config.h 6 | * \brief Defines platform configuration. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | // Cusp supports Thrust v1.5.0 and above 15 | #if THRUST_VERSION < 100500 16 | #error "Thrust v1.5.0 or newer is required" 17 | #endif 18 | 19 | // decorator for deprecated features 20 | #ifdef THRUST_DEPRECATED 21 | #define CUSP_DEPRECATED THRUST_DEPRECATED 22 | #else 23 | // THRUST_DEPRECATED not available in this version, use C++14 attribute or empty macro 24 | #if __cplusplus >= 201402L 25 | #define CUSP_DEPRECATED [[deprecated]] 26 | #else 27 | #define CUSP_DEPRECATED 28 | #endif 29 | #endif 30 | 31 | // hooks for profiling 32 | #if defined(CUSP_PROFILE_ENABLED) 33 | // profiling enabled 34 | #define CUSP_PROFILE_SCOPED() PROFILE_SCOPED() 35 | #define CUSP_PROFILE_DUMP() cusp::detail::profiler::dump() 36 | #include 37 | #else 38 | // profiling disabled 39 | #define CUSP_PROFILE_SCOPED() 40 | #define CUSP_PROFILE_DUMP() 41 | #endif 42 | 43 | -------------------------------------------------------------------------------- /include/cusp/precond/aggregate.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace cusp 15 | { 16 | namespace precond 17 | { 18 | template 20 | void mis_to_aggregates(const cusp::coo_matrix& C, 21 | const ArrayType& mis, 22 | ArrayType& aggregates); 23 | 24 | template 26 | void standard_aggregation(const cusp::coo_matrix& C, 27 | ArrayType& aggregates); 28 | 29 | template 31 | void standard_aggregation(const cusp::csr_matrix& C, 32 | ArrayType& aggregates); 33 | 34 | } // end namespace precond 35 | } // end namespace cusp 36 | 37 | #include 38 | -------------------------------------------------------------------------------- /src/configs/FGMRES_AGGREGATION.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "error_scaling": 0, 6 | "print_grid_stats": 1, 7 | "max_uncolored_percentage": 0.05, 8 | "algorithm": "AGGREGATION", 9 | "solver": "AMG", 10 | "smoother": "MULTICOLOR_DILU", 11 | "presweeps": 0, 12 | "selector": "SIZE_2", 13 | "coarse_solver": "DENSE_LU_SOLVER", 14 | "max_iters": 1, 15 | "postsweeps": 3, 16 | "min_coarse_rows": 32, 17 | "relaxation_factor": 0.75, 18 | "scope": "amg", 19 | "max_levels": 50, 20 | "matrix_coloring_scheme": "PARALLEL_GREEDY", 21 | "cycle": "V" 22 | }, 23 | "use_scalar_norm": 1, 24 | "solver": "FGMRES", 25 | "print_solve_stats": 1, 26 | "obtain_timings": 1, 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "gmres_n_restart": 10, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/FGMRES_AGGREGATION_DILU.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "preconditioner": { 5 | "error_scaling": 0, 6 
| "print_grid_stats": 1, 7 | "max_uncolored_percentage": 0.05, 8 | "algorithm": "AGGREGATION", 9 | "solver": "AMG", 10 | "smoother": "MULTICOLOR_DILU", 11 | "presweeps": 0, 12 | "selector": "SIZE_2", 13 | "coarse_solver": "DENSE_LU_SOLVER", 14 | "max_iters": 1, 15 | "postsweeps": 3, 16 | "min_coarse_rows": 32, 17 | "relaxation_factor": 0.75, 18 | "scope": "amg", 19 | "max_levels": 50, 20 | "matrix_coloring_scheme": "PARALLEL_GREEDY", 21 | "cycle": "V" 22 | }, 23 | "use_scalar_norm": 1, 24 | "solver": "FGMRES", 25 | "print_solve_stats": 1, 26 | "obtain_timings": 1, 27 | "max_iters": 100, 28 | "monitor_residual": 1, 29 | "gmres_n_restart": 10, 30 | "convergence": "RELATIVE_INI", 31 | "scope": "main", 32 | "tolerance" : 1e-06, 33 | "norm": "L2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/configs/V-cheby-aggres-L1-trunc-userLambda.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "print_grid_stats": 1, 5 | "solver": "AMG", 6 | "interpolator":"D2", 7 | "interp_max_elements": 4, 8 | "aggressive_levels": 1, 9 | "print_solve_stats": 1, 10 | "obtain_timings": 1, 11 | "max_iters": 100, 12 | "coarse_solver": "NOSOLVER", 13 | "monitor_residual": 1, 14 | "convergence": "RELATIVE_INI", 15 | "scope": "main", 16 | "max_levels": 50, 17 | "error_scaling":3, 18 | "cycle": "V", 19 | "smoother": 20 | { 21 | "solver": "CHEBYSHEV", 22 | "preconditioner" : 23 | { 24 | "solver": "JACOBI_L1", 25 | "max_iters": 1 26 | }, 27 | "max_iters": 1, 28 | "chebyshev_polynomial_order" : 2, 29 | "chebyshev_lambda_estimate_mode" : 3, 30 | "cheby_max_lambda" : 0.92, 31 | "cheby_min_lambda" : 0.07 32 | }, 33 | "tolerance" : 1e-06, 34 | "norm": "L2", 35 | "presweeps": 0, 36 | "postsweeps": 1 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/amgx_c_common.cu: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "amgx_c_common.h" 6 | 7 | namespace amgx 8 | { 9 | 10 | AMGX_RC getCAPIerror_x(AMGX_ERROR err) 11 | { 12 | return (AMGX_RC)((int)(err)); 13 | } 14 | 15 | AMGX_ERROR getAMGXerror(AMGX_RC err) 16 | { 17 | return (AMGX_ERROR)((int)(err)); 18 | } 19 | 20 | 21 | void amgx_error_exit(Resources *rsc, int err) 22 | { 23 | #ifdef AMGX_WITH_MPI 24 | int isInitialized = 0; 25 | MPI_Initialized(&isInitialized); 26 | 27 | if (isInitialized) 28 | if (rsc != NULL) 29 | { 30 | //Resources * res = (Resources*)(((amgx::CWrapper*) rsc)->hdl); 31 | MPI_Abort(*(rsc->getMpiComm()), err); 32 | } 33 | else 34 | { 35 | MPI_Abort(MPI_COMM_WORLD, err); 36 | //MPI_Finalize(); 37 | } 38 | else 39 | { 40 | exit(err); 41 | } 42 | 43 | #else 44 | exit(err); 45 | #endif 46 | } 47 | 48 | MemCArrManager &get_c_arr_mem_manager(void) 49 | { 50 | static MemCArrManager man_; 51 | return man_; 52 | } 53 | 54 | 55 | } // namespace amgx -------------------------------------------------------------------------------- /include/classical/strength/ahat.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class Strength_Ahat : public Strength_Base 14 | { 15 | typedef T_Config TConfig; 16 | typedef typename TConfig::MatPrec ValueType; 17 | typedef typename TConfig::IndPrec IndexType; 18 | public: 19 | Strength_Ahat(AMG_Config &cfg, const std::string &cfg_scope) : Strength_Base(cfg, cfg_scope) {} 20 | __host__ __device__ 21 | bool strongly_connected(ValueType val, ValueType threshold, ValueType diagonal) 22 | { 23 | //return (diagonal<0.0) ? val>threshold : val= -threshold; 25 | } 26 | }; 27 | 28 | template 29 | class Strength_Ahat_StrengthFactory: public StrengthFactory 30 | { 31 | public: 32 | Strength *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Strength_Ahat(cfg, cfg_scope); } 33 | }; 34 | 35 | } // namespace amgx 36 | 37 | 38 | -------------------------------------------------------------------------------- /include/classical/interpolators/common.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | /* 13 | * hold general routines common to both distance1 and distance2 14 | */ 15 | 16 | struct is_non_neg 17 | { 18 | __host__ __device__ 19 | bool operator()(const int &x) 20 | { 21 | return x >= 0; 22 | } 23 | }; 24 | 25 | template< typename T > 26 | __device__ __forceinline__ 27 | bool 28 | sign( T x ) 29 | { 30 | return x >= T(0); 31 | } 32 | 33 | 34 | __global__ 35 | void coarseMarkKernel(int *cf_map, int *mark, const int numEntries); 36 | 37 | __global__ 38 | void modifyCoarseMapKernel(int *cf_map, int *mark, const int numEntries); 39 | 40 | __global__ 41 | void nonZerosPerRowKernel(const int num_rows, const int *cf_map, const int *C_hat_start, 42 | const int *C_hat_end, int *nonZerosPerRow); 43 | 44 | __global__ 45 | void nonZerosPerRowSizeKernel(const int num_rows, const int *cf_map, const int *C_hat_size, 46 | int *nonZerosPerRow); 47 | 48 | } // namespace amgx 49 | -------------------------------------------------------------------------------- /include/cusp/memory.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! 
\file memory.h 6 | * \brief Memory spaces and allocators 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace cusp 15 | { 16 | using host_memory = amgx::thrust::host_system_tag; 17 | using device_memory = amgx::thrust::device_system_tag; 18 | using any_memory = amgx::thrust::any_system_tag; 19 | 20 | template 21 | struct default_memory_allocator; 22 | 23 | template 24 | struct minimum_space; 25 | 26 | 27 | } // end namespace cusp 28 | 29 | #include 30 | #include 31 | 32 | namespace cusp 33 | { 34 | template struct CuspMemMap; 35 | template <> struct CuspMemMap { static const int value = AMGX_host; }; 36 | template <> struct CuspMemMap { static const int value = AMGX_device; }; 37 | template <> struct CuspMemMap { static const int value = AMGX_host; }; 38 | } // end namespace cusp 39 | 40 | -------------------------------------------------------------------------------- /include/memory_info.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | namespace amgx 8 | { 9 | class MemoryInfo 10 | { 11 | public: 12 | static float getTotalMemory() 13 | { 14 | size_t free; 15 | size_t total; 16 | cudaMemGetInfo(&free, &total); 17 | return total / 1024.0 / 1024 / 1024; 18 | } 19 | 20 | static size_t getFreeMemory() 21 | { 22 | size_t free; 23 | size_t total; 24 | cudaMemGetInfo(&free, &total); 25 | return free / 1024.0 / 1024 / 1024; 26 | } 27 | 28 | static float getMaxMemoryUsage() 29 | { 30 | return max_allocated / 1024.0 / 1024 / 1024; 31 | } 32 | 33 | static void updateMaxMemoryUsage() 34 | { 35 | size_t free; 36 | size_t total; 37 | cudaMemGetInfo(&free, &total); 38 | size_t allocated = total - free; 39 | 40 | if (allocated > max_allocated) 41 | { 42 | max_allocated = allocated; 43 | } 44 | } 45 | private: 46 | static size_t max_allocated; 47 | }; 48 | } 49 | -------------------------------------------------------------------------------- /src/tests/version_test.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "unit_test.h" 6 | #include "version.h" 7 | #include "amgx_c.h" 8 | 9 | namespace amgx 10 | 11 | { 12 | 13 | // parameter is used as test name 14 | DECLARE_UNITTEST_BEGIN(CAPIVersionCheck); 15 | 16 | void run() 17 | { 18 | int major, minor; 19 | AMGX_get_api_version(&major, &minor); 20 | UNITTEST_ASSERT_EQUAL(major, __AMGX_API_VERSION_MAJOR); 21 | UNITTEST_ASSERT_EQUAL(minor, __AMGX_API_VERSION_MINOR); 22 | } 23 | 24 | DECLARE_UNITTEST_END(CAPIVersionCheck); 25 | 26 | 27 | // if you want to be able run this test for all available configs you can write this: 28 | //#define AMGX_CASE_LINE(CASE) SampleTest ::Type> TemplateTest_##CASE; 29 | // AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 30 | //#undef AMGX_CASE_LINE 31 | 32 | // or run for all device configs 33 | //#define AMGX_CASE_LINE(CASE) SampleTest ::Type> TemplateTest_##CASE; 34 | // AMGX_FORALL_BUILDS_DEVICE(AMGX_CASE_LINE) 35 | //#undef AMGX_CASE_LINE 36 | 37 | // or you can specify several desired configs 38 | CAPIVersionCheck::Type> CAPIVersionCheck_dDDI; 39 | 40 | 41 | } //namespace amgx 42 | -------------------------------------------------------------------------------- /include/cusp/detail/dispatch/transpose.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | namespace cusp 9 | { 10 | namespace detail 11 | { 12 | namespace dispatch 13 | { 14 | 15 | //////////////// 16 | // Host Paths // 17 | //////////////// 18 | template 19 | void transpose(const MatrixType1& A, MatrixType2& At, 20 | cusp::host_memory) 21 | { 22 | cusp::detail::host::transpose(A, At, 23 | typename MatrixType1::format(), 24 | typename MatrixType2::format()); 25 | } 26 | 27 | ////////////////// 28 | // Device Paths // 29 | ////////////////// 30 | template 31 | void transpose(const MatrixType1& A, MatrixType2& At, 32 | cusp::device_memory) 33 | { 34 | cusp::detail::device::transpose(A, At, 35 | typename MatrixType1::format(), 36 | typename MatrixType2::format()); 37 | } 38 | 39 | } // end namespace dispatch 40 | } // end namespace detail 41 | } // end namespace cusp 42 | 43 | -------------------------------------------------------------------------------- /src/operators/deflated_multiply_operator.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | namespace amgx 6 | { 7 | 8 | template class Operator; 9 | 10 | } 11 | 12 | #include 13 | #include 14 | 15 | namespace amgx 16 | { 17 | 18 | template 19 | void DeflatedMultiplyOperator::apply(const Vector &v, Vector &res, ViewType view) 20 | { 21 | Operator &A = *m_A; 22 | int offset, size; 23 | A.getOffsetAndSizeForView(view, &offset, &size); 24 | copy(v, *m_work, offset, size); 25 | ValueTypeVec xtv = dot(A, *m_x, *m_work); 26 | axpy(*m_x, *m_work, types::util::invert(xtv), offset, size); 27 | A.apply(*m_work, res, OWNED); 28 | axpy(*m_work, res, types::util::invert(m_mu), offset, size); 29 | ValueTypeVec xtres = dot(A, *m_x, res); 30 | axpy(*m_x, res, types::util::invert(xtres), offset, size); 31 | } 32 | 33 | #define AMGX_CASE_LINE(CASE) template class DeflatedMultiplyOperator::Type>; 34 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 35 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 36 | #undef AMGX_CASE_LINE 37 | 38 | } 39 | -------------------------------------------------------------------------------- /ci/containers/x86_64-ubuntu18.04-gnu7-cuda10.2.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved. 
2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | """ 6 | AmgX base image: x86_64-ubuntu18.04-gnu-cuda10.2 7 | """ 8 | 9 | Stage0 += comment(__doc__, reformat=False) 10 | Stage0 += baseimage(image='nvidia/cuda:10.2-devel-ubuntu18.04') 11 | 12 | # Last compiler supported for Ubuntu 18.04 by CUDA 10.2 13 | # https://docs.nvidia.com/cuda/archive/10.2/cuda-installation-guide-linux/index.html#system-requirements 14 | compiler = gnu() 15 | Stage0 += compiler 16 | 17 | # Current minimum version required by AMGX 18 | Stage0 += cmake(eula=True, version='3.7.0') 19 | 20 | # MPI 21 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0') 22 | Stage0 += gdrcopy(ldconfig=True, version='2.0') 23 | Stage0 += knem(ldconfig=True, version='1.1.3') 24 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True) 25 | Stage0 += openmpi( 26 | cuda=True, 27 | infiniband=True, 28 | version='4.0.3', 29 | pmix=True, 30 | ucx=True, 31 | toolchain=compiler.toolchain 32 | ) 33 | Stage0 += environment(multinode_vars = { 34 | 'OMPI_MCA_pml': 'ucx', 35 | 'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib', 36 | 'UCX_MEMTYPE_CACHE': 'n', 37 | 'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm' 38 | }) 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/compilation-issue-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Compilation issue report 3 | about: Configuration error with CMake, or build error. 4 | title: "[Build]" 5 | labels: build issues 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | 12 | A clear and concise description of what the issue is. 13 | 14 | **Environment information:** 15 | - OS: [e.g. `Windows 10`, `Ubuntu 22.04`] 16 | - Compiler version: [e.g. `gcc 9.3.0`, `MSVC 14.31`] 17 | - CMake version: [e.g. `3.23`] 18 | - CUDA used for AMGX compilation: [e.g. `CUDA 11.7.0`] 19 | - MPI version (if applicable): [e.g. 
`OpenMPI 4.0.3`, `MS-MPI v10.1.2`] 20 | - AMGX version or commit hash [e.g. `v2.3.0`, `34232979e993d349a03486f7892830a1209b2fc9`] 21 | - Any related environment variables information 22 | 23 | **Configuration information** 24 | 25 | Provide the `cmake` command line that was used for configuration and its full output. 26 | 27 | **Compilation information** 28 | 29 | Provide your `make` command. 30 | 31 | **Issue information** 32 | 33 | Provide any error messages from CMake or your compiler. It will also greatly help to attach the output of the `make` command rerun with `VERBOSE=1`, which shows the exact host compiler launch command that produces the error. 34 | 35 | **Additional context** 36 | 37 | Add any other context about the problem here. 38 | -------------------------------------------------------------------------------- /ci/containers/x86_64-ubuntu18.04-gnu8-cuda11.0.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved. 
2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | """ 6 | AmgX base image: x86_64-ubuntu18.04-gnu-cuda11.0 7 | """ 8 | 9 | Stage0 += comment(__doc__, reformat=False) 10 | Stage0 += baseimage(image='nvidia/cuda:11.0-devel-ubuntu18.04') 11 | 12 | # Last compiler supported for Ubuntu 18.04 by CUDA 11.0 13 | # https://docs.nvidia.com/cuda/archive/11.0/cuda-installation-guide-linux/index.html#system-requirements 14 | compiler = gnu(version='8') 15 | Stage0 += compiler 16 | 17 | # Current minimum version required by AMGX 18 | Stage0 += cmake(eula=True, version='3.7.0') 19 | 20 | # MPI 21 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0') 22 | Stage0 += gdrcopy(ldconfig=True, version='2.0') 23 | Stage0 += knem(ldconfig=True, version='1.1.3') 24 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True) 25 | Stage0 += openmpi( 26 | cuda=True, 27 | infiniband=True, 28 | version='4.0.3', 29 | pmix=True, 30 | ucx=True, 31 | toolchain=compiler.toolchain 32 | ) 33 | Stage0 += environment(multinode_vars = { 34 | 'OMPI_MCA_pml': 'ucx', 35 | 'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib', 36 | 'UCX_MEMTYPE_CACHE': 'n', 37 | 'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm' 38 | }) 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: 'AMGX crashes, APIs return unexpected errors, etc. ' 4 | title: "[Issue]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | 12 | A clear and concise description of what the issue is. 13 | 14 | **Environment information:** 15 | - OS: [e.g. `Windows 10`, `Ubuntu 22.04`] 16 | - CUDA runtime: [e.g. `CUDA 11.7.0`] 17 | - MPI version (if applicable): [e.g. `OpenMPI 4.0.3`, `MS-MPI v10.1.2`] 18 | - AMGX version or commit hash [e.g. `v2.3.0`, `34232979e993d349a03486f7892830a1209b2fc9`] 19 | - NVIDIA driver: [e.g. 
517.40] 20 | - NVIDIA GPU: [e.g. NVIDIA V100] 21 | - Any related environment variables information 22 | 23 | **AMGX solver configuration** 24 | 25 | If you used an AMGX solver configuration to initialize a solver, provide it here. 26 | 27 | **Matrix Data** 28 | 29 | It would be very helpful if you can provide matrix data that reproduces the error. If you can share it (matrix, right-hand side, initial solution), describe how to get it or provide download links for this data. 30 | 31 | **Reproduction steps** 32 | 33 | If your AMGX workflow differs from the AMGX examples, provide a minimal reproducible example for the reported issue. 34 | 35 | **Additional context** 36 | 37 | Add any other context about the problem here. 38 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_AGGRESSIVE_L1.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "scope": "main", 5 | "print_grid_stats": 1, 6 | "store_res_history": 1, 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "print_grid_stats": 1, 13 | "aggressive_levels": 1, 14 | "solver": "AMG", 15 | "smoother": { 16 | "relaxation_factor": 1, 17 | "scope": "jacobi", 18 | "solver": "JACOBI_L1" 19 | }, 20 | "presweeps": 1, 21 | "selector": "PMIS", 22 | "coarsest_sweeps": 1, 23 | "coarse_solver": "NOSOLVER", 24 | "max_iters": 1, 25 | "max_row_sum": 0.9, 26 | "strength_threshold": 0.25, 27 | "min_coarse_rows": 2, 28 | "scope": "amg_solver", 29 | "max_levels": 50, 30 | "cycle": "V", 31 | "postsweeps": 1 32 | }, 33 | "max_iters": 100, 34 | "monitor_residual": 1, 35 | "gmres_n_restart": 10, 36 | "convergence": "RELATIVE_INI", 37 | "tolerance" : 1e-06, 38 | "norm": "L2" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /include/eigensolvers/qr.h: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace amgx 11 | { 12 | 13 | template 14 | class HouseholderQR 15 | { 16 | public: 17 | typedef Matrix TMatrix; 18 | typedef Vector TVector; 19 | 20 | typedef typename TConfig::template setMemSpace::Type TConfig_h; 21 | typedef typename TConfig::template setMemSpace::Type TConfig_d; 22 | 23 | typedef Vector Vector_h; 24 | typedef Vector Vector_d; 25 | 26 | typedef typename TConfig::VecPrec ValueTypeVec; 27 | 28 | HouseholderQR(TMatrix &A); 29 | void QR_decomposition(TVector &V); 30 | private: 31 | void QR(TVector &V); 32 | void QR(TVector &V, TVector &R); 33 | void send_vector(TVector &V, int destination); 34 | void receive_vector(TVector &V, int source); 35 | void inverse_phase(TVector &V, TVector &R, int root); 36 | private: 37 | TMatrix &m_A; 38 | Vector_h m_tau; 39 | TVector m_work; 40 | bool m_use_R_inverse; 41 | std::stack m_local_comms_stack; 42 | }; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_L1_AGGRESSIVE_HMIS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "scope": "main", 5 | "print_grid_stats": 1, 6 | "store_res_history": 1, 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "print_grid_stats": 1, 13 | "aggressive_levels": 1, 14 | "solver": "AMG", 15 | "smoother": { 16 | "relaxation_factor": 1, 17 | "scope": "jacobi", 18 | "solver": "JACOBI_L1" 19 | }, 20 | "presweeps": 1, 21 | "selector": "HMIS", 22 | "coarsest_sweeps": 1, 23 | "coarse_solver": "NOSOLVER", 24 | "max_iters": 1, 25 | "max_row_sum": 0.9, 26 | "strength_threshold": 
0.25, 27 | "min_coarse_rows": 2, 28 | "scope": "amg_solver", 29 | "max_levels": 50, 30 | "cycle": "V", 31 | "postsweeps": 1 32 | }, 33 | "max_iters": 100, 34 | "monitor_residual": 1, 35 | "gmres_n_restart": 10, 36 | "convergence": "RELATIVE_INI", 37 | "tolerance" : 1e-06, 38 | "norm": "L2" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/configs/PCG_AGGREGATION_JACOBI.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "preconditioner": { 6 | "print_grid_stats": 1, 7 | "algorithm": "AGGREGATION", 8 | "print_vis_data": 0, 9 | "solver": "AMG", 10 | "smoother": { 11 | "relaxation_factor": 0.8, 12 | "scope": "jacobi", 13 | "solver": "BLOCK_JACOBI", 14 | "monitor_residual": 0, 15 | "print_solve_stats": 0 16 | }, 17 | "print_solve_stats": 0, 18 | "presweeps": 0, 19 | "interpolator": "D2", 20 | "selector": "SIZE_2", 21 | "coarse_solver": "NOSOLVER", 22 | "max_iters": 1, 23 | "monitor_residual": 0, 24 | "store_res_history": 0, 25 | "scope": "amg", 26 | "max_levels": 50, 27 | "postsweeps": 3, 28 | "cycle": "V" 29 | }, 30 | "solver": "PCG", 31 | "print_solve_stats": 1, 32 | "obtain_timings": 1, 33 | "max_iters": 100, 34 | "monitor_residual": 1, 35 | "convergence": "RELATIVE_INI", 36 | "scope": "main", 37 | "tolerance" : 1e-06, 38 | "norm": "L2" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /include/norm.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace amgx 15 | { 16 | 17 | /********************************************************** 18 | * Returns the norm of a vector 19 | *********************************************************/ 20 | template 21 | typename types::PODTypes::type get_norm(const MatrixType &A, const VectorType &r, const NormType norm_type, typename types::PODTypes::type norm_factor = 1.0); 22 | 23 | template 24 | void get_norm(const MatrixType &A, const VectorType &r, const int block_size, const NormType norm_type, PlainVectorType &block_nrm, typename types::PODTypes::type norm_factor = 1.0); 25 | 26 | template 27 | void compute_norm_factor(MatrixType &A, VectorType &b, VectorType &x, const NormType normType, typename types::PODTypes::type &normFactor); 28 | 29 | } // namespace amgx 30 | 31 | -------------------------------------------------------------------------------- /include/cusp/detail/convert.inl: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | #include 8 | 9 | namespace cusp 10 | { 11 | namespace detail 12 | { 13 | 14 | // same format 15 | template 17 | void convert(const SourceType& src, DestinationType& dst, 18 | T1, T1) 19 | { 20 | cusp::copy(src, dst); 21 | } 22 | 23 | // different formats 24 | template 26 | void convert(const SourceType& src, DestinationType& dst, 27 | T1, T2) 28 | { 29 | cusp::detail::dispatch::convert(src, dst, 30 | typename SourceType::memory_space(), 31 | typename DestinationType::memory_space()); 32 | } 33 | 34 | } // end namespace detail 35 | 36 | ///////////////// 37 | // Entry Point // 38 | ///////////////// 39 | template 40 | void convert(const SourceType& src, DestinationType& dst) 41 | { 42 | CUSP_PROFILE_SCOPED(); 43 | 44 | cusp::detail::convert(src, dst, 45 | typename SourceType::format(), 46 | typename DestinationType::format()); 47 | } 48 | 49 | } // end namespace cusp 50 | 51 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_PMIS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver": { 5 | "scope": "main", 6 | "print_grid_stats": 1, 7 | "store_res_history": 1, 8 | "obtain_timings": 1, 9 | "solver": "GMRES", 10 | "print_solve_stats": 1, 11 | "preconditioner": { 12 | "interpolator": "D2", 13 | "solver": "AMG", 14 | "cycle": "V", 15 | "smoother": { 16 | "relaxation_factor": 1, 17 | "scope": "jacobi", 18 | "solver": "JACOBI_L1" 19 | }, 20 | "presweeps": 2, 21 | "postsweeps": 2, 22 | "selector": "PMIS", 23 | "coarsest_sweeps": 2, 24 | "coarse_solver": "NOSOLVER", 25 | "max_iters": 1, 26 | "max_row_sum": 0.9, 27 | "min_coarse_rows": 2, 28 | "scope": "amg_solver", 29 | "max_levels": 50, 30 | "print_grid_stats": 1, 31 | "aggressive_levels": 1, 32 | "interp_max_elements": 4 33 | }, 34 | "max_iters": 100, 35 | "monitor_residual": 1, 36 | 
"gmres_n_restart": 10, 37 | "convergence": "RELATIVE_INI", 38 | "tolerance" : 1e-06, 39 | "norm": "L2" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /include/aggregation/selectors/dummy.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | namespace amgx 10 | { 11 | namespace aggregation 12 | { 13 | 14 | template class DUMMY_Selector; 15 | 16 | template 17 | class DUMMY_Selector : public Selector 18 | { 19 | public: 20 | typedef T_Config TConfig; 21 | typedef typename T_Config::MatPrec ValueType; 22 | typedef typename T_Config::IndPrec IndexType; 23 | typedef typename T_Config::MemSpace MemorySpace; 24 | typedef typename Matrix::IVector IVector; 25 | 26 | // Constructor 27 | DUMMY_Selector(AMG_Config &cfg, const std::string &cfg_scope); 28 | 29 | void setAggregates( Matrix &A, 30 | IVector &aggregates, IVector &aggregates_global, int &num_aggregates); 31 | 32 | private: 33 | int aggregate_size; 34 | 35 | }; 36 | 37 | template 38 | class DUMMY_SelectorFactory : public SelectorFactory 39 | { 40 | public: 41 | Selector *create(AMG_Config &cfg, const std::string &cfg_scope) { return new DUMMY_Selector(cfg, cfg_scope); } 42 | }; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /include/cusp/detail/device/arch.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | ///TODO: FIX ME: 10 | /// 11 | ///#if THRUST_VERSION >= 100600 12 | ///#include 13 | ///#else 14 | ///#include 15 | ///#endif 16 | 17 | #include 18 | 19 | namespace cusp 20 | { 21 | namespace detail 22 | { 23 | namespace device 24 | { 25 | namespace arch 26 | { 27 | 28 | template 29 | size_t max_active_blocks(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes) 30 | { 31 | #if THRUST_VERSION >= 100600 32 | ///return amgx::thrust::system::cuda::detail::arch::max_active_blocks(kernel, CTA_SIZE, dynamic_smem_bytes);//OLD Thrust 33 | int numBlocks = 0; 34 | cudaOccupancyMaxActiveBlocksPerMultiprocessor ( &numBlocks, kernel, CTA_SIZE, dynamic_smem_bytes );//NEW Thrust: THRUST_VERSION >= 100802 35 | return numBlocks; 36 | #else 37 | return amgx::thrust::detail::backend::cuda::arch::max_active_blocks(kernel, CTA_SIZE, dynamic_smem_bytes);//Ancient Thrust: This SHOULD trigger error 38 | #endif 39 | } 40 | 41 | } // end namespace arch 42 | } // end namespace device 43 | } // end namespace detail 44 | } // end namespace cusp 45 | 46 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_AGGRESSIVE_L1_TRUNC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "scope": "main", 5 | "print_grid_stats": 1, 6 | "store_res_history": 1, 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "solver": "AMG", 13 | "print_grid_stats": 1, 14 | "aggressive_levels": 1, 15 | "interp_max_elements": 4, 16 | "smoother": { 17 | "relaxation_factor": 1, 18 | "scope": "jacobi", 19 | "solver": "JACOBI_L1" 20 | }, 21 | "presweeps": 2, 22 | "selector": "PMIS", 23 | "coarsest_sweeps": 2, 24 | "coarse_solver": "NOSOLVER", 25 | "max_iters": 1, 26 | "max_row_sum": 
0.9, 27 | "strength_threshold": 0.25, 28 | "min_coarse_rows": 2, 29 | "scope": "amg_solver", 30 | "max_levels": 50, 31 | "cycle": "V", 32 | "postsweeps": 2 33 | }, 34 | "max_iters": 100, 35 | "monitor_residual": 1, 36 | "gmres_n_restart": 10, 37 | "convergence": "RELATIVE_INI", 38 | "tolerance" : 1e-06, 39 | "norm": "L2" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/configs/FGMRES_CLASSICAL_AGGRESSIVE_HMIS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "scope": "main", 5 | "print_grid_stats": 1, 6 | "store_res_history": 1, 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "solver": "AMG", 13 | "print_grid_stats": 1, 14 | "aggressive_levels": 1, 15 | "interp_max_elements": 4, 16 | "smoother": { 17 | "relaxation_factor": 1, 18 | "scope": "jacobi", 19 | "solver": "JACOBI_L1" 20 | }, 21 | "presweeps": 2, 22 | "selector": "HMIS", 23 | "coarsest_sweeps": 2, 24 | "coarse_solver": "NOSOLVER", 25 | "max_iters": 1, 26 | "max_row_sum": 0.9, 27 | "strength_threshold": 0.25, 28 | "min_coarse_rows": 2, 29 | "scope": "amg_solver", 30 | "max_levels": 50, 31 | "cycle": "V", 32 | "postsweeps": 2 33 | }, 34 | "max_iters": 100, 35 | "monitor_residual": 1, 36 | "gmres_n_restart": 100, 37 | "convergence": "RELATIVE_INI", 38 | "tolerance" : 1e-06, 39 | "norm": "L2" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/configs/FGMRES_CLASSICAL_AGGRESSIVE_PMIS.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "solver": { 4 | "scope": "main", 5 | "print_grid_stats": 1, 6 | "store_res_history": 1, 7 | "solver": "FGMRES", 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "interpolator": "D2", 12 | "solver": 
"AMG", 13 | "print_grid_stats": 1, 14 | "aggressive_levels": 1, 15 | "interp_max_elements": 4, 16 | "smoother": { 17 | "relaxation_factor": 1, 18 | "scope": "jacobi", 19 | "solver": "JACOBI_L1" 20 | }, 21 | "presweeps": 2, 22 | "selector": "PMIS", 23 | "coarsest_sweeps": 2, 24 | "coarse_solver": "NOSOLVER", 25 | "max_iters": 1, 26 | "max_row_sum": 0.9, 27 | "strength_threshold": 0.25, 28 | "min_coarse_rows": 2, 29 | "scope": "amg_solver", 30 | "max_levels": 50, 31 | "cycle": "V", 32 | "postsweeps": 2 33 | }, 34 | "max_iters": 100, 35 | "monitor_residual": 1, 36 | "gmres_n_restart": 100, 37 | "convergence": "RELATIVE_INI", 38 | "tolerance" : 1e-06, 39 | "norm": "L2" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /include/cycles/fixed_cycle.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | 8 | namespace amgx 9 | { 10 | template< class T_Config, template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > class CycleDispatcher > class FixedCycle; 11 | } 12 | 13 | #include 14 | #include 15 | 16 | namespace amgx 17 | { 18 | 19 | template< class T_Config, template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > class CycleDispatcher > 20 | class FixedCycle: public Cycle 21 | { 22 | public: 23 | static const AMGX_VecPrecision vecPrec = T_Config::vecPrec; 24 | static const AMGX_MatPrecision matPrec = T_Config::matPrec; 25 | static const AMGX_IndPrecision indPrec = T_Config::indPrec; 26 | typedef AMG AMG_Class; 27 | typedef T_Config TConfig; 28 | typedef Vector VVector; 29 | typedef typename T_Config::MatPrec ValueTypeA; 30 | typedef typename T_Config::template setMemSpace::Type TConfig_h; 31 | typedef Vector Vector_h; 32 | 33 | 34 | void 
cycle( AMG_Class *amg, AMG_Level *level, VVector &b, VVector &x ); 35 | virtual ~FixedCycle() {}; 36 | }; 37 | 38 | } // namespace amgx 39 | -------------------------------------------------------------------------------- /include/aggregation/selectors/serial_greedy.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | namespace amgx 10 | { 11 | namespace aggregation 12 | { 13 | 14 | template class SerialGreedySelector; 15 | 16 | template 17 | class SerialGreedySelector : public Selector 18 | { 19 | public: 20 | typedef T_Config TConfig; 21 | typedef typename T_Config::MatPrec ValueType; 22 | typedef typename T_Config::IndPrec IndexType; 23 | typedef typename T_Config::MemSpace MemorySpace; 24 | typedef typename Matrix::IVector IVector; 25 | 26 | // Constructor 27 | SerialGreedySelector(AMG_Config &cfg, const std::string &cfg_scope); 28 | 29 | void setAggregates( Matrix &A, 30 | IVector &aggregates, IVector &aggregates_global, int &num_aggregates); 31 | 32 | private: 33 | int aggregate_size; 34 | int edge_weight_component; 35 | 36 | }; 37 | 38 | template 39 | class SerialGreedySelectorFactory : public SelectorFactory 40 | { 41 | public: 42 | Selector *create(AMG_Config &cfg, const std::string &cfg_scope) { return new SerialGreedySelector(cfg, cfg_scope); } 43 | }; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/misc.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef AMGX_WITH_MPI 10 | #include 11 | #endif 12 | 13 | namespace amgx 14 | { 15 | 16 | #define PRINT_BUF_SIZE 4096 17 | 18 | void amgx_default_output(const char *msg, int length) 19 | { 20 | printf("%s", msg); 21 | } 22 | 23 | void amgx_dist_output(const char *msg, int length) 24 | { 25 | #ifdef AMGX_WITH_MPI 26 | int rank = 0; 27 | int mpi_initialized = 0; 28 | MPI_Initialized(&mpi_initialized); // We want to make sure MPI_Init has been called. 29 | 30 | if (mpi_initialized) 31 | { 32 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 33 | } 34 | 35 | if (rank == 0) { amgx_output(msg, length); } 36 | 37 | #else 38 | amgx_output(msg, length); 39 | #endif 40 | } 41 | 42 | AMGX_output_callback amgx_output = amgx_default_output; 43 | AMGX_output_callback error_output = amgx_default_output; 44 | AMGX_output_callback amgx_distributed_output = amgx_dist_output; 45 | 46 | int amgx_printf(const char *fmt, ...) 47 | { 48 | int retval = 0; 49 | char buffer[PRINT_BUF_SIZE]; 50 | va_list ap; 51 | va_start(ap, fmt); 52 | retval = vsnprintf(buffer, PRINT_BUF_SIZE, fmt, ap); 53 | va_end(ap); 54 | amgx_distributed_output(buffer, strlen(buffer)); 55 | return retval; 56 | } 57 | 58 | } // namespace amgx 59 | -------------------------------------------------------------------------------- /include/cusp/detail/dispatch/multiply.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace cusp 11 | { 12 | namespace detail 13 | { 14 | namespace dispatch 15 | { 16 | 17 | //////////////// 18 | // Host Paths // 19 | //////////////// 20 | template 23 | void multiply(const LinearOperator& A, 24 | const MatrixOrVector1& B, 25 | MatrixOrVector2& C, 26 | cusp::host_memory, 27 | cusp::host_memory, 28 | cusp::host_memory) 29 | { 30 | cusp::detail::host::multiply(A, B, C); 31 | } 32 | 33 | ////////////////// 34 | // Device Paths // 35 | ////////////////// 36 | template 39 | void multiply(const LinearOperator& A, 40 | const MatrixOrVector1& B, 41 | MatrixOrVector2& C, 42 | cusp::device_memory, 43 | cusp::device_memory, 44 | cusp::device_memory) 45 | { 46 | cusp::detail::device::multiply(A, B, C); 47 | } 48 | 49 | } // end namespace dispatch 50 | } // end namespace detail 51 | } // end namespace cusp 52 | 53 | -------------------------------------------------------------------------------- /src/convergence/absolute.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "amg_config.h" 6 | #include "convergence/absolute.h" 7 | 8 | namespace amgx 9 | { 10 | 11 | template 12 | AbsoluteConvergence::AbsoluteConvergence(AMG_Config &cfg, const std::string &cfg_scope) : Convergence(cfg, cfg_scope) 13 | { 14 | } 15 | 16 | template 17 | void AbsoluteConvergence::convergence_init() 18 | { 19 | this->m_tolerance = this->m_cfg->AMG_Config::template getParameter("tolerance", this->m_cfg_scope); 20 | } 21 | 22 | 23 | template 24 | AMGX_STATUS AbsoluteConvergence::convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini) 25 | { 26 | bool res_converged = true; 27 | 28 | for (int i = 0; i < nrm.size(); i++) 29 | { 30 | bool conv = nrm[i] < this->m_tolerance; 31 | res_converged = res_converged && conv; 32 | } 33 | 34 | return res_converged ? AMGX_ST_CONVERGED : AMGX_ST_NOT_CONVERGED; 35 | } 36 | 37 | /**************************************** 38 | * Explicit instantiations 39 | ***************************************/ 40 | #define AMGX_CASE_LINE(CASE) template class AbsoluteConvergence::Type>; 41 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 42 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 43 | #undef AMGX_CASE_LINE 44 | 45 | } // end namespace 46 | 47 | -------------------------------------------------------------------------------- /include/amgx_types/pod_types.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | // definition of base type for supported AMGX datatype which we would call PODType 10 | 11 | namespace amgx 12 | { 13 | 14 | namespace types 15 | { 16 | 17 | // plain data traits 18 | template 19 | struct PODTypes; 20 | 21 | template <> 22 | struct PODTypes 23 | { 24 | // raw datatype for AMGX datatype 25 | typedef float type; 26 | // tconfig vector precision of raw datatype (used in TConfig templates construction) 27 | static const AMGX_VecPrecision vec_prec = AMGX_vecFloat; 28 | // number of raw dataitems in AMGX datatype 29 | static const int pod_items = 1; 30 | }; 31 | 32 | template <> 33 | struct PODTypes 34 | { 35 | typedef double type; 36 | static const AMGX_VecPrecision vec_prec = AMGX_vecDouble; 37 | static const int pod_items = 1; 38 | }; 39 | 40 | template <> 41 | struct PODTypes 42 | { 43 | typedef float type; 44 | static const AMGX_VecPrecision vec_prec = AMGX_vecFloat; 45 | static const int pod_items = 2; 46 | }; 47 | 48 | template <> 49 | struct PODTypes 50 | { 51 | typedef double type; 52 | static const AMGX_VecPrecision vec_prec = AMGX_vecDouble; 53 | static const int pod_items = 2; 54 | }; 55 | 56 | 57 | } // namespace types 58 | 59 | } // namespace amgx -------------------------------------------------------------------------------- /include/aggregation/selectors/serial_bfs_selector.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | namespace amgx 10 | { 11 | namespace aggregation 12 | { 13 | 14 | template class Serial_BFS_Selector; 15 | 16 | template 17 | class Serial_BFS_Selector : public Selector 18 | { 19 | public: 20 | typedef T_Config TConfig; 21 | typedef typename T_Config::MatPrec ValueType; 22 | typedef typename T_Config::IndPrec IndexType; 23 | typedef typename T_Config::MemSpace MemorySpace; 24 | typedef typename Matrix::IVector IVector; 25 | 26 | // Constructor 27 | Serial_BFS_Selector(AMG_Config &cfg, const std::string &cfg_scope); 28 | 29 | void setAggregates( Matrix &A, 30 | IVector &aggregates, IVector &aggregates_global, int &num_aggregates); 31 | 32 | private: 33 | int aggregate_size; 34 | AMG_Config coloring_cfg; 35 | std::string coloring_cfg_scope; 36 | }; 37 | 38 | template 39 | class Serial_BFS_SelectorFactory : public SelectorFactory 40 | { 41 | public: 42 | Selector *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Serial_BFS_Selector(cfg, cfg_scope); } 43 | }; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /include/cusp/precond/smooth.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace cusp 13 | { 14 | namespace precond 15 | { 16 | 17 | // Smoothed (final) prolongator defined by P = (I - omega/rho(K) K) * T 18 | // where K = diag(S)^-1 * S and rho(K) is an approximation to the 19 | // spectral radius of K. 
20 | template 21 | void smooth_prolongator(const cusp::coo_matrix& S, 22 | const cusp::coo_matrix& T, 23 | cusp::coo_matrix& P, 24 | const ValueType omega = 4.0/3.0, 25 | const ValueType rho_Dinv_S = 0.0); 26 | 27 | template 28 | void smooth_prolongator(const cusp::csr_matrix& S, 29 | const cusp::csr_matrix& T, 30 | cusp::csr_matrix& P, 31 | const ValueType omega = 4.0/3.0, 32 | const ValueType rho_Dinv_S = 0.0); 33 | 34 | } // end namespace precond 35 | } // end namespace cusp 36 | 37 | #include 38 | -------------------------------------------------------------------------------- /ci/containers/x86_64-ubuntu18.04-llvm9-cuda11.0.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved. 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | """ 6 | AmgX base image: x86_64-ubuntu18.04-llvm-cuda11.0 7 | """ 8 | 9 | Stage0 += comment(__doc__, reformat=False) 10 | Stage0 += baseimage(image='nvidia/cuda:11.0-devel-ubuntu18.04') 11 | 12 | # Last compiler supported for Ubuntu 18.04 by CUDA 11.0 13 | # https://docs.nvidia.com/cuda/archive/11.0/cuda-installation-guide-linux/index.html#system-requirements 14 | compiler = llvm(version='9') 15 | Stage0 += compiler 16 | Stage0 += shell(commands=[ 17 | 'update-alternatives --install /usr/bin/cc cc /usr/bin/clang-9 40', 18 | 'update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++-9 60' 19 | ]) 20 | 21 | # Current minimum version required by AMGX 22 | Stage0 += cmake(eula=True, version='3.7.0') 23 | 24 | # MPI 25 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0') 26 | Stage0 += gdrcopy(ldconfig=True, version='2.0') 27 | Stage0 += knem(ldconfig=True, version='1.1.3') 28 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True) 29 | Stage0 += openmpi( 30 | cuda=True, 31 | infiniband=True, 32 | version='4.0.3', 33 | pmix=True, 34 | ucx=True, 35 | toolchain=compiler.toolchain 36 | ) 37 | Stage0 += environment(multinode_vars 
= { 38 | 'OMPI_MCA_pml': 'ucx', 39 | 'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib', 40 | 'UCX_MEMTYPE_CACHE': 'n', 41 | 'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm' 42 | }) 43 | -------------------------------------------------------------------------------- /include/cusp/elementwise.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file elementwise.h 6 | * \brief Elementwise operations on matrices. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | /*! \addtogroup algorithms Algorithms 17 | * \ingroup algorithms 18 | * \{ 19 | */ 20 | 21 | //// Uses Matrix1::value_type(0) and Matrix2::value_type(0) for values not present 22 | //template 26 | //void transform_elementwise(const Matrix1& A, 27 | // const Matrix2& B, 28 | // Matrix3& C, 29 | // BinaryFunction op); 30 | 31 | /*! \p add : Compute the sum of two matrices 32 | */ 33 | template 36 | void add(const Matrix1& A, 37 | const Matrix2& B, 38 | Matrix3& C); 39 | 40 | /*! \p subtract : Compute the difference of two matrices 41 | */ 42 | template 45 | void subtract(const Matrix1& A, 46 | const Matrix2& B, 47 | Matrix3& C); 48 | /*! \} 49 | */ 50 | 51 | } // end namespace cusp 52 | 53 | #include 54 | 55 | -------------------------------------------------------------------------------- /ci/README.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | Continuous integration 8 | === 9 | 10 | **WIP**: Adding continuous integration to AmgX is currently a work in progress. 11 | 12 | * [`./ci/run.sh`](run.sh) runs the whole CI pipeline locally: it builds the 13 | docker containers for each supported environment, builds AmgX for that 14 | environment, and runs the AmgX tests. 15 | * [`./ci/test.sh`](test.sh) performs a clean run of the AmgX tests. 
16 | 17 | The containers are specified using [`HPCCM`], see [`containers/`](containers). 18 | 19 | [`HPCCM`]: https://github.com/NVIDIA/hpc-container-maker 20 | 21 | The behavior of the CI system is configured using the following environment variables: 22 | 23 | * `AMGX_CI_CONTAINERS=""`: list of containers to test. By default all 24 | containers are tested. 25 | * `AMGX_CI_KEEP_BUILD=0|1`: whether the build directories are preserved across 26 | CI runs. The default is `0`, i.e., the build directories are cleaned and amgx 27 | is re-built from scratch on every run. 28 | 29 | * `AMGX_CI_CONTAINER_FILE`: dump container build recipe to a file in the current 30 | working directory: `Dockerfile_${baseimage}`. 31 | 32 | For example, to only test the `x86_64-ubuntu18.04-gnu7-cuda10.2.py` container, 33 | preserving the build directory (e.g. during development): 34 | 35 | ```shell 36 | AMGX_CI_CONTAINERS="x86_64-ubuntu18.04-gnu7-cuda10.2.py" AMGX_CI_KEEP_BUILD=1 ./ci/run.sh 37 | ``` 38 | -------------------------------------------------------------------------------- /external/rapidjson/include/rapidjson/stringbuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDJSON_STRINGBUFFER_H_ 2 | #define RAPIDJSON_STRINGBUFFER_H_ 3 | 4 | #include "rapidjson.h" 5 | #include "internal/stack.h" 6 | 7 | namespace rapidjson { 8 | 9 | //! Represents an in-memory output stream. 10 | /*! 11 | \tparam Encoding Encoding of the stream. 12 | \tparam Allocator type for allocating memory buffer. 13 | \implements Stream 14 | */ 15 | template 16 | struct GenericStringBuffer { 17 | typedef typename Encoding::Ch Ch; 18 | 19 | GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} 20 | 21 | void Put(Ch c) { *stack_.template Push() = c; } 22 | 23 | void Clear() { stack_.Clear(); } 24 | 25 | const char* GetString() const { 26 | // Push and pop a null terminator. This is safe. 
27 | *stack_.template Push() = '\0'; 28 | stack_.template Pop(1); 29 | 30 | return stack_.template Bottom(); 31 | } 32 | 33 | size_t Size() const { return stack_.GetSize(); } 34 | 35 | static const size_t kDefaultCapacity = 256; 36 | mutable internal::Stack stack_; 37 | }; 38 | 39 | typedef GenericStringBuffer > StringBuffer; 40 | 41 | //! Implement specialized version of PutN() with memset() for better performance. 42 | template<> 43 | inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { 44 | memset(stream.stack_.Push(n), c, n * sizeof(c)); 45 | } 46 | 47 | } // namespace rapidjson 48 | 49 | #endif // RAPIDJSON_STRINGBUFFER_H_ 50 | -------------------------------------------------------------------------------- /src/tests/truncate_count_test.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "unit_test.h" 6 | #include 7 | #include 8 | #include "test_utils.h" 9 | #include "util.h" 10 | #include 11 | 12 | namespace amgx 13 | { 14 | 15 | DECLARE_UNITTEST_BEGIN(truncateCountTest); 16 | 17 | void run() 18 | { 19 | typedef Vector::Type> IVector; 20 | const int N = 100; 21 | Matrix A; 22 | A.addProps(CSR); 23 | MatrixCusp Aw(&A); 24 | cusp::gallery::poisson5pt(Aw, N, N); 25 | IVector count(A.get_num_rows(), 0); 26 | VVector x(A.get_num_rows(), 4.), new_row_sum(A.get_num_rows(), 0.); 27 | const double trunc_factor = 0.5; 28 | countTruncElements(A, trunc_factor, x, count, new_row_sum); 29 | int new_count = amgx::thrust::reduce(count.begin(), count.end()); 30 | this->PrintOnFail("truncateCountTest: new nnz should = num rows"); 31 | UNITTEST_ASSERT_TRUE(A.get_num_rows() == new_count); 32 | 33 | for (int i = 0; i < new_row_sum.size(); i++) 34 | { 35 | this->PrintOnFail("truncateCountTest: new_row_sum[i] should = 4 for all i"); 36 | UNITTEST_ASSERT_TRUE(fabs(new_row_sum[i] - 4.) 
<= 1e-6); 37 | } 38 | } 39 | 40 | DECLARE_UNITTEST_END(truncateCountTest); 41 | 42 | truncateCountTest::Type> truncateCountTest_instance_mode_dDDI; 43 | 44 | } // end namespace amgx 45 | -------------------------------------------------------------------------------- /src/cycles/v_cycle.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | namespace amgx 8 | { 9 | 10 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 11 | void V_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 12 | { 13 | V_Cycle( amg, level, b, x ); 14 | } 15 | 16 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 17 | void V_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 18 | { 19 | V_Cycle( amg, level, b, x ); 20 | } 21 | 22 | /**************************************** 23 | * Explicit instantiations 24 | ***************************************/ 25 | template class V_CycleDispatcher; 26 | template class V_CycleDispatcher; 27 | template class V_CycleDispatcher; 28 | 29 | template class V_CycleDispatcher; 30 | template class V_CycleDispatcher; 31 | template class V_CycleDispatcher; 32 | } // namespace amgx 33 | 34 | -------------------------------------------------------------------------------- /src/thread_manager.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include "thread_manager.h" 7 | #include "vector.h" 8 | 9 | namespace amgx 10 | { 11 | 12 | // /////////////////////////////////////////////////////////////////////////////////////////////////////////// 13 | 14 | ThreadWorker::ThreadWorker(ThreadManager *manager, int skills) : 15 | m_manager(manager), 16 | m_skills(skills) 17 | { 18 | } 19 | 20 | ThreadWorker::~ThreadWorker() 21 | { 22 | } 23 | 24 | float ThreadWorker::estimate_workload() 25 | { 26 | return 0.0; 27 | } 28 | 29 | void ThreadWorker::push_task(AsyncTask *task) 30 | { 31 | } 32 | 33 | void ThreadWorker::wait_empty() 34 | { 35 | } 36 | 37 | void ThreadWorker::run() 38 | { 39 | } 40 | 41 | // /////////////////////////////////////////////////////////////////////////////////////////////////////////// 42 | 43 | void ThreadManager::setup_streams( int num_streams, bool priority, bool serialize ) 44 | { 45 | } 46 | 47 | void ThreadManager::join_threads() 48 | { 49 | } 50 | 51 | void ThreadManager::wait_threads() 52 | { 53 | } 54 | 55 | void ThreadManager::spawn_threads(size_t pool_size, 56 | size_t max_alloc_size) 57 | { 58 | } 59 | 60 | void ThreadManager::push_work(AsyncTask *task, bool use_cnp) 61 | { 62 | } 63 | 64 | // /////////////////////////////////////////////////////////////////////////////////////////////////////////// 65 | 66 | void InitTask::exec() 67 | { 68 | } 69 | 70 | } // namespace amgx 71 | 72 | -------------------------------------------------------------------------------- /examples/Makefile.cray: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2011-2024 NVIDIA CORPORATION. All Rights Reserved. 
2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | # CUDA Toolkit location 6 | cc = cc 7 | CC = CC 8 | COMMON_L = -ldl -lamgxsh -L../lib -Wl,-rpath=../lib 9 | CUDA_DIR = $(CRAY_CUDATOOLKIT_DIR) 10 | CUDA_L = -lcudart -L$(CUDA_DIR)/lib64 11 | CUDA_I = -I$(CUDA_DIR)/include 12 | MPI_DIR = $(CRAY_MPICH2_DIR) 13 | MPI_L = -lmpich -L$(MPI_DIR)/lib 14 | MPI_I = -I$(MPI_DIR)/include 15 | 16 | # Examples 17 | amgx_capi: 18 | $(cc) -O2 amgx_capi.c $(CUDA_I) -o amgx_capi $(CUDA_L) $(COMMON_L) 19 | 20 | amgx_mpi_capi: 21 | $(cc) -O2 amgx_mpi_capi.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi $(CUDA_L) $(MPI_L) $(COMMON_L) 22 | 23 | amgx_capi_dynamic: 24 | $(cc) -O2 amgx_capi.c -o amgx_capi_dynamic $(CUDA_I) -DAMGX_DYNAMIC_LOADING $(CUDA_L) $(COMMON_L) 25 | 26 | amgx_mpi_capi_dynamic: 27 | $(cc) -O2 amgx_mpi_capi.c -o amgx_mpi_capi_dynamic $(CUDA_I) $(MPI_I) -DAMGX_DYNAMIC_LOADING $(CUDA_L) $(COMMON_L) $(MPI_L) 28 | 29 | amgx_mpi_capi_agg: 30 | $(cc) -O2 amgx_mpi_capi_agg.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi_agg $(CUDA_L) $(COMMON_L) $(MPI_L) 31 | 32 | amgx_mpi_capi_cla: 33 | $(cc) -O2 amgx_mpi_capi_cla.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi_cla $(CUDA_L) $(COMMON_L) $(MPI_L) 34 | 35 | # All 36 | all: amgx_capi amgx_mpi_capi amgx_capi_dynamic amgx_mpi_capi_dynamic amgx_mpi_capi_agg amgx_mpi_capi_cla 37 | 38 | # Clean 39 | clean: 40 | rm -f amgx_capi 41 | rm -f amgx_mpi_capi 42 | rm -f amgx_capi_dynamic 43 | rm -f amgx_mpi_capi_dynamic 44 | rm -f amgx_mpi_capi_cla 45 | rm -f amgx_mpi_capi_agg 46 | rm -f *.o 47 | -------------------------------------------------------------------------------- /LICENSES/BSD-3-Clause.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 - 2024 NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. 
Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of NVIDIA CORPORATION nor the names of its 13 | contributors may be used to endorse or promote products derived 14 | from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /include/cusp/transpose.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file transpose.h 6 | * \brief Matrix transpose 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | 16 | /*! \addtogroup algorithms Algorithms 17 | * \ingroup algorithms 18 | * \{ 19 | */ 20 | 21 | /*! 
\p transpose : transpose a matrix 22 | * 23 | * \param A input matrix 24 | * \param At output matrix (transpose of A) 25 | * 26 | * \tparam MatrixType1 matrix 27 | * \tparam MatrixType2 matrix 28 | * 29 | * The following code snippet demonstrates how to use \p transpose. 30 | * 31 | * \code 32 | * #include 33 | * #include 34 | * #include 35 | * 36 | * int main(void) 37 | * { 38 | * // initialize a 2x3 matrix 39 | * cusp::array2d A(2,3); 40 | * A(0,0) = 10; A(0,1) = 20; A(0,2) = 30; 41 | * A(1,0) = 40; A(1,1) = 50; A(1,2) = 60; 42 | * 43 | * // print A 44 | * cusp::print(A); 45 | * 46 | * // compute the transpose 47 | * cusp::array2d At; 48 | * cusp::transpose(A, At); 49 | * 50 | * // print A^T 51 | * cusp::print(At); 52 | * 53 | * return 0; 54 | * } 55 | * \endcode 56 | */ 57 | template 58 | void transpose(const MatrixType1& A, MatrixType2& At); 59 | 60 | /*! \} 61 | */ 62 | 63 | } // end namespace cusp 64 | 65 | #include 66 | 67 | -------------------------------------------------------------------------------- /examples/generate_poisson.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int main(int argc, char **argv) 12 | { 13 | cusp::coo_matrix A; 14 | const char *fname = NULL; 15 | // check command line arguments 16 | for (int i=1; i < argc; i++) 17 | { 18 | if (strncmp(argv[i],"-p",100) == 0) 19 | { 20 | int points = atoi(argv[++i]); 21 | int x = atoi(argv[++i]); 22 | int y = atoi(argv[++i]); 23 | int z; 24 | 25 | switch(points) 26 | { 27 | case 5: 28 | cusp::gallery::poisson5pt(A,x,y); 29 | break; 30 | case 7: 31 | z = atoi(argv[++i]); 32 | cusp::gallery::poisson7pt(A,x,y,z); 33 | break; 34 | case 9: 35 | cusp::gallery::poisson9pt(A,x,y); 36 | break; 37 | case 27: 38 | z = atoi(argv[++i]); 39 | cusp::gallery::poisson27pt(A,x,y,z); 40 | break; 41 | default: 42 | printf("Error invalid number of poisson points specified, valid numbers are 5, 7, 9, 27\n"); 43 | exit(0); 44 | } 45 | } 46 | else if (strncmp(argv[i],"-o",100) == 0) 47 | { 48 | i++; 49 | fname = argv[i]; 50 | } 51 | } 52 | 53 | // output 54 | if (fname == NULL) 55 | { 56 | fname = "output.mtx"; 57 | } 58 | 59 | cusp::io::write_matrix_market_file(A,fname); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/operators/solve_operator.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | namespace amgx 6 | { 7 | 8 | template class Operator; 9 | 10 | } 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "amgx_types/util.h" 17 | 18 | namespace amgx 19 | { 20 | 21 | template 22 | SolveOperator::~SolveOperator() 23 | { 24 | delete m_solver; 25 | } 26 | 27 | template 28 | void SolveOperator::apply(const Vector &v, Vector &res, ViewType view) 29 | { 30 | Operator &A = *this->m_A; 31 | Vector &v_ = const_cast&>(v); 32 | int offset, size; 33 | A.getOffsetAndSizeForView(view, &offset, &size); 34 | // Fill initial solution with 0s before solving. 35 | fill(res, types::util::value_type>::get_zero(), offset, size); 36 | AMGX_STATUS solve_status = m_solver->solve(v_, res, false); 37 | 38 | if (solve_status != AMGX_ST_CONVERGED) 39 | { 40 | FatalError("OperatorSolve: solver did not converge.", AMGX_ERR_CONFIGURATION); 41 | } 42 | } 43 | 44 | template 45 | void SolveOperator::setup() 46 | { 47 | assert(m_A); 48 | assert(m_solver); 49 | m_solver->setup(*m_A, false); 50 | } 51 | 52 | #define AMGX_CASE_LINE(CASE) template class SolveOperator::Type>; 53 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) 54 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) 55 | #undef AMGX_CASE_LINE 56 | 57 | } 58 | -------------------------------------------------------------------------------- /include/cusp/relaxation/jacobi.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file jacobi.h 6 | * \brief Jacobi relaxation. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace cusp 16 | { 17 | namespace relaxation 18 | { 19 | 20 | template 21 | class jacobi 22 | { 23 | ValueType default_omega; 24 | cusp::array1d diagonal; 25 | cusp::array1d temp; 26 | 27 | public: 28 | jacobi(); 29 | 30 | template 31 | jacobi(const MatrixType& A, ValueType omega=1.0); 32 | 33 | // ignores initial x 34 | template 35 | void presmooth(const MatrixType& A, const VectorType1& b, VectorType2& x); 36 | 37 | // smooths initial x 38 | template 39 | void postsmooth(const MatrixType& A, const VectorType1& b, VectorType2& x); 40 | 41 | template 42 | void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x); 43 | 44 | template 45 | void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x, ValueType omega); 46 | }; 47 | 48 | } // end namespace relaxation 49 | } // end namespace cusp 50 | 51 | #include 52 | 53 | -------------------------------------------------------------------------------- /ci/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # 8 | # Supports following environment variables: 9 | # 10 | # AMGX_CI_CONTAINERS: list of containers to run. Default: all containers. 11 | # 12 | # AMGX_CI_KEEP_BUILD: preserves build directory. Default: build directories 13 | # are cleaned up each time. 
14 | set -ex 15 | 16 | if command -v shellcheck ; then 17 | shellcheck ci/*.sh 18 | fi 19 | 20 | CONTAINERS=$(ls ci/containers) 21 | if [ -n "${AMGX_CI_CONTAINERS}" ]; then 22 | CONTAINERS="${AMGX_CI_CONTAINERS}" 23 | fi 24 | 25 | KEEP_BUILD=0 26 | if [ -n "${AMGX_CI_KEEP_BUILD}" ]; then 27 | KEEP_BUILD="${AMGX_CI_KEEP_BUILD}" 28 | fi 29 | 30 | CONTAINER_FILE=/dev/fd/2 31 | 32 | for CONTAINER in $CONTAINERS; do 33 | BASE_NAME=$(basename "${CONTAINER}" .py) 34 | BASE_IMG="amgx:base_${BASE_NAME}" 35 | BUILD_DIR="build_${BASE_NAME}" 36 | RECIPE="ci/containers/${CONTAINER}" 37 | if ! test -f "${RECIPE}"; then 38 | echo "Container at \"${RECIPE}\" does not exist" 39 | exit 1 40 | fi 41 | 42 | if [ -n "${AMGX_CI_CONTAINER_FILE}" ]; then 43 | CONTAINER_FILE="Dockerfile_${BASE_NAME}" 44 | fi 45 | 46 | hpccm --recipe "${RECIPE}" --format=docker \ 47 | | tee "${CONTAINER_FILE}" | \ 48 | docker build -t "${BASE_IMG}" - 49 | nvidia-docker \ 50 | run \ 51 | -v "$(pwd -LP)":/amgx \ 52 | -u "$(id -u "${USER}")":"$(id -g "${USER}")" \ 53 | "${BASE_IMG}" \ 54 | bash -c "cd /amgx/ && AMGX_CI_KEEP_BUILD=${KEEP_BUILD} ./ci/test.sh ${BUILD_DIR}" 55 | done 56 | -------------------------------------------------------------------------------- /src/configs/AMG_CLASSICAL_AGGRESSIVE_CHEB_L1_TRUNC.json: -------------------------------------------------------------------------------- 1 | { 2 | "config_version": 2, 3 | "determinism_flag": 1, 4 | "solver":{ 5 | "scope": "main", 6 | "solver": "PCG", 7 | "store_res_history": 1, 8 | "print_solve_stats": 1, 9 | "obtain_timings": 1, 10 | "preconditioner": { 11 | "print_grid_stats": 1, 12 | "scope": "amg_solver", 13 | "interpolator": "D2", 14 | "solver": "AMG", 15 | "max_levels": 50, 16 | "selector": "PMIS", 17 | "cycle": "V", 18 | "presweeps": 0, 19 | "postsweeps": 3, 20 | "coarsest_sweeps": 2, 21 | "min_coarse_rows": 2, 22 | "coarse_solver": "NOSOLVER", 23 | "max_iters": 1, 24 | "max_row_sum": 0.9, 25 | "strength_threshold": 0.25, 26 | 
"error_scaling":3, 27 | "print_grid_stats": 1, 28 | "aggressive_levels": 1, 29 | "interp_max_elements": 4, 30 | "smoother": { 31 | "relaxation_factor": 0.91, 32 | "scope": "jacobi", 33 | "solver": "CHEBYSHEV", 34 | "preconditioner" : 35 | { 36 | "solver": "JACOBI_L1", 37 | "max_iters": 1 38 | }, 39 | "chebyshev_polynomial_order": 2, 40 | "chebyshev_lambda_estimate_mode": 2 41 | } 42 | }, 43 | "max_iters": 100, 44 | "monitor_residual": 1, 45 | "convergence": "RELATIVE_INI", 46 | "tolerance" : 1e-06, 47 | "norm": "L2" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /include/eigensolvers/eigenvector_solver.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace amgx 11 | { 12 | 13 | template 14 | class EigenVectorSolver 15 | { 16 | public: 17 | typedef Matrix MMatrix; 18 | typedef Vector VVector; 19 | 20 | typedef typename TConfig::template setMemSpace::Type TConfig_h; 21 | typedef typename TConfig::template setMemSpace::Type TConfig_d; 22 | 23 | typedef Matrix Matrix_h; 24 | typedef Matrix Matrix_d; 25 | 26 | typedef Vector Vector_h; 27 | typedef Vector Vector_d; 28 | 29 | typedef typename TConfig::MatPrec ValueTypeMat; 30 | typedef typename TConfig::VecPrec ValueTypeVec; 31 | typedef typename TConfig::IndPrec IndType; 32 | 33 | EigenVectorSolver(AMG_Config &cfg, const std::string &cfg_scope); 34 | ~EigenVectorSolver(); 35 | 36 | void setup(Operator &A); 37 | AMGX_STATUS solve(ValueTypeVec eigenvalue, VVector &eigenvector); 38 | private: 39 | AMG_Config m_cfg; 40 | Operator *m_A; 41 | EigenSolver *m_solver; 42 | }; 43 | 44 | template 45 | class EigenVectorSolverFactory 46 | { 47 | public: 48 | static EigenVectorSolver *create(std::string &name); 49 | private: 50 | static 
EigenVectorSolver *create_inverse_iteration(); 51 | }; 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/cycles/w_cycle.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | 7 | namespace amgx 8 | { 9 | 10 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 11 | void W_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 12 | { 13 | W_Cycle( amg, level, b, x ); 14 | W_Cycle( amg, level, b, x ); 15 | } 16 | 17 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 18 | void W_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 19 | { 20 | AMGX_CPU_PROFILER( "W_Cycle::dispatch " ); 21 | W_Cycle( amg, level, b, x ); 22 | W_Cycle( amg, level, b, x ); 23 | } 24 | 25 | /**************************************** 26 | * Explicit instantiations 27 | ***************************************/ 28 | template class W_CycleDispatcher; 29 | template class W_CycleDispatcher; 30 | template class W_CycleDispatcher; 31 | 32 | template class W_CycleDispatcher; 33 | template class W_CycleDispatcher; 34 | template class W_CycleDispatcher; 35 | 36 | } // namespace amgx 37 | -------------------------------------------------------------------------------- /src/cycles/f_cycle.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 13 | void F_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 14 | { 15 | W_Cycle( amg, level, b, x ); 16 | V_Cycle( amg, level, b, x ); 17 | } 18 | 19 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > 20 | void F_CycleDispatcher::dispatch( AMG_Class *amg, AMG_Level *level, Vector &b, Vector &x ) const 21 | { 22 | W_Cycle( amg, level, b, x ); 23 | V_Cycle( amg, level, b, x ); 24 | } 25 | 26 | /**************************************** 27 | * Explicit instantiations 28 | ***************************************/ 29 | template class F_CycleDispatcher; 30 | template class F_CycleDispatcher; 31 | template class F_CycleDispatcher; 32 | 33 | template class F_CycleDispatcher; 34 | template class F_CycleDispatcher; 35 | template class F_CycleDispatcher; 36 | 37 | } // namespace amgx 38 | -------------------------------------------------------------------------------- /include/cusp/detail/host/reference/ell.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #ifndef __ELL_H__ 6 | #define __ELL_H__ 7 | 8 | //////////////////////////////////////////////////////////////////////////////// 9 | //! Compute y += A*x for a sparse ELL matrix A and column vectors x and y 10 | //! @param num_rows number of rows in A 11 | //! @param num_cols number of columns in A 12 | //! @param num_entries_per_row number of columns in each row (smaller rows are zero padded) 13 | //! @param stride separation between row entries (stride >= num_rows, for alignment) 14 | //! @param Aj ELL column indices 15 | //! 
@param Ax ELL nonzero values 16 | //! @param x column vector 17 | //! @param y column vector 18 | //////////////////////////////////////////////////////////////////////////////// 19 | template 20 | void ell_matvec(const IndexType num_rows, 21 | const IndexType num_cols, 22 | const IndexType num_entries_per_row, 23 | const IndexType stride, 24 | const IndexType * Aj, 25 | const ValueType * Ax, 26 | const ValueType * x, 27 | ValueType * y) 28 | { 29 | for(IndexType n = 0; n < num_entries_per_row; n++){ 30 | const IndexType * Aj_n = Aj + n * stride; 31 | const ValueType * Ax_n = Ax + n * stride; 32 | for(IndexType i = 0; i < num_rows; i++){ 33 | y[i] += Ax_n[i] * x[Aj_n[i]]; 34 | } 35 | } 36 | } 37 | 38 | 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /examples/convert.c: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "amgx_c.h" 6 | #include 7 | 8 | // Tool for conversion MatrixMarket files to binary files (mainly for faster reading or storing on disk) 9 | // compilation: g++ convert.c -o convert -lamgxsh -L../lib -Wl,-rpath=../lib 10 | // run: convert 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | AMGX_config_handle cfg; 15 | AMGX_matrix_handle A; 16 | AMGX_vector_handle b, x; 17 | AMGX_resources_handle rsrc; 18 | AMGX_Mode mode = AMGX_mode_hDDI; 19 | 20 | if (argc < 2) // argv[1] (the matrix file) is read below, so at least one argument is required 21 | { 22 | printf("Specify matrix file as first argument"); 23 | exit(2); 24 | } 25 | 26 | AMGX_SAFE_CALL(AMGX_initialize()); 27 | AMGX_SAFE_CALL(AMGX_config_create(&cfg, "config_version=2, matrix_writer=binary")); 28 | AMGX_resources_create_simple(&rsrc, cfg); 29 | AMGX_matrix_create(&A, rsrc, mode); 30 | AMGX_vector_create(&x, rsrc, mode); 31 | AMGX_vector_create(&b, rsrc, mode); 32 | std::string arg = argv[1]; 33 | int n, bsize_x, bsize_y, sol_size, 
sol_bsize; 34 | AMGX_read_system(A, b, x, arg.c_str()); 35 | AMGX_matrix_get_size(A, &n, &bsize_x, &bsize_y); 36 | AMGX_vector_get_size(x, &sol_size, &sol_bsize); 37 | 38 | if (sol_size == 0 || sol_bsize == 0) 39 | { 40 | printf("Initializing solution with 0\n"); 41 | AMGX_vector_set_zero(x, n, bsize_x); 42 | } 43 | 44 | arg = arg + ".bin"; 45 | AMGX_write_system(A, b, x, arg.c_str()); 46 | AMGX_resources_destroy(rsrc); 47 | AMGX_SAFE_CALL(AMGX_config_destroy(cfg)); 48 | AMGX_SAFE_CALL(AMGX_finalize()); 49 | } 50 | -------------------------------------------------------------------------------- /include/cusp/graph/maximal_independent_set.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file maximal_independent_set.h 6 | * \brief Maximal independent set of a graph 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace cusp 14 | { 15 | namespace graph 16 | { 17 | /*! \addtogroup algorithms Algorithms 18 | * \ingroup algorithms 19 | * \{ 20 | */ 21 | 22 | /*! \p maximal_independent_set : computes a maximal independent set (MIS) 23 | * of a graph. The MIS is a set of vertices such that (1) no two vertices 24 | * are adjacent and (2) it is not possible to add another vertex to the 25 | * set without violating the first property. The MIS(k) is a generalization 26 | * of the MIS with the property that no two vertices in the set are joined 27 | * by a path of \p k edges or less. The standard MIS is therefore a MIS(1). 28 | * 29 | * The MIS(k) is represented by an array of {0,1} values. Specifically, 30 | * stencil[i] is 1 if vertex \p i is a member of the MIS(k) and 31 | * 0 otherwise. 
32 | * 33 | * \param A symmetric matrix that represents a graph 34 | * \param stencil array to hold the MIS(k) 35 | * \param k radius of independence 36 | * 37 | * \tparam Matrix matrix 38 | * \tparam Array array 39 | * 40 | * \see http://en.wikipedia.org/wiki/Maximal_independent_set 41 | */ 42 | 43 | template 44 | size_t maximal_independent_set(const Matrix& A, Array& stencil, size_t k = 1); 45 | 46 | /*! \} 47 | */ 48 | 49 | 50 | } // end namespace graph 51 | } // end namespace cusp 52 | 53 | #include 54 | 55 | -------------------------------------------------------------------------------- /src/classical/interpolators/common.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include 6 | #include 7 | 8 | namespace amgx 9 | { 10 | 11 | /* 12 | * Count the # of non-zeros per row 13 | */ 14 | __global__ 15 | void nonZerosPerRowKernel(const int num_rows, const int *cf_map, const int *C_hat_start, 16 | const int *C_hat_end, int *nonZerosPerRow) 17 | { 18 | for (int tIdx = threadIdx.x + blockIdx.x * blockDim.x; tIdx < num_rows; tIdx += gridDim.x * blockDim.x) 19 | { 20 | int nonZeros = 0; 21 | 22 | if (cf_map[tIdx] >= 0) 23 | { 24 | nonZeros = 1; 25 | } 26 | else if (cf_map[tIdx] == STRONG_FINE) 27 | { 28 | nonZeros = 0; 29 | } 30 | else 31 | { 32 | nonZeros = C_hat_end[tIdx] - C_hat_start[tIdx]; 33 | } 34 | 35 | nonZerosPerRow[tIdx] = nonZeros; 36 | } 37 | } 38 | 39 | __global__ 40 | void nonZerosPerRowSizeKernel(const int num_rows, const int *cf_map, 41 | const int *C_hat_size, int *nonZerosPerRow) 42 | { 43 | for (int tIdx = threadIdx.x + blockIdx.x * blockDim.x; tIdx < num_rows; tIdx += gridDim.x * blockDim.x) 44 | { 45 | int nonZeros = 0; 46 | 47 | if (cf_map[tIdx] >= 0) 48 | { 49 | nonZeros = 1; 50 | } 51 | else if (cf_map[tIdx] == STRONG_FINE) 52 | { 53 | nonZeros = 0; 54 | } 55 | else 
56 | { 57 | nonZeros = C_hat_size[tIdx]; 58 | } 59 | 60 | nonZerosPerRow[tIdx] = nonZeros; 61 | } 62 | } 63 | 64 | 65 | 66 | } // namespace amgx 67 | -------------------------------------------------------------------------------- /include/convergence/relative_ini.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class RelativeIniConvergence : public Convergence 14 | { 15 | public: 16 | static const AMGX_VecPrecision vecPrec = TConfig::vecPrec; 17 | static const AMGX_MatPrecision matPrec = TConfig::matPrec; 18 | static const AMGX_IndPrecision indPrec = TConfig::indPrec; 19 | typedef Vector > Vector_h; 20 | typedef typename TConfig::VecPrec ValueTypeB; 21 | typedef typename types::PODTypes::type PODValueTypeB; 22 | typedef typename TConfig::template setMemSpace::Type TConfig_h; 23 | typedef typename TConfig::template setMemSpace::Type TConfig_d; 24 | typedef Vector::vec_prec>::Type> PODVec; 25 | typedef Vector::vec_prec>::Type> PODVec_h; 26 | RelativeIniConvergence(AMG_Config &amg, const std::string &cfg_scope); 27 | 28 | void convergence_init(); 29 | 30 | AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini); 31 | 32 | }; 33 | 34 | template 35 | class RelativeIniConvergenceFactory : public ConvergenceFactory 36 | { 37 | public: 38 | Convergence *create(AMG_Config &cfg, const std::string &cfg_scope) { return new RelativeIniConvergence(cfg, cfg_scope); } 39 | }; 40 | 41 | } // end namespace amgx 42 | -------------------------------------------------------------------------------- /include/cusp/detail/device/spmv/coo_serial.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. 
All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace cusp 10 | { 11 | namespace detail 12 | { 13 | namespace device 14 | { 15 | 16 | // COO format SpMV kernel that uses only one thread 17 | // This is incredibly slow, so it is only useful for testing purposes, 18 | // *extremely* small matrices, or a few elements at the end of a 19 | // larger matrix 20 | 21 | template 22 | __global__ void 23 | spmv_coo_serial_kernel(const IndexType num_entries, 24 | const IndexType * I, 25 | const IndexType * J, 26 | const ValueType * V, 27 | const ValueType * x, 28 | ValueType * y) 29 | { 30 | for(IndexType n = 0; n < num_entries; n++) 31 | { 32 | y[I[n]] += V[n] * x[J[n]]; 33 | } 34 | } 35 | 36 | 37 | template 39 | void spmv_coo_serial_device(const Matrix& A, 40 | const ValueType* x, 41 | ValueType* y) 42 | { 43 | typedef typename Matrix::index_type IndexType; 44 | 45 | const IndexType * I = amgx::thrust::raw_pointer_cast(&A.row_indices[0]); 46 | const IndexType * J = amgx::thrust::raw_pointer_cast(&A.column_indices[0]); 47 | const ValueType * V = amgx::thrust::raw_pointer_cast(&A.values[0]); 48 | 49 | spmv_coo_serial_kernel <<<1,1>>> 50 | (A.num_entries, I, J, V, x, y); 51 | } 52 | 53 | } // end namespace device 54 | } // end namespace detail 55 | } // end namespace cusp 56 | 57 | -------------------------------------------------------------------------------- /include/cusp/precond/detail/diagonal.inl: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! 
\file diagonal.inl 6 | * \brief Inline file for diagonal.h 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | namespace cusp 16 | { 17 | namespace precond 18 | { 19 | namespace detail 20 | { 21 | template 22 | struct reciprocal 23 | { 24 | __host__ __device__ 25 | T operator()(const T& v) 26 | { 27 | return T(1.0) / v; 28 | } 29 | }; 30 | 31 | } // end namespace detail 32 | 33 | 34 | // constructor 35 | template 36 | template 37 | diagonal 38 | ::diagonal(const MatrixType& A) 39 | : linear_operator(A.num_rows, A.num_cols, A.num_rows) 40 | { 41 | // extract the main diagonal 42 | cusp::detail::extract_diagonal(A, diagonal_reciprocals); 43 | 44 | // invert the entries 45 | thrust_wrapper::transform(diagonal_reciprocals.begin(), diagonal_reciprocals.end(), 46 | diagonal_reciprocals.begin(), detail::reciprocal()); 47 | } 48 | 49 | // linear operator 50 | template 51 | template 52 | void diagonal 53 | ::operator()(const VectorType1& x, VectorType2& y) const 54 | { 55 | cusp::blas::xmy(diagonal_reciprocals, x, y); 56 | } 57 | 58 | } // end namespace precond 59 | } // end namespace cusp 60 | 61 | -------------------------------------------------------------------------------- /include/cusp/detail/device/generalized_spmv/coo_serial.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace cusp 10 | { 11 | namespace detail 12 | { 13 | namespace device 14 | { 15 | 16 | // COO format SpMV kernel that uses only one thread 17 | // This is incredibly slow, so it is only useful for testing purposes, 18 | // *extremely* small matrices, or a few elements at the end of a 19 | // larger matrix 20 | 21 | template 22 | __global__ void 23 | spmv_coo_serial_kernel(const IndexType num_nonzeros, 24 | const IndexType * I, 25 | const IndexType * J, 26 | const ValueType * V, 27 | const ValueType * x, 28 | ValueType * y) 29 | { 30 | for(IndexType n = 0; n < num_nonzeros; n++) 31 | { 32 | y[I[n]] += V[n] * x[J[n]]; 33 | } 34 | } 35 | 36 | 37 | template 38 | void spmv_coo_serial_device(const coo_matrix& coo, 39 | const ValueType * d_x, 40 | ValueType * d_y) 41 | { 42 | const IndexType * I = amgx::thrust::raw_pointer_cast(&coo.row_indices[0]); 43 | const IndexType * J = amgx::thrust::raw_pointer_cast(&coo.column_indices[0]); 44 | const ValueType * V = amgx::thrust::raw_pointer_cast(&coo.values[0]); 45 | 46 | spmv_coo_serial_kernel <<<1,1>>> 47 | (coo.num_entries, I, J, V, d_x, d_y); // pass the raw pointers extracted above; entry count as in spmv/coo_serial.h 48 | } 49 | 50 | } // end namespace device 51 | } // end namespace detail 52 | } // end namespace cusp 53 | 54 | -------------------------------------------------------------------------------- /include/cusp/detail/device/generalized_spmv/hyb.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
20 | template 21 | void spmv_hyb(const cusp::hyb_matrix& hyb, 22 | const ValueType * x, 23 | ValueType * y) 24 | { 25 | cusp::detail::device::spmv(hyb.ell, x, y); 26 | cusp::detail::device::spmv(hyb.coo, x, y); 27 | } 28 | 29 | template 30 | void spmv_hyb_tex(const cusp::hyb_matrix& hyb, 31 | const ValueType * x, 32 | ValueType * y) 33 | { 34 | cusp::detail::device::spmv_tex(hyb.ell, x, y); 35 | cusp::detail::device::spmv_tex(hyb.coo, x, y); 36 | } 37 | 38 | 39 | template 40 | void spmv(const cusp::hyb_matrix& hyb, 41 | const ValueType * x, 42 | ValueType * y) 43 | { 44 | spmv_hyb(hyb, x, y); 45 | } 46 | 47 | template 48 | void spmv_tex(const cusp::hyb_matrix& hyb, 49 | const ValueType * x, 50 | ValueType * y) 51 | { 52 | spmv_hyb_tex(hyb, x, y); 53 | } 54 | 55 | } // end namespace device 56 | } // end namespace detail 57 | } // end namespace cusp 58 | 59 | -------------------------------------------------------------------------------- /include/convergence/absolute.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class AbsoluteConvergence : public Convergence 14 | { 15 | public: 16 | static const AMGX_VecPrecision vecPrec = TConfig::vecPrec; 17 | static const AMGX_MatPrecision matPrec = TConfig::matPrec; 18 | static const AMGX_IndPrecision indPrec = TConfig::indPrec; 19 | typedef Vector > Vector_h; 20 | typedef typename TConfig::VecPrec ValueTypeB; 21 | typedef typename TConfig::MatPrec ValueTypeA; 22 | typedef typename types::PODTypes::type PODValueTypeB; 23 | typedef typename TConfig::template setMemSpace::Type TConfig_h; 24 | typedef typename TConfig::template setMemSpace::Type TConfig_d; 25 | typedef Vector::vec_prec>::Type> PODVec; 26 | typedef Vector::vec_prec>::Type> PODVec_h; 27 | AbsoluteConvergence(AMG_Config &amg, const std::string &cfg_scope); 28 | 29 | void convergence_init(); 30 | 31 | AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini); 32 | }; 33 | 34 | template 35 | class AbsoluteConvergenceFactory : public ConvergenceFactory 36 | { 37 | public: 38 | Convergence *create(AMG_Config &cfg, const std::string &cfg_scope) { return new AbsoluteConvergence(cfg, cfg_scope); } 39 | }; 40 | 41 | } // end namespace amgx 42 | -------------------------------------------------------------------------------- /include/cusp/relaxation/polynomial.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! \file polynomial.h 6 | * \brief polynomial relaxation. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace cusp 16 | { 17 | namespace relaxation 18 | { 19 | 20 | template 21 | class polynomial 22 | { 23 | cusp::array1d default_coefficients; 24 | cusp::array1d residual; 25 | cusp::array1d h; 26 | cusp::array1d y; 27 | 28 | public: 29 | polynomial(); 30 | 31 | template 32 | polynomial(const MatrixType& A, const VectorType& coefficients); 33 | 34 | // ignores initial x 35 | template 36 | void presmooth(const MatrixType& A, const VectorType1& b, VectorType2& x); 37 | 38 | // smooths initial x 39 | template 40 | void postsmooth(const MatrixType& A, const VectorType1& b, VectorType2& x); 41 | 42 | template 43 | void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x) const; 44 | 45 | template 46 | void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x, VectorType3& coeffients); 47 | }; 48 | 49 | } // end namespace relaxation 50 | } // end namespace cusp 51 | 52 | #include 53 | 54 | -------------------------------------------------------------------------------- /include/convergence/relative_max.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class RelativeMaxConvergence : public Convergence 14 | { 15 | public: 16 | static const AMGX_VecPrecision vecPrec = TConfig::vecPrec; 17 | static const AMGX_MatPrecision matPrec = TConfig::matPrec; 18 | static const AMGX_IndPrecision indPrec = TConfig::indPrec; 19 | typedef Vector > Vector_h; 20 | typedef typename TConfig::VecPrec ValueTypeB; 21 | typedef typename TConfig::template setMemSpace::Type TConfig_h; 22 | typedef typename TConfig::template setMemSpace::Type TConfig_d; 23 | typedef typename types::PODTypes::type PODValueTypeB; 24 | typedef Vector::vec_prec>::Type> PODVec; 25 | typedef Vector::vec_prec>::Type> PODVec_h; 26 | 27 | RelativeMaxConvergence(AMG_Config &amg, const std::string &cfg_scope); 28 | 29 | void convergence_init(); 30 | 31 | AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini); 32 | 33 | private: 34 | PODVec_h _max_nrm; 35 | }; 36 | 37 | template 38 | class RelativeMaxConvergenceFactory : public ConvergenceFactory 39 | { 40 | public: 41 | Convergence *create(AMG_Config &cfg, const std::string &cfg_scope) { return new RelativeMaxConvergence(cfg, cfg_scope); } 42 | }; 43 | 44 | } // end namespace amgx 45 | 46 | -------------------------------------------------------------------------------- /include/cusp/linear_operator.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | /*! 
\file linear_operator.h 6 | * \brief Abstract interface for iterative solvers 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace cusp 19 | { 20 | 21 | template 22 | class linear_operator : public cusp::detail::matrix_base 23 | { 24 | typedef cusp::detail::matrix_base Parent; 25 | public: 26 | linear_operator() 27 | : Parent() {} 28 | 29 | linear_operator(IndexType num_rows, IndexType num_cols) 30 | : Parent(num_rows, num_cols) {} 31 | 32 | linear_operator(IndexType num_rows, IndexType num_cols, IndexType num_entries) 33 | : Parent(num_rows, num_cols, num_entries) {} 34 | }; // linear_operator 35 | 36 | template 37 | class identity_operator : public linear_operator 38 | { 39 | typedef linear_operator Parent; 40 | public: 41 | 42 | identity_operator() 43 | : Parent() {} 44 | 45 | identity_operator(IndexType num_rows, IndexType num_cols) 46 | : Parent(num_rows, num_cols) {} 47 | 48 | template 50 | void operator()(const VectorType1& x, VectorType2& y) const 51 | { 52 | cusp::blas::copy(x, y); 53 | } 54 | }; // identity_operator 55 | 56 | } // end namespace cusp 57 | 58 | -------------------------------------------------------------------------------- /examples/install_makefiles_mpi/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2011 - 2024 NVIDIA CORPORATION. All Rights Reserved. 
#
# SPDX-License-Identifier: BSD-3-Clause

# Builds the AMGX C API example programs against an installed library.
# Every example is compiled to an object file with the C compiler and then
# linked with the C++ driver. The AMGX shared library is looked up in ../lib
# at link time (-L../lib -lamgxsh) and at run time (-Wl,-rpath=../lib).
#
# Running plain `make` builds only the first example (amgx_capi);
# use `make all` to build every example.

# CUDA Toolkit location
INC_D = -I/usr/local/cuda/include
LIB_D = -L/usr/local/cuda/lib64
LIB_L = -lcusparse -lcublas -lcudart -ldl

# `all` and `clean` name actions rather than files; declaring them phony keeps
# them working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

# Examples
# Each example lists its source file as a prerequisite so that an existing
# binary is rebuilt when the source changes.
amgx_capi: amgx_capi.c
	gcc -O2 -std=c99 amgx_capi.c -c $(INC_D)
	g++ -O2 amgx_capi.o -o amgx_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -Wl,-rpath=../lib

amgx_mpi_capi: amgx_mpi_capi.c
	mpicc -O2 -std=c99 amgx_mpi_capi.c -c $(INC_D)
	mpic++ -O2 amgx_mpi_capi.o -o amgx_mpi_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib

# The *_dynamic variants are compiled with -DAMGX_DYNAMIC_LOADING and are
# linked without -lamgxsh; they use a separate object file name so they do not
# clobber the objects of the statically linked variants.
amgx_capi_dynamic: amgx_capi.c
	gcc -O2 -std=c99 amgx_capi.c -c -o amgx_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING
	g++ -O2 amgx_capi_dynamic.o -o amgx_capi_dynamic $(LIB_D) $(LIB_L) -Wl,-rpath=../lib

amgx_mpi_capi_dynamic: amgx_mpi_capi.c
	mpicc -O2 -std=c99 amgx_mpi_capi.c -c -o amgx_mpi_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING
	mpic++ -O2 amgx_mpi_capi_dynamic.o -o amgx_mpi_capi_dynamic $(LIB_D) $(LIB_L) -lmpi -Wl,-rpath=../lib

amgx_mpi_capi_agg: amgx_mpi_capi_agg.c
	mpicc -O2 -std=c99 amgx_mpi_capi_agg.c -c $(INC_D)
	mpic++ -O2 amgx_mpi_capi_agg.o -o amgx_mpi_capi_agg $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib

amgx_mpi_capi_cla: amgx_mpi_capi_cla.c
	mpicc -O2 -std=c99 amgx_mpi_capi_cla.c -c $(INC_D)
	mpic++ -O2 amgx_mpi_capi_cla.o -o amgx_mpi_capi_cla $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib

# All
all: amgx_capi amgx_mpi_capi amgx_capi_dynamic amgx_mpi_capi_dynamic amgx_mpi_capi_agg amgx_mpi_capi_cla

# Clean
# Removes every example binary and all object files in this directory.
clean:
	rm -f amgx_capi
	rm -f amgx_mpi_capi
	rm -f amgx_capi_dynamic
	rm -f amgx_mpi_capi_dynamic
	rm -f amgx_mpi_capi_cla
	rm -f amgx_mpi_capi_agg
	rm -f *.o

-------------------------------------------------------------------------------- /external/rapidjson/readme.txt: -------------------------------------------------------------------------------- 1 | rapidjson v0.11 2 | 3 | Copyright (c)
2011 Milo Yip (miloyip@gmail.com) 4 | 5 | http://code.google.com/p/rapidjson/ 6 | 7 | 16 Nov 2012 8 | 9 | 1. Introduction 10 | Rapidjson is a JSON parser and generator for C++. It was inspired by rapidxml (http://rapidxml.sourceforge.net/). 11 | Rapidjson is small but complete. It supports both SAX- and DOM-style APIs. The SAX parser is only about five hundred lines of code. 12 | Rapidjson is fast. Its performance can be comparable to that of strlen(). It also optionally supports SSE2/SSE4.1 for acceleration. 13 | Rapidjson is self-contained. It does not depend on external libraries such as Boost. It does not even depend on the STL. 14 | Rapidjson is memory friendly. Each JSON value costs exactly 16/20 bytes on 32/64-bit machines (excluding text strings). By default it uses a fast memory allocator, and the parser allocates memory compactly during parsing. 15 | 16 | For the full feature list, please refer to the user guide. 17 | 18 | JSON (JavaScript Object Notation) is a lightweight data-exchange format. 19 | More information about JSON can be obtained at 20 | http://json.org/ 21 | http://www.ietf.org/rfc/rfc4627.txt 22 | 23 | 2. Installation 24 | 25 | Rapidjson is a header-only C++ library. Just copy the rapidjson/include/rapidjson folder to your system's or project's include path. 26 | 27 | To build the tests and examples: 28 | 1. Obtain premake4 from http://industriousone.com/premake/download 29 | 2. Copy the premake4 executable to rapidjson/build 30 | 3. Run rapidjson/build/premake.bat on Windows, rapidjson/build/premake on Linux or other platforms 31 | 4. On Windows, build the solution at rapidjson/build/vs2008/ or /vs2010/ 32 | 5. On other platforms, run GNU make at rapidjson/build/gmake/ (e.g., make -f test.make config=release32, make -f example.make config=debug32) 33 | 6.
On success, the executable are generated at rapidjson/bin 34 | -------------------------------------------------------------------------------- /include/eigensolvers/subspace_iteration_eigensolver.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace amgx 10 | { 11 | 12 | template 13 | class SubspaceIteration_EigenSolver : public EigenSolver 14 | { 15 | public: 16 | typedef EigenSolver Base; 17 | 18 | typedef typename Base::TConfig_h TConfig_h; 19 | typedef typename Base::VVector VVector; 20 | typedef typename Base::MMatrix MMatrix; 21 | typedef typename Base::Vector_h Vector_h; 22 | typedef typename Base::Matrix_h Matrix_h; 23 | typedef typename Base::ValueTypeMat ValueTypeMat; 24 | typedef typename Base::ValueTypeVec ValueTypeVec; 25 | 26 | SubspaceIteration_EigenSolver(AMG_Config &cfg, const std::string &cfg_scope); 27 | ~SubspaceIteration_EigenSolver(); 28 | 29 | void solver_setup(); 30 | void solver_pagerank_setup(VVector &a); 31 | void solve_init(VVector &x); 32 | bool solve_iteration(VVector &x); 33 | void solve_finalize(); 34 | private: 35 | void orthonormalize(VVector &V); 36 | private: 37 | VVector m_X; 38 | VVector m_V; 39 | VVector m_H; 40 | VVector m_R; 41 | int m_subspace_size; 42 | int m_wanted_count; 43 | ValueTypeVec m_initial_residual; 44 | }; 45 | 46 | template 47 | class SubspaceIteration_EigenSolverFactory : public EigenSolverFactory 48 | { 49 | public: 50 | EigenSolver *create(AMG_Config &cfg, const std::string &cfg_scope, ThreadManager *tmng) 51 | { 52 | return new SubspaceIteration_EigenSolver(cfg, cfg_scope); 53 | } 54 | }; 55 | 56 | } 57 | -------------------------------------------------------------------------------- /include/eigensolvers/arnoldi_eigensolver.h: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved. 2 | // 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace amgx 12 | { 13 | 14 | template 15 | class Arnoldi_EigenSolver : public EigenSolver 16 | { 17 | public: 18 | typedef EigenSolver Base; 19 | 20 | typedef typename Base::TConfig_h TConfig_h; 21 | typedef typename Base::VVector VVector; 22 | typedef typename Base::MMatrix MMatrix; 23 | typedef typename Base::Vector_h Vector_h; 24 | typedef typename Base::Matrix_h Matrix_h; 25 | typedef typename Base::ValueTypeMat ValueTypeMat; 26 | typedef typename Base::ValueTypeVec ValueTypeVec; 27 | 28 | Arnoldi_EigenSolver(AMG_Config &cfg, const std::string &cfg_scope); 29 | ~Arnoldi_EigenSolver(); 30 | 31 | void solver_setup(); 32 | void solver_pagerank_setup(VVector &a); 33 | void solve_init(VVector &x); 34 | bool solve_iteration(VVector &x); 35 | void solve_finalize(); 36 | 37 | private: 38 | void free_allocated(); 39 | private: 40 | int m_krylov_size; 41 | std::vector m_V_vectors; 42 | Vector_h m_H; 43 | Vector_h m_H_tmp; 44 | Vector_h m_ritz_eigenvalues; 45 | Vector_h m_ritz_eigenvectors; 46 | ValueTypeVec m_beta; 47 | }; 48 | 49 | template 50 | class Arnoldi_EigenSolverFactory : public EigenSolverFactory 51 | { 52 | public: 53 | EigenSolver *create(AMG_Config &cfg, const std::string &cfg_scope, ThreadManager *tmng) 54 | { 55 | return new Arnoldi_EigenSolver(cfg, cfg_scope); 56 | } 57 | }; 58 | 59 | } 60 | -------------------------------------------------------------------------------- /include/cusp/detail/host/reference/dia.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved. 
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef __DIA_H__
#define __DIA_H__

#include <algorithm>
#include <cstddef>


/*
 * Compute Y += A*X for DIA matrix A and dense vectors X,Y
 *
 *
 * Input Arguments:
 *   I  n_row            - number of rows in A
 *   I  n_col            - number of columns in A
 *   I  n_diags          - number of diagonals
 *   I  L                - length of each diagonal
 *   I  offsets[n_diags] - diagonal offsets
 *   T  diags[n_diags,L] - nonzeros
 *   T  Xx[n_col]        - input vector
 *
 * Output Arguments:
 *   T  Yx[n_row]        - output vector
 *
 * Note:
 *   Output array Yx must be preallocated
 *   Negative offsets correspond to lower diagonals
 *   Positive offsets correspond to upper diagonals
 *
 *   The result is accumulated into Yx; Yx is not cleared first.
 *
 *   Stored diagonals are indexed by column: the entry of diagonal i that
 *   multiplies Xx[j] is diags[i*L + j], and it contributes to
 *   Yx[j - offsets[i]].
 *
 *   A diagonal whose offset places it entirely outside the matrix (or beyond
 *   the stored length L) contributes nothing and is skipped.
 *
 */
template <class I, class T>
void dia_matvec(const I n_row,
                const I n_col,
                const I n_diags,
                const I L,
                const I offsets[],
                const T diags[],
                const T Xx[],
                      T Yx[])
{
    for(I i = 0; i < n_diags; i++){
        const I k = offsets[i];  //diagonal offset

        // Explicit <I> keeps both arguments in the index type even when the
        // literal 0 or the promoted expression -k would deduce differently.
        const I i_start = std::max<I>(0, -k);   //first row touched
        const I j_start = std::max<I>(0,  k);   //first column touched
        const I j_end   = std::min<I>(std::min<I>(n_row + k, n_col), L);

        const I N = j_end - j_start;  //number of elements to process

        // Bail out before any pointer arithmetic: for an out-of-range offset
        // the start positions below would lie outside the arrays.
        if(N <= 0){
            continue;
        }

        // Row offset into diags is computed in ptrdiff_t so that i*L cannot
        // overflow a narrow index type.
        const std::ptrdiff_t diag_start = static_cast<std::ptrdiff_t>(i) * L + j_start;

        const T * diag = diags + diag_start;
        const T * x = Xx + j_start;
              T * y = Yx + i_start;

        for(I n = 0; n < N; n++){
            y[n] += diag[n] * x[n];
        }
    }
}


#endif
// --------------------------------------------------------------------------------