├── ReleaseVersion.txt
├── doc
    └── AMGX_Reference.pdf
├── src
    ├── configs
    │   ├── eigen_configs
    │   │   ├── ARNOLDI
    │   │   ├── POWER_ITERATION
    │   │   ├── PAGERANK
    │   │   ├── SUBSPACE_ITERATION
    │   │   ├── LANCZOS
    │   │   ├── JACOBI_DAVIDSON
    │   │   ├── LOBPCG
    │   │   └── INVERSE_FGMRES
    │   ├── CG_DILU.json
    │   ├── PCG_DILU.json
    │   ├── GMRES.json
    │   ├── PCG_NOPREC.json
    │   ├── JACOBI.json
    │   ├── FGMRES_NOPREC.json
    │   ├── PBICGSTAB_NOPREC.json
    │   ├── F.json
    │   ├── V.json
    │   ├── W.json
    │   ├── AMG_CLASSICAL_CG.json
    │   ├── AMG_CLASSICAL_CGF.json
    │   ├── CLASSICAL_CG_CYCLE.json
    │   ├── CLASSICAL_F_CYCLE.json
    │   ├── CLASSICAL_V_CYCLE.json
    │   ├── CLASSICAL_W_CYCLE.json
    │   ├── CLASSICAL_CGF_CYCLE.json
    │   ├── IDRMSYNC_DILU.json
    │   ├── AGGREGATION_THRUST_BJ.json
    │   ├── IDR_DILU.json
    │   ├── AMG_AGGRREGATION_CG.json
    │   ├── AGGREGATION_DILU.json
    │   ├── AGGREGATION_JACOBI.json
    │   ├── AGGREGATION_LOW_DEG_DILU.json
    │   ├── AGGREGATION_LOW_DEG_BJ.json
    │   ├── AGGREGATION_THRUST_DILU.json
    │   ├── AGGREGATION_GS.json
    │   ├── AGGREGATION_LOW_DEG_GS.json
    │   ├── AGGREGATION_THRUST_GS.json
    │   ├── FGMRES.json
    │   ├── PBICGSTAB_W.json
    │   ├── PBICGSTAB_AGGREGATION_W_JACOBI.json
    │   ├── AGGREGATION_MULTI_PAIRWISE.json
    │   ├── V-cheby_poly-smoother.json
    │   ├── FGMRES_AGGREGATION_JACOBI.json
    │   ├── PBICGSTAB.json
    │   ├── PBICGSTAB_CLASSICAL_JACOBI.json
    │   ├── V-cheby-aggres-L1-trunc.json
    │   ├── PCG_F.json
    │   ├── PCG_V.json
    │   ├── PCG_W.json
    │   ├── agg_cheb4.json
    │   ├── PCGF_CLASSICAL_F_JACOBI.json
    │   ├── PCGF_CLASSICAL_V_JACOBI.json
    │   ├── PCGF_CLASSICAL_W_JACOBI.json
    │   ├── PCG_CLASSICAL_F_JACOBI.json
    │   ├── PCG_CLASSICAL_W_JACOBI.json
    │   ├── V-cheby-smoother.json
    │   ├── GMRES_AMG_D2.json
    │   ├── AMG_CLASSICAL_L1_TRUNC.json
    │   ├── PCG_CLASSICAL_V_JACOBI.json
    │   ├── FGMRES_AGGREGATION.json
    │   ├── FGMRES_AGGREGATION_DILU.json
    │   ├── V-cheby-aggres-L1-trunc-userLambda.json
    │   ├── AMG_CLASSICAL_AGGRESSIVE_L1.json
    │   ├── AMG_CLASSICAL_L1_AGGRESSIVE_HMIS.json
    │   ├── PCG_AGGREGATION_JACOBI.json
    │   ├── AMG_CLASSICAL_PMIS.json
    │   ├── AMG_CLASSICAL_AGGRESSIVE_L1_TRUNC.json
    │   ├── FGMRES_CLASSICAL_AGGRESSIVE_HMIS.json
    │   ├── FGMRES_CLASSICAL_AGGRESSIVE_PMIS.json
    │   └── AMG_CLASSICAL_AGGRESSIVE_CHEB_L1_TRUNC.json
    ├── version.cu
    ├── memory_info.cu
    ├── api_version.cu
    ├── amgx_types
    │   └── io.cu
    ├── operators
    │   ├── solver_operator.cu
    │   ├── shifted_operator.cu
    │   ├── deflated_multiply_operator.cu
    │   └── solve_operator.cu
    ├── device_properties.cu
    ├── distributed
    │   └── distributed_comms.cu
    ├── solvers
    │   ├── user_solver.cu
    │   └── dummy_solver.cu
    ├── amgx_c_common.cu
    ├── tests
    │   ├── version_test.cu
    │   └── truncate_count_test.cu
    ├── misc.cu
    ├── convergence
    │   └── absolute.cu
    ├── cycles
    │   ├── v_cycle.cu
    │   ├── w_cycle.cu
    │   └── f_cycle.cu
    ├── thread_manager.cu
    └── classical
    │   └── interpolators
    │       └── common.cu
├── .gitignore
├── include
    ├── memory_space.h
    ├── device_properties.h
    ├── marker.h
    ├── cusp
    │   ├── detail
    │   │   ├── device
    │   │   │   ├── common.h
    │   │   │   ├── dereference.h
    │   │   │   ├── generalized_spmv
    │   │   │   │   ├── coo.h
    │   │   │   │   ├── csr.h
    │   │   │   │   ├── coo_serial.h
    │   │   │   │   └── hyb.h
    │   │   │   ├── spmv
    │   │   │   │   ├── hyb.h
    │   │   │   │   └── coo_serial.h
    │   │   │   ├── elementwise.h
    │   │   │   └── arch.h
    │   │   ├── utils.h
    │   │   ├── random.h
    │   │   ├── host
    │   │   │   ├── elementwise.h
    │   │   │   ├── update.sh
    │   │   │   └── reference
    │   │   │   │   ├── ell.h
    │   │   │   │   └── dia.h
    │   │   ├── forward_definitions.h
    │   │   ├── matrix_shape.h
    │   │   ├── format_utils.h
    │   │   ├── functional.h
    │   │   ├── csr_matrix.inl
    │   │   ├── hyb_matrix.inl
    │   │   ├── config.h
    │   │   ├── dispatch
    │   │   │   ├── transpose.h
    │   │   │   └── multiply.h
    │   │   └── convert.inl
    │   ├── verify.h
    │   ├── version.h
    │   ├── convert.h
    │   ├── copy.h
    │   ├── format.h
    │   ├── gallery
    │   │   └── stencil.h
    │   ├── precond
    │   │   ├── strength.h
    │   │   ├── aggregate.h
    │   │   ├── smooth.h
    │   │   └── detail
    │   │   │   └── diagonal.inl
    │   ├── memory.h
    │   ├── elementwise.h
    │   ├── transpose.h
    │   ├── relaxation
    │   │   ├── jacobi.h
    │   │   └── polynomial.h
    │   ├── graph
    │   │   └── maximal_independent_set.h
    │   └── linear_operator.h
    ├── sort.h
    ├── amgx_types
    │   ├── io.h
    │   ├── rand.h
    │   └── pod_types.h
    ├── eigensolvers.h
    ├── amg_signal.h
    ├── distributed
    │   └── amgx_mpi.h
    ├── transpose.h
    ├── version.h
    ├── texture.h
    ├── numerical_zero.h
    ├── core.h
    ├── profile.h
    ├── misc.h
    ├── stream.h
    ├── determinism_checker.h
    ├── eigensolvers
    │   ├── multivector_operations.h
    │   ├── qr.h
    │   ├── eigenvector_solver.h
    │   ├── subspace_iteration_eigensolver.h
    │   └── arnoldi_eigensolver.h
    ├── async_event.h
    ├── multiply.h
    ├── amgx_eig_c.h
    ├── classical
    │   ├── strength
    │   │   ├── all.h
    │   │   └── ahat.h
    │   └── interpolators
    │   │   └── common.h
    ├── miscmath.h
    ├── memory_info.h
    ├── norm.h
    ├── aggregation
    │   └── selectors
    │   │   ├── dummy.h
    │   │   ├── serial_greedy.h
    │   │   └── serial_bfs_selector.h
    ├── cycles
    │   └── fixed_cycle.h
    └── convergence
    │   ├── relative_ini.h
    │   ├── absolute.h
    │   └── relative_max.h
├── ci
    ├── test.sh
    ├── containers
    │   ├── x86_64-ubuntu18.04-gnu7-cuda10.2.py
    │   ├── x86_64-ubuntu18.04-gnu8-cuda11.0.py
    │   └── x86_64-ubuntu18.04-llvm9-cuda11.0.py
    ├── README.md
    └── run.sh
├── external
    └── rapidjson
    │   ├── include
    │       └── rapidjson
    │       │   ├── internal
    │       │       └── strfunc.h
    │       │   ├── filestream.h
    │       │   └── stringbuffer.h
    │   ├── license.txt
    │   └── readme.txt
├── examples
    ├── install_makefiles_nompi
    │   └── Makefile
    ├── matrix.mtx
    ├── Makefile.cray
    ├── generate_poisson.cu
    ├── convert.c
    └── install_makefiles_mpi
    │   └── Makefile
├── .github
    └── ISSUE_TEMPLATE
    │   ├── compilation-issue-report.md
    │   └── bug-report.md
└── LICENSES
    └── BSD-3-Clause.txt


/ReleaseVersion.txt:
--------------------------------------------------------------------------------
1 | 2.5.0
2 | 


--------------------------------------------------------------------------------
/doc/AMGX_Reference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/AMGX/main/doc/AMGX_Reference.pdf


--------------------------------------------------------------------------------
/src/configs/eigen_configs/ARNOLDI:
--------------------------------------------------------------------------------
1 | config_version=2
2 | default:eig_solver=ARNOLDI
3 | default:eig_max_iters=128
4 | default:eig_tolerance=1e-4
5 | default:eig_which=largest


--------------------------------------------------------------------------------
/src/configs/eigen_configs/POWER_ITERATION:
--------------------------------------------------------------------------------
1 | config_version=2
2 | default:eig_solver=POWER_ITERATION
3 | default:eig_max_iters=40000
4 | default:eig_tolerance=1e-4
5 | default:eig_which=largest


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | /build*/
 3 | .logamgx
 4 | *.cmake
 5 | plugin_config.cu
 6 | *.sublime-project
 7 | *.sublime-workspace
 8 | core/src/version.cu
 9 | ci/docker/
10 | install
11 | Release
12 | 


--------------------------------------------------------------------------------
/src/configs/eigen_configs/PAGERANK:
--------------------------------------------------------------------------------
1 | config_version=2
2 | default:eig_solver=PAGERANK
3 | default:eig_damping_factor=0.85
4 | default:eig_max_iters=50
5 | default:eig_tolerance=1e-3
6 | default:eig_which=pagerank


--------------------------------------------------------------------------------
/src/version.cu:
--------------------------------------------------------------------------------
1 | #include <version.h>
2 | namespace amgx{ // definitions for the build-info constants declared extern in version.h
3 | const char __AMGX_BUILD_DATE__ [] = __DATE__; // compilation date, expanded by the preprocessor
4 | const char __AMGX_BUILD_TIME__ [] = __TIME__; // compilation time, expanded by the preprocessor
5 | const char __AMGX_BUILD_ID__ [] = "2.5.0"; // build identifier string
6 | }
7 | 


--------------------------------------------------------------------------------
/src/configs/eigen_configs/SUBSPACE_ITERATION:
--------------------------------------------------------------------------------
1 | config_version=2
2 | default:eig_solver=SUBSPACE_ITERATION
3 | default:eig_max_iters=256
4 | default:eig_tolerance=1e-2
5 | default:eig_which=largest
6 | default:eig_wanted_count=4


--------------------------------------------------------------------------------
/src/configs/eigen_configs/LANCZOS:
--------------------------------------------------------------------------------
1 | config_version=2
2 | default:eig_solver=LANCZOS
3 | default:eig_max_iters=128
4 | default:eig_tolerance=1e-4
5 | default:eig_which=largest
6 | default:eig_eigenvector=0
7 | default:eig_eigenvector_solver=default


--------------------------------------------------------------------------------
/include/memory_space.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <cusp/memory.h>
 7 | // Make cusp's host_memory / device_memory names usable unqualified by every file that includes this header.
 8 | using cusp::host_memory;
 9 | using cusp::device_memory;
10 | 


--------------------------------------------------------------------------------
/src/memory_info.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <memory_info.h>
 6 | 
 7 | namespace amgx
 8 | {
 9 | // Out-of-class definition of the static data member MemoryInfo::max_allocated, initialised to 0.
10 | size_t MemoryInfo::max_allocated = 0;
11 | 
12 | }
13 | 


--------------------------------------------------------------------------------
/include/device_properties.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | // NOTE(review): cudaDeviceProp is used below but this header includes nothing; it relies on the including translation unit for the CUDA runtime types.
10 | cudaDeviceProp getDeviceProperties();
11 | // Declaration only. Presumably the streaming-multiprocessor count of the device -- confirm in the definition.
12 | int getSMCount();
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/include/marker.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | // NOTE(review): this header has no #pragma once or include guard.
 5 | template<int i> __global__ void marker_kernel() {} // empty kernel; the template argument i only distinguishes instantiations
 6 | // Launches the empty marker_kernel<i> with one block of one thread.
 7 | template<int i> void marker()
 8 | {
 9 |     marker_kernel<i> <<< 1, 1>>>();
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/src/api_version.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <version.h>
 6 | namespace amgx
 7 | {
 8 | // Defines the constants declared extern in version.h; that earlier declaration is what gives these consts external linkage.
 9 | const int __AMGX_API_VERSION_MAJOR = 1;
10 | const int __AMGX_API_VERSION_MINOR = 0;
11 | 
12 | }
13 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/common.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | // maximum number of co-resident threads (NOTE(review): 30 * 1024 is hard-coded -- confirm it still matches the target GPUs)
 8 | const int MAX_THREADS = (30 * 1024);
 9 | const int WARP_SIZE = 32; // threads per warp
10 | 
11 | 


--------------------------------------------------------------------------------
/include/sort.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <basic_types.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Sort an array-like container (declaration only; the definition is not in this header).
12 | // v is taken by non-const reference and nothing is returned, so the result is presumably written back into v -- confirm.
13 | template <class Vector>
14 | void sort(Vector &v);
15 | 
16 | } // namespace amgx
17 | 


--------------------------------------------------------------------------------
/include/amgx_types/io.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <basic_types.h>
 8 | #include <ostream>
 9 | // Stream insertion for the CUDA complex types; src/amgx_types/io.cu writes get_re(x), one space, then get_im(x).
10 | std::ostream &operator<<(std::ostream &os, const cuComplex &x);
11 | std::ostream &operator<<(std::ostream &os, const cuDoubleComplex &x);


--------------------------------------------------------------------------------
/include/eigensolvers.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <error.h>
 8 | 
 9 | namespace amgx
10 | {
11 | namespace eigensolvers
12 | {
13 | AMGX_ERROR initialize(); // declaration only; reports its outcome as an AMGX_ERROR value
14 | void finalize(); // declaration only; presumably the counterpart of initialize() -- confirm in the definition
15 | } //namespace eigensolvers
16 | } // namespace amgx
17 | 


--------------------------------------------------------------------------------
/include/amg_signal.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | // All members are static. Presumably hook()/unhook() install and remove signal handlers -- confirm in the definition.
10 | class SignalHandler
11 | {
12 |         static bool hooked; // private (default class access); only declared here, the definition is elsewhere
13 |     public:
14 |         static void hook();
15 |         static void unhook();
16 | };
17 | 
18 | } // namespace amgx
19 | 


--------------------------------------------------------------------------------
/include/distributed/amgx_mpi.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | // The MPI helpers below are declared only when the build defines AMGX_WITH_MPI.
 8 | #ifdef AMGX_WITH_MPI
 9 | #include <mpi.h>
10 | 
11 | namespace amgx
12 | {
13 | void installMPIErrorHandler(MPI_Comm comm);
14 | void uninstallMPIErrorHandler(MPI_Comm comm);
15 | }
16 | #else
17 | // Without AMGX_WITH_MPI this header declares nothing.
18 | #endif
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/include/transpose.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | 
10 | // Computes B = A^T: A is the read-only input, B receives the result (declaration only).
11 | template <class Matrix>
12 | void transpose(const Matrix &A, Matrix &B);
13 | // Overload with an explicit row count. NOTE(review): what num_rows refers to is not visible in this header -- confirm.
14 | template <class Matrix>
15 | void transpose(const Matrix &A, Matrix &B, int num_rows);
16 | 
17 | } // namespace amgx
18 | 


--------------------------------------------------------------------------------
/include/version.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | namespace amgx
 7 | {
 8 | extern const char __AMGX_BUILD_ID__ []; // defined in src/version.cu
 9 | extern const char __AMGX_BUILD_TIME__ []; // defined in src/version.cu from __TIME__
10 | extern const char __AMGX_BUILD_DATE__ []; // defined in src/version.cu from __DATE__
11 | // API version numbers; defined in src/api_version.cu.
12 | extern const int __AMGX_API_VERSION_MAJOR;
13 | extern const int __AMGX_API_VERSION_MINOR;
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/include/texture.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <error.h>
 8 | #include <cutil.h>
 9 | 
10 | #include "cuda_runtime.h"
11 | 
12 | namespace amgx
13 | {
14 | 
15 | // Device-only helper: returns the value at addr, loaded through the CUDA __ldg intrinsic (read-only data cache load).
16 | template <typename T_ELEM> __inline__ __device__ T_ELEM __cachingLoad(const T_ELEM *addr)
17 | {
18 |     return __ldg(addr);
19 | }
20 | 
21 | }
22 | 


--------------------------------------------------------------------------------
/include/cusp/detail/utils.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | namespace detail
10 | {
11 | // Rounds n up to a multiple of k, e.g. round_up(10, 4) == 12 and round_up(8, 4) == 8.
12 | template <typename IntegralType>
13 | IntegralType round_up(IntegralType n, IntegralType k) // k must be non-zero: it is used as a divisor
14 | {
15 |     return k * ((n + k - 1) / k); // for n >= 0 and k > 0 this is ceil(n / k) * k; n + k - 1 can overflow near the type's maximum
16 | }
17 | 
18 | } // end namespace detail
19 | } // end namespace cusp
20 | 
21 | 


--------------------------------------------------------------------------------
/include/numerical_zero.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #if !defined(AMGX_NUMERICAL_ZERO_H_)
 6 | #define AMGX_NUMERICAL_ZERO_H_
 7 | // NOTE(review): presumably thresholds below which a magnitude is treated as zero -- their use is not visible in this header.
 8 | #define AMGX_NUMERICAL_AZERO 0.0   //"absolute" zero
 9 | #define AMGX_NUMERICAL_SZERO 1e-10 //single precision zero (the literal itself has type double)
10 | #define AMGX_NUMERICAL_DZERO 1e-20 //double precision zero
11 | 
12 | #endif /* AMGX_NUMERICAL_ZERO_H_ */
13 | 


--------------------------------------------------------------------------------
/src/configs/CG_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "precond", 
 6 |             "solver": "MULTICOLOR_DILU"
 7 |         }, 
 8 |         "solver": "CG", 
 9 |         "print_solve_stats": 1, 
10 |         "obtain_timings": 1, 
11 |         "max_iters": 100, 
12 |         "monitor_residual": 1, 
13 |         "scope": "main", 
14 |         "tolerance" : 1e-06, 
15 |         "convergence": "RELATIVE_INI",
16 |         "norm": "L2"
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/configs/PCG_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "precond", 
 6 |             "solver": "MULTICOLOR_DILU"
 7 |         }, 
 8 |         "solver": "PCG", 
 9 |         "print_solve_stats": 1, 
10 |         "obtain_timings": 1, 
11 |         "max_iters": 100, 
12 |         "monitor_residual": 1, 
13 |         "scope": "main", 
14 |         "tolerance" : 1e-06, 
15 |         "convergence": "RELATIVE_INI",
16 |         "norm": "L2"
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/amgx_types/io.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <amgx_types/util.h>
 6 | // Writes get_re(x), a single space, then get_im(x) to os, and returns os so calls can be chained.
 7 | std::ostream &operator<<(std::ostream &os, const cuComplex &x)
 8 | {
 9 |     os << amgx::types::get_re(x) << " " << amgx::types::get_im(x);
10 |     return os;
11 | }
12 | // Same output format for cuDoubleComplex.
13 | std::ostream &operator<<(std::ostream &os, const cuDoubleComplex &x)
14 | {
15 |     os << amgx::types::get_re(x) << " " << amgx::types::get_im(x);
16 |     return os;
17 | }


--------------------------------------------------------------------------------
/src/configs/GMRES.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "amg", 
 6 |             "solver": "NOSOLVER"
 7 |         }, 
 8 |         "use_scalar_norm": 1, 
 9 |         "solver": "GMRES", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "monitor_residual": 1, 
13 |         "convergence": "RELATIVE_INI", 
14 |         "scope": "main", 
15 |         "max_iters": 100, 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/configs/PCG_NOPREC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "amg", 
 6 |             "solver": "NOSOLVER"
 7 |         }, 
 8 |         "use_scalar_norm": 1, 
 9 |         "solver": "PCG", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "monitor_residual": 1, 
13 |         "convergence": "RELATIVE_INI", 
14 |         "scope": "main", 
15 |         "max_iters": 100, 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/dereference.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/config.h>
 8 | // CUSP_DEREFERENCE(x): dereferences iterator/pointer x with the helper matching the Thrust version (100600 encodes 1.6.0).
 9 | #if THRUST_VERSION >= 100600
10 | #include <thrust/detail/raw_reference_cast.h>
11 | #define CUSP_DEREFERENCE(x)  amgx::thrust::raw_reference_cast(*(x)) // argument parenthesised so expressions such as `p + 1` dereference as a whole
12 | #else
13 | #include <thrust/detail/backend/dereference.h>
14 | #define CUSP_DEREFERENCE(x)  amgx::thrust::detail::backend::dereference(x)
15 | #endif
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/src/configs/JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "amg", 
 6 |             "solver": "NOSOLVER"
 7 |         }, 
 8 |         "use_scalar_norm": 1, 
 9 |         "solver": "BLOCK_JACOBI", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "monitor_residual": 1, 
13 |         "convergence": "RELATIVE_INI", 
14 |         "scope": "main", 
15 |         "max_iters": 100, 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_NOPREC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "amg", 
 6 |             "solver": "NOSOLVER"
 7 |         }, 
 8 |         "use_scalar_norm": 1, 
 9 |         "solver": "FGMRES", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "monitor_residual": 1, 
13 |         "convergence": "RELATIVE_INI", 
14 |         "scope": "main", 
15 |         "max_iters": 100, 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/configs/PBICGSTAB_NOPREC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "scope": "amg_solver", 
 6 |             "solver": "NOSOLVER"
 7 |         }, 
 8 |         "use_scalar_norm": 1, 
 9 |         "solver": "PBICGSTAB", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "monitor_residual": 1, 
13 |         "convergence": "RELATIVE_INI", 
14 |         "scope": "main", 
15 |         "max_iters": 100, 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2"
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/include/cusp/detail/random.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | namespace detail
10 | {
11 | // Forward declarations only; no class bodies appear in this header itself.
12 | // array view containing random integers
13 | template <typename T>
14 | class random_integers;
15 | 
16 | // array view containing random real numbers in [0,1)
17 | template <typename T>
18 | class random_reals;
19 | 
20 | } // end namespace detail
21 | } // end namespace cusp
22 | // Included after the declarations above; presumably supplies their definitions -- confirm.
23 | #include <cusp/detail/random.inl>
24 | 
25 | 


--------------------------------------------------------------------------------
/src/operators/solver_operator.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | namespace amgx
 6 | {
 7 | // Forward declaration made before the headers further down this file are included.
 8 | template <class T_Config> class Operator;
 9 | 
10 | }
11 | 
12 | #include <operators/solver_operator.h>
13 | #include <blas.h>
14 | 
15 | namespace amgx
16 | {
17 | // Explicit instantiation of SolverOperator: AMGX_CASE_LINE emits one `template class` line for each CASE it is applied to.
18 | #define AMGX_CASE_LINE(CASE) template class SolverOperator<TemplateMode<CASE>::Type>;
19 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) // presumably applies AMGX_CASE_LINE to every real-valued build mode -- confirm
20 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) // presumably the same for the complex-valued modes -- confirm
21 | #undef AMGX_CASE_LINE
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/src/configs/F.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "interpolator": "D2",
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "F", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/V.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "obtain_timings": 1, 
 9 |         "max_iters": 100, 
10 |         "interpolator": "D2",
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "V", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/W.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "obtain_timings": 1, 
 9 |         "max_iters": 100, 
10 |         "monitor_residual": 1, 
11 |         "interpolator": "D2",
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "W", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/ci/test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env sh
 2 | 
 3 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 4 | #
 5 | # SPDX-License-Identifier: BSD-3-Clause
 6 | 
 7 | set -ex  # abort on the first failing command and echo each command as it runs
 8 | # Build directory: the first argument, or "build" when it is missing or empty.
 9 | BUILD_DIR=$1
10 | if [ -z "${1}" ]; then
11 |     BUILD_DIR=build
12 | fi
13 | # Start from scratch only when AMGX_CI_KEEP_BUILD is exactly "0"; unset or any other value keeps the directory.
14 | if [ "${AMGX_CI_KEEP_BUILD}" = "0" ]; then
15 |     rm -rf "${BUILD_DIR}" || true
16 | fi
17 | mkdir -p "${BUILD_DIR}"
18 | # Configure, build and test inside a subshell so the cd is scoped to these commands.
19 | (
20 |     cd "${BUILD_DIR}"
21 |     cmake ..  # assumes the build directory is a direct child of the source root
22 |     make -j 8 all
23 |     # WIP: test_launcher is allowed to fail; not all tests pass
24 |     set +e  # NOTE(review): the launcher is the last command, so its exit status still becomes the script's status -- confirm this is intended
25 |     ./tests/amgx_tests_launcher
26 | )
27 | 


--------------------------------------------------------------------------------
/include/core.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <error.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Declarations only. NOTE(review): size_t is used without <cstddef>; this relies on <error.h> or the includer providing it.
12 | void allocate_resources(size_t pool_size,
13 |                         size_t max_alloc_size,
14 |                         size_t scaling_factor,
15 |                         size_t scaling_threshold,
16 |                         size_t max_size);
17 | void free_resources();
18 | // Presumably the core start-up / shut-down entry points -- confirm; initialize() reports its outcome as an AMGX_ERROR value.
19 | AMGX_ERROR initialize();
20 | void finalize();
21 | 
22 | } // namespace amgx
23 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_CG.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "interpolator": "D2",
 8 |         "presweeps": 1, 
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "CG", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_CGF.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "interpolator": "D2",
 8 |         "presweeps": 1, 
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "CGF", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/CLASSICAL_CG_CYCLE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "interpolator": "D2",
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "CG", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/CLASSICAL_F_CYCLE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "obtain_timings": 1, 
 9 |         "interpolator": "D2",
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "F", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/CLASSICAL_V_CYCLE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "interpolator": "D2",
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "V", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/CLASSICAL_W_CYCLE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "presweeps": 1, 
 8 |         "interpolator": "D2",
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "W", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/CLASSICAL_CGF_CYCLE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "print_solve_stats": 1, 
 7 |         "interpolator": "D2",
 8 |         "presweeps": 1, 
 9 |         "obtain_timings": 1, 
10 |         "max_iters": 100, 
11 |         "monitor_residual": 1, 
12 |         "convergence": "RELATIVE_INI", 
13 |         "scope": "main", 
14 |         "max_levels": 50, 
15 |         "cycle": "CGF", 
16 |         "tolerance" : 1e-06, 
17 |         "norm": "L2", 
18 |         "postsweeps": 1
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/configs/eigen_configs/JACOBI_DAVIDSON:
--------------------------------------------------------------------------------
 1 | default:config_version=2
 2 | default:eig_solver=JACOBI_DAVIDSON
 3 | default:eig_max_iters=128
 4 | default:eig_tolerance=1e-4
 5 | default:eig_which=largest
 6 | 
 7 | #gmres accelerator config
 8 | default:solver(main)=FGMRES
 9 | main:gmres_n_restart=10
10 | main:preconditioner(amg)=NOSOLVER
11 | 
12 | #outer solver setup
13 | main:convergence=RELATIVE_INI
14 | main:norm=L2
15 | main:use_scalar_norm=1
16 | main:max_iters=20
17 | main:tolerance=1e-2
18 | 
19 | #printing options
20 | #main:print_solve_stats=1
21 | #main:monitor_residual=1
22 | #main:obtain_timings=1
23 | 


--------------------------------------------------------------------------------
/include/profile.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | void profileLevelUp();
 8 | void profileLevelDown();
 9 | void profileLevelZero();
10 | void profilePhaseSetup();
11 | void profilePhaseSolve();
12 | void profilePhaseNone();
13 | void profileSubphaseMatrixColoring();
14 | void profileSubphaseSmootherSetup();
15 | void profileSubphaseFindAggregates();
16 | void profileSubphaseComputeRestriction();
17 | void profileSubphaseComputeCoarseA();
18 | void profileSubphaseNone();
19 | void profileSubphaseTruncateP();
20 | 


--------------------------------------------------------------------------------
/src/configs/IDRMSYNC_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "max_iters": 100, 
 5 |         "preconditioner": {
 6 |             "scope": "precond", 
 7 |             "max_iters": 1, 
 8 |             "solver": "MULTICOLOR_DILU"
 9 |         }, 
10 |         "use_scalar_norm": 1, 
11 |         "solver": "IDRMSYNC", 
12 |         "print_solve_stats": 1, 
13 |         "obtain_timings": 1, 
14 |         "subspace_dim_s": 1, 
15 |         "monitor_residual": 1, 
16 |         "convergence": "RELATIVE_INI", 
17 |         "scope": "main", 
18 |         "tolerance" : 1e-06, 
19 |         "norm": "L2"
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_THRUST_BJ.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "algorithm": "AGGREGATION", 
 6 |         "coarseAgenerator": "THRUST", 
 7 |         "solver": "AMG", 
 8 |         "smoother": "BLOCK_JACOBI", 
 9 |         "print_solve_stats": 1, 
10 |         "presweeps": 1, 
11 |         "selector": "SIZE_2", 
12 |         "obtain_timings": 1, 
13 |         "max_iters": 100, 
14 |         "monitor_residual": 1, 
15 |         "scope": "main", 
16 |         "postsweeps": 1, 
17 |         "tolerance" : 1e-06, 
18 |         "convergence": "RELATIVE_INI",
19 |         "cycle": "V"
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/configs/IDR_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "max_iters": 100, 
 5 |         "print_grid_stats": 1, 
 6 |         "preconditioner": {
 7 |             "scope": "precond", 
 8 |             "max_iters": 1, 
 9 |             "solver": "MULTICOLOR_DILU"
10 |         }, 
11 |         "use_scalar_norm": 1, 
12 |         "solver": "IDR", 
13 |         "print_solve_stats": 1, 
14 |         "obtain_timings": 1, 
15 |         "subspace_dim_s": 1, 
16 |         "monitor_residual": 1, 
17 |         "convergence": "RELATIVE_INI", 
18 |         "scope": "main", 
19 |         "tolerance" : 1e-06, 
20 |         "norm": "L2"
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/configs/AMG_AGGRREGATION_CG.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "algorithm":"AGGREGATION",
 7 |         "selector": "SIZE_4",
 8 |         "print_solve_stats": 1, 
 9 |         "smoother": "JACOBI_L1",
10 |         "presweeps": 0, 
11 |         "postsweeps": 3,
12 |         "obtain_timings": 1, 
13 |         "max_iters": 100, 
14 |         "monitor_residual": 1, 
15 |         "convergence": "RELATIVE_INI", 
16 |         "scope": "main", 
17 |         "max_levels": 50, 
18 |         "cycle": "CG", 
19 |         "tolerance" : 1e-06, 
20 |         "norm": "L2" 
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/include/cusp/verify.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file verify.h
 6 |  *  \brief Validate matrix format
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | template <typename MatrixType>
17 | bool is_valid_matrix(const MatrixType& A);
18 | 
19 | template <typename MatrixType, typename OutputStream>
20 | bool is_valid_matrix(const MatrixType& A, OutputStream& ostream);
21 | 
22 | template <typename MatrixType>
23 | void assert_is_valid_matrix(const MatrixType& A);
24 | 
25 | } // end namespace cusp
26 | 
27 | #include <cusp/detail/verify.inl>
28 | 
29 | 


--------------------------------------------------------------------------------
/include/misc.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | 
10 | typedef void (*AMGX_output_callback)(const char *msg, int length);
11 | extern AMGX_output_callback amgx_output;
12 | extern AMGX_output_callback error_output;
13 | extern AMGX_output_callback amgx_distributed_output;
14 | int amgx_printf(const char *fmt, ...);
15 | 
16 | #ifdef NDEBUG // with NDEBUG defined, both debug macros expand to nothing
17 | #define amgx_printf_debug(fmt,...)
18 | #define device_printf(fmt,...)
19 | #else // otherwise they forward their arguments to amgx_printf / printf
20 | #define amgx_printf_debug(fmt,...) amgx_printf(fmt,##__VA_ARGS__)
21 | #define device_printf(fmt,...) printf(fmt,##__VA_ARGS__)
22 | #endif
23 | 
24 | } // namespace amgx
25 | 
26 | 


--------------------------------------------------------------------------------
/include/stream.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <global_thread_handle.h>
 8 | 
 9 | namespace amgx
10 | {
11 | 
12 | class Stream // Creates a CUDA stream on construction and destroys it on destruction.
13 | {
14 |         cudaStream_t s; // the wrapped stream handle
15 |         // NOTE(review): no copy operations are declared, so an implicit copy would share `s` and destroy it twice -- confirm instances are never copied.
16 |     public:
17 |         // Creates the stream with cudaStreamCreateWithFlags(&s, flags), then runs cudaCheckError().
18 |         inline
19 |         Stream(unsigned flags = cudaStreamNonBlocking) 
20 |         { 
21 |             cudaStreamCreateWithFlags(&s, flags);
22 |             cudaCheckError();
23 |         }
24 |         // Destroys the stream; the status returned by cudaStreamDestroy is not checked.
25 |         inline
26 |         ~Stream() 
27 |         { 
28 |             cudaStreamDestroy(s); 
29 |         }
30 |         // Returns the raw handle; this object still destroys it in its destructor.
31 |         inline
32 |         cudaStream_t get() { return s; }
33 | };
34 | 
35 | } // namespace amgx
36 | 


--------------------------------------------------------------------------------
/include/cusp/version.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file version.h
 6 |  *  \brief Cusp version
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | //  This is the only cusp header that is guaranteed to 
14 | //  change with every cusp release.
15 | //
16 | //  CUSP_VERSION % 100 is the sub-minor version
17 | //  CUSP_VERSION / 100 % 1000 is the minor version
18 | //  CUSP_VERSION / 100000 is the major version
19 | 
20 | #define CUSP_VERSION 300
21 | #define CUSP_MAJOR_VERSION     (CUSP_VERSION / 100000)
22 | #define CUSP_MINOR_VERSION     (CUSP_VERSION / 100 % 1000)
23 | #define CUSP_SUBMINOR_VERSION  (CUSP_VERSION % 100)
24 | 
25 | 


--------------------------------------------------------------------------------
/include/cusp/detail/host/elementwise.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | namespace detail
10 | {
11 | namespace host
12 | {
13 | 
14 | template <typename Matrix1,
15 |           typename Matrix2,
16 |           typename Matrix3,
17 |           typename BinaryFunction>
18 | void transform_elementwise(const Matrix1& A,
19 |                            const Matrix2& B,
20 |                                  Matrix3& C,
21 |                                  BinaryFunction op);
22 | 
23 | } // end namespace host
24 | } // end namespace detail
25 | } // end namespace cusp
26 | 
27 | #include <cusp/detail/host/elementwise.inl>
28 | 
29 | 


--------------------------------------------------------------------------------
/include/cusp/detail/host/update.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/bsr.h
 4 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/coo.h
 5 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/csc.h
 6 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/csr.h
 7 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/dense.h
 8 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/dia.h
 9 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/fixed_size.h
10 | svn --force export http://svn.scipy.org/svn/scipy/trunk/scipy/sparse/sparsetools/scratch.h
11 | 
12 | 


--------------------------------------------------------------------------------
/external/rapidjson/include/rapidjson/internal/strfunc.h:
--------------------------------------------------------------------------------
 1 | #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
 2 | #define RAPIDJSON_INTERNAL_STRFUNC_H_
 3 | 
 4 | namespace rapidjson {
 5 | namespace internal {
 6 | 
 7 | //! Custom strlen() which works on different character types.
 8 | /*!	\tparam Ch Character type (e.g. char, wchar_t, short)
 9 | 	\param s Null-terminated input string.
10 | 	\return Number of characters in the string. 
11 | 	\note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
12 | */
13 | template <typename Ch>
14 | inline SizeType StrLen(const Ch* s) {
15 | 	const Ch* end = s;
16 | 	for (; *end != '\0'; ++end) {}	// walk forward to the terminating null character
17 | 	const SizeType length = SizeType(end - s);
18 | 	return length;
19 | }
20 | 
21 | } // namespace internal
22 | } // namespace rapidjson
23 | 
24 | #endif // RAPIDJSON_INTERNAL_STRFUNC_H_
25 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "matrix_coloring_scheme": "MIN_MAX", 
 5 |         "max_uncolored_percentage": 0.15, 
 6 |         "algorithm": "AGGREGATION", 
 7 |         "obtain_timings": 1, 
 8 |         "solver": "AMG", 
 9 |         "smoother": "MULTICOLOR_DILU", 
10 |         "print_solve_stats": 1, 
11 |         "presweeps": 1, 
12 |         "selector": "SIZE_2", 
13 |         "coarsest_sweeps": 2, 
14 |         "max_iters": 100, 
15 |         "monitor_residual": 1, 
16 |         "scope": "main", 
17 |         "max_levels": 50, 
18 |         "postsweeps": 1, 
19 |         "tolerance" : 1e-06, 
20 |         "convergence": "RELATIVE_INI",
21 |         "print_grid_stats": 1, 
22 |         "norm": "L1", 
23 |         "cycle": "V"
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "print_grid_stats": 1, 
 6 |         "algorithm": "AGGREGATION", 
 7 |         "obtain_timings": 1, 
 8 |         "solver": "AMG", 
 9 |         "smoother": "BLOCK_JACOBI", 
10 |         "print_solve_stats": 1, 
11 |         "presweeps": 2, 
12 |         "selector": "SIZE_2", 
13 |         "convergence": "RELATIVE_INI", 
14 |         "coarsest_sweeps": 2, 
15 |         "max_iters": 100, 
16 |         "monitor_residual": 1, 
17 |         "min_coarse_rows": 2, 
18 |         "relaxation_factor": 0.75, 
19 |         "scope": "main", 
20 |         "max_levels": 50, 
21 |         "postsweeps": 2, 
22 |         "tolerance" : 1e-06, 
23 |         "norm": "L1", 
24 |         "cycle": "V"
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_LOW_DEG_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "matrix_coloring_scheme": "MIN_MAX", 
 5 |         "max_uncolored_percentage": 0.15, 
 6 |         "algorithm": "AGGREGATION", 
 7 |         "obtain_timings": 1, 
 8 |         "solver": "AMG", 
 9 |         "smoother": "MULTICOLOR_DILU", 
10 |         "print_solve_stats": 1, 
11 |         "presweeps": 1, 
12 |         "selector": "SIZE_2", 
13 |         "coarsest_sweeps": 2, 
14 |         "max_iters": 100, 
15 |         "monitor_residual": 1, 
16 |         "scope": "main", 
17 |         "max_levels": 50, 
18 |         "postsweeps": 1, 
19 |         "tolerance" : 1e-06, 
20 |         "convergence": "RELATIVE_INI",
21 |         "print_grid_stats": 1, 
22 |         "norm": "L1", 
23 |         "cycle": "V"
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/include/cusp/convert.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file convert.h
 6 |  *  \brief Matrix format conversion
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | /*! \addtogroup algorithms Algorithms
17 |  *  \ingroup algorithms
18 |  *  \{
19 |  */
20 | 
21 | /*! \p convert : Convert between matrix formats
22 |  *
23 |  * \note DestinationType will be resized as necessary
24 |  *
25 |  * \see \p cusp::copy
26 |  */
27 | template <typename SourceType, typename DestinationType>
28 | void convert(const SourceType& src, DestinationType& dst);
29 | 
30 | /*! \}
31 |  */
32 | 
33 | } // end namespace cusp
34 | 
35 | #include <cusp/detail/convert.inl>
36 | 
37 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_LOW_DEG_BJ.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "print_grid_stats": 1, 
 6 |         "algorithm": "AGGREGATION", 
 7 |         "obtain_timings": 1, 
 8 |         "solver": "AMG", 
 9 |         "smoother": "BLOCK_JACOBI", 
10 |         "print_solve_stats": 1, 
11 |         "presweeps": 2, 
12 |         "selector": "SIZE_2", 
13 |         "convergence": "RELATIVE_INI", 
14 |         "coarsest_sweeps": 2, 
15 |         "max_iters": 100, 
16 |         "monitor_residual": 1, 
17 |         "min_coarse_rows": 2, 
18 |         "relaxation_factor": 0.75, 
19 |         "scope": "main", 
20 |         "max_levels": 50, 
21 |         "postsweeps": 2, 
22 |         "tolerance" : 1e-06, 
23 |         "norm": "L1", 
24 |         "cycle": "V"
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/include/determinism_checker.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <string>
 8 | 
 9 | namespace amgx
10 | {
11 | namespace testing_tools
12 | {
13 | 
14 | struct hash_path_determinism_checker_private;
15 | 
16 | struct hash_path_determinism_checker
17 | {
18 |     static hash_path_determinism_checker *singleton();
19 |     hash_path_determinism_checker();
20 |     ~hash_path_determinism_checker();
21 | 
22 |     hash_path_determinism_checker_private *priv;
23 |     void checkpoint(const std::string &name, void *data, long long int size_in_bytes, bool no_permute = true);
24 |     unsigned long long int checksum( void *data, long long int size_in_bytes, bool no_permute = true );
25 | };
26 | 
27 | }
28 | }
29 | 


--------------------------------------------------------------------------------
/examples/install_makefiles_nompi/Makefile:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2011 - 2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | 
 6 | # CUDA Toolkit location
 7 | INC_D = -I/usr/local/cuda/include
 8 | LIB_D = -L/usr/local/cuda/lib64
 9 | LIB_L = -lcudart -ldl
10 | 
11 | # Examples
12 | amgx_capi:
13 | 	gcc -O2 -std=c99 amgx_capi.c -c $(INC_D)
14 | 	g++ -O2 amgx_capi.o -o amgx_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -Wl,-rpath=../lib
15 | 
16 | amgx_capi_dynamic:
17 | 	gcc -O2 -std=c99 amgx_capi.c -c -o amgx_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING
18 | 	g++ -O2 amgx_capi_dynamic.o -o amgx_capi_dynamic $(LIB_D) $(LIB_L) -Wl,-rpath=../lib
19 | 
20 | # All
21 | all: amgx_capi amgx_capi_dynamic
22 | 
23 | # Clean
24 | clean:
25 | 	rm -f amgx_capi
26 | 	rm -f amgx_capi_dynamic
27 | 	rm -f *.o
28 | 


--------------------------------------------------------------------------------
/include/cusp/copy.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file copy.h
 6 |  *  \brief Performs (deep) copy operations between containers and views.
 7 |  */
 8 |     
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | /*! \addtogroup algorithms Algorithms
17 |  *  \ingroup algorithms
18 |  *  \{
19 |  */
20 | 
21 | /*! \p copy : Copy one array or matrix to another
22 |  *
23 |  * \note T1 and T2 must have the same format type
24 |  * \note T2 will be resized as necessary
25 |  *
26 |  * \see \p convert
27 |  */
28 | template <typename T1, typename T2>
29 | void copy(const T1& src, T2& dst);
30 | 
31 | /*! \}
32 |  */
33 | 
34 | } // end namespace cusp
35 | 
36 | #include <cusp/detail/copy.inl>
37 | 
38 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_THRUST_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "obtain_timings": 1, 
 5 |         "print_grid_stats": 1, 
 6 |         "max_uncolored_percentage": 0.02, 
 7 |         "algorithm": "AGGREGATION", 
 8 |         "coarseAgenerator": "THRUST", 
 9 |         "solver": "AMG", 
10 |         "smoother": "MULTICOLOR_DILU", 
11 |         "print_solve_stats": 1, 
12 |         "presweeps": 1, 
13 |         "selector": "SIZE_2", 
14 |         "coarsest_sweeps": 2, 
15 |         "max_iters": 100, 
16 |         "monitor_residual": 1, 
17 |         "postsweeps": 1, 
18 |         "scope": "main", 
19 |         "max_levels": 50, 
20 |         "matrix_coloring_scheme": "MIN_MAX", 
21 |         "tolerance" : 1e-06, 
22 |         "convergence": "RELATIVE_INI",
23 |         "norm": "L1", 
24 |         "cycle": "V"
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/device_properties.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <device_properties.h>
 6 | #include <error.h>
 7 | namespace amgx
 8 | {
 9 | static cudaDeviceProp deviceProps; // cache filled in by the first call to getDeviceProperties()
10 | static bool initialized=false; // becomes true once deviceProps has been queried
11 | // Returns (by value) the properties of the device that was current on the first call; later calls return the cached copy.
12 | cudaDeviceProp getDeviceProperties()
13 | {
14 |     if(!initialized) {
15 |         int dev;
16 |         cudaGetDevice(&dev);
17 |         cudaCheckError();
18 |         cudaGetDeviceProperties(&deviceProps, dev);
19 |         cudaCheckError();
20 |         initialized=true;
21 |     }
22 |     return deviceProps; // NOTE(review): the cache is never re-queried if the current device changes, and it is not synchronized -- confirm single-device, single-thread use.
23 | }
24 | 
25 | // Number of Streaming Multiprocessors as reported by getDeviceProperties()
26 | int getSMCount()
27 | {
28 |     const cudaDeviceProp properties = getDeviceProperties();
29 |     return properties.multiProcessorCount;
30 | }
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_GS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "print_grid_stats": 1, 
 6 |         "max_uncolored_percentage": 0.15, 
 7 |         "algorithm": "AGGREGATION", 
 8 |         "obtain_timings": 1, 
 9 |         "solver": "AMG", 
10 |         "smoother": "MULTICOLOR_GS", 
11 |         "print_solve_stats": 1, 
12 |         "presweeps": 1, 
13 |         "symmetric_GS": 1, 
14 |         "selector": "SIZE_2", 
15 |         "coarsest_sweeps": 2, 
16 |         "max_iters": 100, 
17 |         "monitor_residual": 1, 
18 |         "postsweeps": 1, 
19 |         "scope": "main", 
20 |         "max_levels": 50, 
21 |         "matrix_coloring_scheme": "MIN_MAX", 
22 |         "tolerance" : 1e-06, 
23 |         "convergence": "RELATIVE_INI",
24 |         "norm": "L1", 
25 |         "cycle": "V"
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/include/cusp/format.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file format.h
 6 |  *  \brief Format types
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | struct known_format {};
17 | struct unknown_format {};
18 | 
19 | struct dense_format : public known_format {};
20 | struct array1d_format : public dense_format {};
21 | struct array2d_format : public dense_format {};
22 | 
23 | struct sparse_format : public known_format {};
24 | struct coo_format : public sparse_format {};
25 | struct csr_format : public sparse_format {};
26 | struct dia_format : public sparse_format {};
27 | struct ell_format : public sparse_format {};
28 | struct hyb_format : public sparse_format {};
29 | 
30 | } // end namespace cusp
31 | 
32 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_LOW_DEG_GS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "print_grid_stats": 1, 
 6 |         "max_uncolored_percentage": 0.15, 
 7 |         "algorithm": "AGGREGATION", 
 8 |         "obtain_timings": 1, 
 9 |         "solver": "AMG", 
10 |         "smoother": "MULTICOLOR_GS", 
11 |         "print_solve_stats": 1, 
12 |         "presweeps": 1, 
13 |         "symmetric_GS": 1, 
14 |         "selector": "SIZE_2", 
15 |         "coarsest_sweeps": 2, 
16 |         "max_iters": 100, 
17 |         "monitor_residual": 1, 
18 |         "postsweeps": 1, 
19 |         "scope": "main", 
20 |         "max_levels": 50, 
21 |         "matrix_coloring_scheme": "MIN_MAX", 
22 |         "tolerance" : 1e-06, 
23 |         "convergence": "RELATIVE_INI",
24 |         "norm": "L1", 
25 |         "cycle": "V"
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_THRUST_GS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "coarsest_sweeps": 2, 
 5 |         "matrix_coloring_scheme": "MIN_MAX", 
 6 |         "smoother": "MULTICOLOR_GS", 
 7 |         "print_solve_stats": 1, 
 8 |         "obtain_timings": 1, 
 9 |         "relaxation_factor": 0.75, 
10 |         "scope": "main", 
11 |         "tolerance" : 1e-06, 
12 |         "norm": "L1", 
13 |         "postsweeps": 2, 
14 |         "max_uncolored_percentage": 0.15, 
15 |         "presweeps": 2, 
16 |         "selector": "SIZE_2", 
17 |         "convergence": "RELATIVE_INI", 
18 |         "cycle": "V", 
19 |         "print_grid_stats": 1, 
20 |         "algorithm": "AGGREGATION", 
21 |         "coarseAgenerator": "THRUST", 
22 |         "solver": "AMG", 
23 |         "max_iters": 100, 
24 |         "monitor_residual": 1, 
25 |         "max_levels": 50
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/include/cusp/gallery/stencil.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file stencil.h
 6 |  *  \brief Generate sparse matrix from grid stencil
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | #include <cusp/array1d.h>
14 | 
15 | namespace cusp
16 | {
17 | namespace gallery
18 | {
19 | 
20 | template <typename MatrixType,
21 |           typename StencilPoint,
22 |           typename GridDimension>
23 | void generate_matrix_from_stencil(      MatrixType& matrix,
24 |                                   const cusp::array1d<StencilPoint,cusp::host_memory>& stencil,
25 |                                   const GridDimension& grid);
26 |                             
27 | } // end namespace gallery
28 | } // end namespace cusp
29 | 
30 | #include <cusp/gallery/stencil.inl>
31 | 
32 | 


--------------------------------------------------------------------------------
/include/cusp/detail/forward_definitions.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | // Forward declarations of the cusp array and matrix class templates; no definitions are provided in this header.
10 | template <typename T, typename MemorySpace>                             class array1d;
11 | template <typename T, typename MemorySpace, typename Orientation>       class array2d;
12 | template <typename IndexType, typename ValueType, typename MemorySpace> class coo_matrix;
13 | template <typename IndexType, typename ValueType, typename MemorySpace> class csr_matrix;
14 | template <typename IndexType, typename ValueType, typename MemorySpace> class dia_matrix;
15 | template <typename IndexType, typename ValueType, typename MemorySpace> class ell_matrix;
16 | template <typename IndexType, typename ValueType, typename MemorySpace> class hyb_matrix;
17 | 
18 | } // end namespace cusp
19 | 
20 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "print_solve_stats": 0, 
 9 |             "interpolator": "D2",
10 |             "presweeps": 1, 
11 |             "max_iters": 1, 
12 |             "monitor_residual": 0, 
13 |             "store_res_history": 0, 
14 |             "scope": "amg", 
15 |             "cycle": "V", 
16 |             "postsweeps": 1
17 |         }, 
18 |         "solver": "FGMRES", 
19 |         "print_solve_stats": 1, 
20 |         "obtain_timings": 1, 
21 |         "max_iters": 100, 
22 |         "monitor_residual": 1, 
23 |         "gmres_n_restart": 20, 
24 |         "convergence": "RELATIVE_INI", 
25 |         "scope": "main", 
26 |         "tolerance" : 1e-06, 
27 |         "norm": "L2"
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/include/eigensolvers/multivector_operations.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <matrix.h>
 8 | #include <vector.h>
 9 | 
10 | namespace amgx
11 | {
12 | 
13 | // Declaration only. NOTE(review): the _TN suffix suggests lhs enters the product transposed -- confirm against the definition.
14 | // res = alpha * lhs * rhs + beta * res.
15 | template <typename TConfig>
16 | void
17 | distributed_gemm_TN(typename TConfig::VecPrec alpha, const Vector<TConfig> &lhs,
18 |                     const Vector<TConfig> &rhs,
19 |                     typename TConfig::VecPrec beta, Vector<TConfig> &res, // res is the only non-const argument
20 |                     const Operator<TConfig> &A); // NOTE(review): role of A is not visible here; presumably supplies distribution info
21 | // Declaration only. 'results' is a host-memory vector; presumably it receives one norm per column of v -- confirm.
22 | template <typename TConfig>
23 | void
24 | multivector_column_norms(const Vector<TConfig> &v,
25 |                          Vector<typename TConfig::template setMemSpace<AMGX_host>::Type> &results,
26 |                          const Operator<TConfig> &A);
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/src/distributed/distributed_comms.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <distributed/distributed_comms.h>
 6 | #include <basic_types.h>
 7 | #include <error.h>
 8 | #include <types.h>
 9 | #include <assert.h>
10 | 
11 | namespace amgx
12 | {
13 | 
14 | /***************************************
15 |  * Source Definitions
16 |  ***************************************/
17 | template<class T_Config>
18 | DistributedComms<T_Config>::~DistributedComms() // out-of-line destructor with an empty body
19 | {
20 | }; // the trailing ';' is an empty declaration and has no effect
21 | 
22 | /****************************************
23 |  * Explicit instantiations
24 |  ***************************************/
25 | #define AMGX_CASE_LINE(CASE) template class DistributedComms<TemplateMode<CASE>::Type >;
26 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) // presumably expands AMGX_CASE_LINE once per supported build mode (macro defined elsewhere)
27 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) // presumably the same for the complex-valued modes -- confirm
28 | #undef AMGX_CASE_LINE
29 | 
30 | } // namespace amgx
31 | 


--------------------------------------------------------------------------------
/include/cusp/detail/matrix_shape.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 |     // Stores the row and column counts of a matrix as two public members of type IndexType.
10 |     template<typename IndexType>
11 |     class matrix_shape
12 |     {
13 |         public:
14 |             typedef IndexType index_type;
15 | 
16 |             index_type num_rows;
17 |             index_type num_cols;
18 |             
19 |             matrix_shape() // default shape is 0 x 0
20 |                 : num_rows(0), num_cols(0) {}
21 | 
22 |             matrix_shape(IndexType rows, IndexType cols)
23 |                 : num_rows(rows), num_cols(cols) {}
24 | 
25 |             void swap(matrix_shape& shape) // exchanges both dimensions with 'shape'
26 |             {
27 |                 amgx::thrust::swap(num_rows, shape.num_rows); // NOTE(review): this header includes nothing itself, so amgx::thrust::swap must already be declared by the includer
28 |                 amgx::thrust::swap(num_cols, shape.num_cols);
29 |             }
30 |     };
31 | 
32 | } // end namespace cusp
33 | 


--------------------------------------------------------------------------------
/src/operators/shifted_operator.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | namespace amgx
 6 | {
 7 | 
 8 | template <class T_Config> class Operator; // forward declaration placed ahead of the shifted_operator.h include below
 9 | 
10 | }
11 | 
12 | #include <operators/shifted_operator.h>
13 | #include <blas.h>
14 | 
15 | namespace amgx
16 | {
17 | // Applies the wrapped operator *m_A to v (result in res), then calls axpy with v, res and m_shift over the range of 'view'.
18 | template <typename TConfig>
19 | void ShiftedOperator<TConfig>::apply(const Vector<TConfig> &v, Vector<TConfig> &res, ViewType view)
20 | {
21 |     Operator<TConfig> &A = *m_A;
22 |     int offset, size; // filled in by getOffsetAndSizeForView below
23 |     A.getOffsetAndSizeForView(view, &offset, &size);
24 |     A.apply(v, res, OWNED); // NOTE(review): always uses OWNED while offset/size come from 'view' -- confirm this is intended
25 |     axpy(v, res, m_shift, offset, size); // presumably res += m_shift * v on [offset, offset + size) -- confirm against blas.h
26 | }
27 | 
28 | #define AMGX_CASE_LINE(CASE) template class ShiftedOperator<TemplateMode<CASE>::Type>;
29 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) // explicit instantiations; presumably one per supported build mode (macro defined elsewhere)
30 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE) // presumably the same for the complex-valued modes -- confirm
31 | #undef AMGX_CASE_LINE
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/generalized_spmv/coo.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/device/spmv/coo_flat.h>
 8 | 
 9 | namespace cusp
10 | {
11 | namespace detail
12 | {
13 | namespace device
14 | {
15 | // spmv for a device-memory COO matrix: forwards all three arguments to spmv_coo_flat.
16 | template <typename IndexType, typename ValueType>
17 | void spmv(const coo_matrix<IndexType,ValueType,cusp::device_memory>& coo, 
18 |           const ValueType * x, // read-only input; presumably the vector being multiplied
19 |                 ValueType * y) // writable; presumably receives the product
20 | { 
21 |     spmv_coo_flat(coo, x, y);
22 | }
23 | // Same interface as spmv above, but forwards to spmv_coo_flat_tex instead.
24 | template <typename IndexType, typename ValueType>
25 | void spmv_tex(const coo_matrix<IndexType,ValueType,cusp::device_memory>& coo, 
26 |               const ValueType * x, 
27 |                     ValueType * y)
28 | { 
29 |     spmv_coo_flat_tex(coo, x, y); // NOTE(review): "_tex" presumably selects a texture-based read path -- defined in coo_flat.h
30 | }
31 | 
32 | } // end namespace device
33 | } // end namespace detail
34 | } // end namespace cusp
35 | 
36 | 


--------------------------------------------------------------------------------
/src/solvers/user_solver.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <solvers/user_solver.h>
 6 | #include <cassert>
 7 | 
 8 | namespace amgx
 9 | {
10 | // Setup hook with an empty body: nothing is prepared here and reuse_matrix_structure is ignored.
11 | template<class T_Config>
12 | void
13 | User_Solver<T_Config>::solver_setup(bool reuse_matrix_structure)
14 | {
15 | }
16 | 
17 | // Runs one iteration: invokes the stored callback on (*m_A, b, x) and returns the result of converged(b, x).
18 | template<class T_Config>
19 | AMGX_STATUS
20 | User_Solver<T_Config>::solve_iteration( VVector &b, VVector &x, bool xIsZero ) // xIsZero is not used in this body
21 | {
22 |     assert( callback ); // only checked where assert is active (not when NDEBUG is defined)
23 |     callback( *this->m_A, b, x );
24 |     return this->converged( b, x );
25 | }; // the trailing ';' is an empty declaration and has no effect
26 | 
27 | /****************************************
28 |  * Explicit instantiations
29 |  ***************************************/
30 | #define AMGX_CASE_LINE(CASE) template class User_Solver<TemplateMode<CASE>::Type>;
31 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) // only AMGX_FORALL_BUILDS is used here; there is no AMGX_FORCOMPLEX_BUILDS line in this file
32 | #undef AMGX_CASE_LINE
33 | 
34 | } // namespace amgx
35 | 


--------------------------------------------------------------------------------
/examples/matrix.mtx:
--------------------------------------------------------------------------------
 1 | %%MatrixMarket matrix coordinate real general
 2 | 12 12 61
 3 | 1 1 1.0 
 4 | 1 2 2.0
 5 | 1 4 3.0
 6 | 1 9 4.0                  
 7 | 2 1 5.0
 8 | 2 2 6.0
 9 | 2 3 7.0
10 | 2 4 8.0                  
11 | 3 2 9.0
12 | 3 3 10.0
13 | 3 4 11.0
14 | 3 5 12.0
15 | 3 6 13.0               
16 | 4 1 14.0
17 | 4 2 15.0
18 | 4 3 16.0
19 | 4 4 17.0
20 | 4 5 18.0
21 | 4 6 19.0
22 | 4 9 20.0
23 | 4 11 21.0               
24 | 5 3 22.0
25 | 5 5 23.0
26 | 5 6 24.0
27 | 5 7 25.0
28 | 6 3 26.0
29 | 6 4 27.0
30 | 6 5 28.0
31 | 6 6 29.0
32 | 6 7 30.0
33 | 6 8 31.0
34 | 6 11 32.0
35 | 7 5 33.0
36 | 7 6 34.0
37 | 7 7 35.0
38 | 7 8 36.0
39 | 8 6 37.0
40 | 8 7 38.0
41 | 8 8 39.0
42 | 8 10 40.0
43 | 8 11 41.0
44 | 9 1 42.0
45 | 9 4 43.0
46 | 9 9 44.0
47 | 9 11 45.0
48 | 9 12 46.0
49 | 10 8 47.0
50 | 10 10 48.0
51 | 10 11 49.0
52 | 10 12 50.0
53 | 11 4 51.0
54 | 11 6 52.0
55 | 11 8 53.0
56 | 11 9 54.0
57 | 11 10 55.0
58 | 11 11 56.0
59 | 11 12 57.0
60 | 12 9 58.0
61 | 12 10 59.0
62 | 12 11 60.0
63 | 12 12 61.0
64 | 


--------------------------------------------------------------------------------
/include/cusp/precond/strength.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/config.h>
 8 | 
 9 | namespace cusp
10 | {
11 | namespace precond
12 | {
13 | namespace detail
14 | {
15 | 
16 | /*  Compute a strength of connection matrix using the standard symmetric measure.
17 |  *  An off-diagonal connection A[i,j] is strong iff::
18 |  *
19 |  *     abs(A[i,j]) >= theta * sqrt( abs(A[i,i]) * abs(A[j,j]) )
20 |  *
21 |  *  With the default threshold (theta = 0.0) all connections are strong.
22 |  *
23 |  *  Note: explicit diagonal entries are always considered strong.
24 |  */
25 | template <typename Matrix1, typename Matrix2> // declaration only; the definition comes from cusp/precond/detail/strength.inl, included below
26 | void symmetric_strength_of_connection(const Matrix1& A, Matrix2& S, const double theta = 0.0); // S is the only non-const argument
27 | 
28 | } // end namespace detail
29 | } // end namespace precond
30 | } // end namespace cusp
31 | 
32 | #include <cusp/precond/detail/strength.inl>
33 | 
34 | 


--------------------------------------------------------------------------------
/include/amgx_types/rand.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <basic_types.h>
 8 | #include <cstdlib>
 9 | 
10 | namespace amgx
11 | {
12 | namespace types
13 | {
14 | // Host-side helpers built on the C library rand(); only the four specializations below are defined.
15 | template <typename T>
16 | __inline__ __host__ T get_rand();
17 | // float: rand() divided by RAND_MAX in single precision, i.e. a value in [0, 1].
18 | template <>
19 | __inline__ __host__ float get_rand<float>()
20 | {
21 |     return 1.f * rand() / RAND_MAX;
22 | }
23 | // double: the same scaling, evaluated in double precision.
24 | template <>
25 | __inline__ __host__ double get_rand<double>()
26 | {
27 |     return 1.*rand() / RAND_MAX;
28 | }
29 | // cuComplex: each part uses its own rand() call; argument evaluation order is unspecified, so which draw feeds which part is not fixed.
30 | template <>
31 | __inline__ __host__ cuComplex get_rand<cuComplex>()
32 | {
33 |     return make_cuComplex (1.f * rand() / RAND_MAX, 1.f * rand() / RAND_MAX);
34 | }
35 | // cuDoubleComplex: as for cuComplex, with double-precision parts.
36 | template <>
37 | __inline__ __host__ cuDoubleComplex get_rand<cuDoubleComplex>()
38 | {
39 |     return make_cuDoubleComplex (1.*rand() / RAND_MAX, 1.*rand() / RAND_MAX);
40 | }
41 | 
42 | } // namespace types
43 | } // namespace amgx
44 | 


--------------------------------------------------------------------------------
/include/async_event.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | 
10 | // Owns a cudaEvent_t that is created on demand and destroyed with the object.
11 | // Return codes of the CUDA calls are not checked in this class.
12 | // NOTE(review): the class is implicitly copyable and a copy would destroy the
13 | // same event handle a second time -- confirm no caller copies a created event.
14 | class AsyncEvent
15 | {
16 |     public:
17 |         // Starts without an event; create() or record() makes one later.
18 |         AsyncEvent() : async_event(NULL) { }
19 |         // Creates the event right away. The 'size' argument is not used.
20 |         AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); }
21 |         // Destroys the event if one was created.
22 |         ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); }
23 | 
24 |         // Creates the event unless it already exists, so a repeated call no
25 |         // longer overwrites (and leaks) the handle created earlier.
26 |         void create()
27 |         {
28 |             if (async_event == NULL)
29 |             {
30 |                 cudaEventCreate(&async_event);
31 |             }
32 |         }
33 | 
34 |         // Records the event on stream s, creating it first when needed.
35 |         void record(cudaStream_t s = 0)
36 |         {
37 |             create();
38 |             cudaEventRecord(async_event, s);
39 |         }
40 | 
41 |         // Waits for the event. When no event exists there is nothing to wait
42 |         // for, and the CUDA call is skipped instead of receiving a null handle.
43 |         void sync()
44 |         {
45 |             if (async_event != NULL)
46 |             {
47 |                 cudaEventSynchronize(async_event);
48 |             }
49 |         }
50 |     private:
51 |         cudaEvent_t async_event;
52 | };
53 | 
54 | }
55 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/generalized_spmv/csr.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/device/spmv/csr_scalar.h>
 8 | #include <cusp/detail/device/spmv/csr_vector.h>
 9 | 
10 | namespace cusp
11 | {
12 | namespace detail
13 | {
14 | namespace device
15 | {
16 | // spmv for a device-memory CSR matrix: forwards all three arguments to spmv_csr_vector.
17 | template <typename IndexType, typename ValueType>
18 | void spmv(const csr_matrix<IndexType,ValueType,cusp::device_memory>& csr, 
19 |           const ValueType * x, // read-only input; presumably the vector being multiplied
20 |                 ValueType * y) // writable; presumably receives the product
21 | { 
22 |     spmv_csr_vector(csr, x, y); // csr_scalar.h is also included above, but only the vector variant is called here
23 | }
24 | // Same interface as spmv above, but forwards to spmv_csr_vector_tex instead.
25 | template <typename IndexType, typename ValueType>
26 | void spmv_tex(const csr_matrix<IndexType,ValueType,cusp::device_memory>& csr, 
27 |               const ValueType * x, 
28 |                     ValueType * y)
29 | { 
30 |     spmv_csr_vector_tex(csr, x, y); // NOTE(review): "_tex" presumably selects a texture-based read path -- defined in csr_vector.h
31 | }
32 | 
33 | } // end namespace device
34 | } // end namespace detail
35 | } // end namespace cusp
36 | 
37 | 


--------------------------------------------------------------------------------
/src/configs/PBICGSTAB_W.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "error_scaling": 2, 
 6 |             "algorithm": "AGGREGATION", 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "relaxation_factor": 0.9, 
10 |                 "scope": "amg_smoother", 
11 |                 "solver": "BLOCK_JACOBI"
12 |             }, 
13 |             "presweeps": 1, 
14 |             "selector": "SIZE_2", 
15 |             "max_iters": 1, 
16 |             "monitor_residual": 1, 
17 |             "convergence": "RELATIVE_INI", 
18 |             "scope": "amg", 
19 |             "cycle": "W", 
20 |             "norm": "L1", 
21 |             "postsweeps": 2
22 |         }, 
23 |         "solver": "PBICGSTAB", 
24 |         "print_solve_stats": 1, 
25 |         "obtain_timings": 1, 
26 |         "max_iters": 100, 
27 |         "monitor_residual": 1, 
28 |         "scope": "main", 
29 |         "tolerance" : 1e-06, 
30 |         "norm": "L2"
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/configs/PBICGSTAB_AGGREGATION_W_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "error_scaling": 2, 
 6 |             "algorithm": "AGGREGATION", 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "relaxation_factor": 0.9, 
10 |                 "scope": "amg_smoother", 
11 |                 "solver": "BLOCK_JACOBI"
12 |             }, 
13 |             "presweeps": 1, 
14 |             "selector": "SIZE_2", 
15 |             "max_iters": 1, 
16 |             "scope": "amg", 
17 |             "cycle": "W", 
18 |             "max_levels": 50, 
19 |             "norm": "L1", 
20 |             "postsweeps": 2
21 |         }, 
22 |         "solver": "PBICGSTAB", 
23 |         "print_solve_stats": 1, 
24 |         "obtain_timings": 1, 
25 |         "max_iters": 100, 
26 |         "monitor_residual": 1, 
27 |         "convergence": "RELATIVE_INI", 
28 |         "scope": "main", 
29 |         "tolerance" : 1e-06, 
30 |         "norm": "L2"
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/include/multiply.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <basic_types.h>
 7 | #include <vector.h>
 8 | #include <matrix.h>
 9 | 
10 | namespace amgx
11 | {
12 | 
13 | //computes C=A*B for a matrix A and vector B; 'view' defaults to OWNED
14 | template <class TConfig>
15 | void multiply(Matrix<TConfig> &A, Vector<TConfig> &B, Vector<TConfig> &C, ViewType view = OWNED);
16 | // Takes the same arguments as multiply plus an index vector 'mask'; how the mask is applied is not visible from this declaration.
17 | template <class TConfig>
18 | void multiply_masked(Matrix<TConfig> &A, Vector<TConfig> &B, Vector<TConfig> &C, typename Matrix<TConfig>::IVector &mask, ViewType view = OWNED);
19 | // Declaration only; no explicit mask argument -- NOTE(review): the mask presumably comes from A, confirm with the definition.
20 | template <class MatrixA, class Vector>
21 | void multiply_with_mask(MatrixA &A, Vector &B, Vector &C);
22 | // Declaration only; additionally takes a matrix P whose role is not visible here.
23 | template <class MatrixA, class Vector>
24 | void multiply_with_mask_restriction(MatrixA &A, Vector &B, Vector &C, MatrixA &P);
25 | 
26 | 
27 | //computes C=A*B where A, B and C are all matrices
28 | template <class TConfig>
29 | void multiplyMM(const Matrix<TConfig> &A, const Matrix<TConfig> &B, Matrix<TConfig> &C);
30 | 
31 | } // namespace amgx
32 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/spmv/hyb.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/device/spmv/ell.h>
 8 | #include <cusp/detail/device/spmv/coo_flat.h>
 9 | 
10 | namespace cusp
11 | {
12 | namespace detail
13 | {
14 | namespace device
15 | {
16 | // HYB product in two calls: spmv_ell on A.ell, then __spmv_coo_flat<false, false> on A.coo, both with the same x and y.
17 | template <typename Matrix,
18 |           typename ValueType>
19 | void spmv_hyb(const Matrix&    A, 
20 |               const ValueType* x, 
21 |                     ValueType* y)
22 | {
23 |     spmv_ell(A.ell, x, y);
24 |     __spmv_coo_flat<false, false>(A.coo, x, y); // NOTE(review): meaning of the two template flags is defined in coo_flat.h; presumably the COO part accumulates into y
25 | }
26 | // Two-call HYB product: spmv_ell_tex on A.ell, then __spmv_coo_flat<true, false> on A.coo, both with the same x and y.
27 | template <typename Matrix,
28 |           typename ValueType>
29 | void spmv_hyb_tex(const Matrix&    A,
30 |                   const ValueType* x, 
31 |                         ValueType* y)
32 | {
33 |     spmv_ell_tex(A.ell, x, y);
34 |     __spmv_coo_flat<true, false>(A.coo, x, y); // NOTE(review): meaning of the two template flags is defined in coo_flat.h -- confirm
35 | }
36 | 
37 | } // end namespace device
38 | } // end namespace detail
39 | } // end namespace cusp
40 | 
41 | 


--------------------------------------------------------------------------------
/src/configs/AGGREGATION_MULTI_PAIRWISE.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "use_scalar_norm": 1, 
 5 |         "min_coarse_rows": 32, 
 6 |         "matrix_coloring_scheme": "PARALLEL_GREEDY", 
 7 |         "smoother": "BLOCK_JACOBI", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "relaxation_factor": 0.5, 
11 |         "weight_formula": 1, 
12 |         "tolerance" : 1e-06, 
13 |         "norm": "L2", 
14 |         "postsweeps": 3, 
15 |         "merge_singletons": 2, 
16 |         "presweeps": 0, 
17 |         "selector": "MULTI_PAIRWISE", 
18 |         "scope": "main", 
19 |         "convergence": "RELATIVE_INI", 
20 |         "cycle": "F", 
21 |         "print_grid_stats": 1, 
22 |         "algorithm": "AGGREGATION", 
23 |         "solver": "AMG", 
24 |         "aggregation_passes": 1, 
25 |         "max_uncolored_percentage": 0.05, 
26 |         "coarse_solver": "DENSE_LU_SOLVER", 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "max_levels": 50
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/src/configs/eigen_configs/LOBPCG:
--------------------------------------------------------------------------------
 1 | config_version=2
 2 | default:eig_solver=LOBPCG
 3 | default:eig_max_iters=1000
 4 | default:eig_tolerance=1e-4
 5 | default:eig_which=largest
 6 | 
 7 | #gmres accelerator config
 8 | default:solver(main)=FGMRES
 9 | main:gmres_n_restart=10
10 | main:preconditioner(amg)=AMG
11 | 
12 | #outer solver setup
13 | main:convergence=RELATIVE_INI
14 | main:norm=L2
15 | main:use_scalar_norm=1
16 | main:max_iters=2
17 | main:tolerance=1e-4
18 | 
19 | #amg specific
20 | amg:max_iters=1
21 | amg:algorithm=AGGREGATION
22 | amg:selector=SIZE_2
23 | amg:cycle=V
24 | amg:smoother=MULTICOLOR_DILU
25 | amg:presweeps=0 
26 | amg:postsweeps=3 
27 | amg:error_scaling=0
28 | amg:max_levels=100
29 | amg:coarseAgenerator=LOW_DEG
30 | amg:matrix_coloring_scheme=PARALLEL_GREEDY
31 | amg:max_uncolored_percentage=0.05
32 | amg:relaxation_factor=0.75
33 | amg:coarse_solver=DENSE_LU_SOLVER
34 | amg:min_coarse_rows=32
35 | 
36 | 
37 | 
38 | #printing options
39 | #main:print_solve_stats=1
40 | main:monitor_residual=1
41 | #main:obtain_timings=1
42 | #amg:print_grid_stats=1
43 | 


--------------------------------------------------------------------------------
/include/cusp/detail/format_utils.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | namespace detail
10 | {
11 | // Declarations only; the definitions come from cusp/detail/format_utils.inl, included at the end of this header.
12 | template <typename OffsetArray, typename IndexArray>
13 | void offsets_to_indices(const OffsetArray& offsets, IndexArray& indices); // 'indices' is the writable argument
14 | 
15 | template <typename IndexArray, typename OffsetArray>
16 | void indices_to_offsets(const IndexArray& indices, OffsetArray& offsets); // 'offsets' is the writable argument
17 |     
18 | template <typename MatrixType, typename ArrayType>
19 | void extract_diagonal(const MatrixType& A, ArrayType& output); // 'output' is the writable argument
20 | 
21 | template <typename Array1, typename Array2, typename Array3>
22 | void sort_by_row(Array1& rows, Array2& columns, Array3& values); // all three arrays are non-const; presumably reordered together
23 | 
24 | template <typename Array1, typename Array2, typename Array3>
25 | void sort_by_row_and_column(Array1& rows, Array2& columns, Array3& values); // all three arrays are non-const; presumably reordered together
26 |     
27 | } // end namespace detail
28 | } // end namespace cusp
29 | 
30 | #include <cusp/detail/format_utils.inl>
31 | 
32 | 


--------------------------------------------------------------------------------
/src/configs/eigen_configs/INVERSE_FGMRES:
--------------------------------------------------------------------------------
 1 | config_version=2
 2 | default:eig_solver=INVERSE_ITERATION
 3 | default:eig_max_iters=400
 4 | default:eig_tolerance=1e-4
 5 | default:eig_which=smallest
 6 | 
 7 | #gmres accelerator config
 8 | default:solver(main)=FGMRES
 9 | main:gmres_n_restart=10
10 | main:preconditioner(amg)=AMG
11 | 
12 | #outer solver setup
13 | main:convergence=RELATIVE_INI
14 | main:norm=L2
15 | main:use_scalar_norm=1
16 | main:max_iters=100
17 | main:tolerance=1e-4
18 | 
19 | #amg specific
20 | amg:max_iters=1
21 | amg:algorithm=AGGREGATION
22 | amg:selector=SIZE_2
23 | amg:cycle=V
24 | amg:smoother=MULTICOLOR_DILU
25 | amg:presweeps=0 
26 | amg:postsweeps=3 
27 | amg:error_scaling=0
28 | amg:max_levels=100
29 | amg:coarseAgenerator=LOW_DEG
30 | amg:matrix_coloring_scheme=PARALLEL_GREEDY
31 | amg:max_uncolored_percentage=0.05
32 | amg:relaxation_factor=0.75
33 | amg:coarse_solver=DENSE_LU_SOLVER
34 | amg:min_coarse_rows=32
35 | 
36 | 
37 | 
38 | #printing options
39 | #main:print_solve_stats=1
40 | #main:monitor_residual=1
41 | #main:obtain_timings=1
42 | #amg:print_grid_stats=1
43 | 


--------------------------------------------------------------------------------
/src/configs/V-cheby_poly-smoother.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag":1,
 4 |     "solver": {
 5 |         "scope": "main",
 6 |         "print_grid_stats": 1, 
 7 |         "solver": "AMG", 
 8 |         "scaling":"NONE",
 9 |         "interpolator": "D2",
10 |         "aggressive_levels": 0,
11 |         "interp_max_elements": 4,
12 |         "max_row_sum": 0.9,
13 |         "print_solve_stats": 1, 
14 |         "obtain_timings": 1, 
15 |         "max_iters": 100, 
16 |         "monitor_residual": 1, 
17 |         "convergence": "RELATIVE_INI", 
18 |         "max_levels": 50, 
19 |         "cycle": "V", 
20 |         "smoother": {
21 |            "scope": "cheb_smoother",
22 |            "solver": "CHEBYSHEV_POLY",
23 |            "chebyshev_polynomial_order":2,
24 |            "max_iters":1,
25 |            "preconditioner": {
26 |                "solver": "JACOBI_L1"
27 |            },
28 |            "tolerance": 1e-4
29 |         },
30 |         "tolerance" : 1e-06, 
31 |         "norm": "L2", 
32 |         "presweeps": 0, 
33 |         "postsweeps":3
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_AGGREGATION_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "error_scaling": 0, 
 6 |             "print_grid_stats": 1, 
 7 |             "algorithm": "AGGREGATION", 
 8 |             "solver": "AMG", 
 9 |             "smoother": "BLOCK_JACOBI", 
10 |             "presweeps": 0, 
11 |             "selector": "SIZE_2", 
12 |             "coarse_solver": "NOSOLVER", 
13 |             "max_iters": 1, 
14 |             "min_coarse_rows": 32, 
15 |             "relaxation_factor": 0.75, 
16 |             "scope": "amg", 
17 |             "max_levels": 50, 
18 |             "postsweeps": 3, 
19 |             "cycle": "V"
20 |         }, 
21 |         "use_scalar_norm": 1, 
22 |         "solver": "FGMRES", 
23 |         "print_solve_stats": 1, 
24 |         "obtain_timings": 1, 
25 |         "max_iters": 100, 
26 |         "monitor_residual": 1, 
27 |         "gmres_n_restart": 32, 
28 |         "convergence": "RELATIVE_INI", 
29 |         "scope": "main", 
30 |         "tolerance" : 1e-06, 
31 |         "norm": "L2"
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/src/configs/PBICGSTAB.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_vis_data": 0, 
 6 |             "solver": "AMG", 
 7 |             "smoother": {
 8 |                 "scope": "jacobi", 
 9 |                 "solver": "BLOCK_JACOBI", 
10 |                 "monitor_residual": 0, 
11 |                 "print_solve_stats": 0
12 |             }, 
13 |             "print_solve_stats": 0, 
14 |             "interpolator": "D2",
15 |             "presweeps": 1, 
16 |             "max_iters": 1, 
17 |             "monitor_residual": 0, 
18 |             "store_res_history": 0, 
19 |             "scope": "amg", 
20 |             "max_levels": 50, 
21 |             "cycle": "V", 
22 |             "postsweeps": 1
23 |         }, 
24 |         "solver": "PBICGSTAB", 
25 |         "print_solve_stats": 1, 
26 |         "obtain_timings": 1, 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "convergence": "RELATIVE_INI", 
30 |         "scope": "main", 
31 |         "tolerance" : 1e-06, 
32 |         "norm": "L2"
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/include/amgx_eig_c.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "amgx_config.h"
10 | #include "amgx_c.h"
11 | 
12 | #if defined(__cplusplus)
13 | extern "C" {
14 | #endif
15 | /* Opaque eigensolver handle: a pointer to a struct with a single dummy char member. */
16 | typedef struct AMGX_eigensolver_handle_struct {char AMGX_eigensolver_handle_dummy;} *AMGX_eigensolver_handle;
17 | /* Presumably creates an eigensolver for the given resources, mode and config and returns its handle through 'ret'. */
18 | AMGX_RC AMGX_API AMGX_eigensolver_create(AMGX_eigensolver_handle *ret, AMGX_resources_handle rsc, AMGX_Mode mode, const AMGX_config_handle config_eigensolver);
19 | /* Takes the eigensolver and a matrix handle; presumably prepares the solver for that matrix. */
20 | AMGX_RC AMGX_API AMGX_eigensolver_setup(AMGX_eigensolver_handle eigensolver, AMGX_matrix_handle mtx);
21 | /* Takes the eigensolver and a vector handle 'a'. NOTE(review): meaning of 'a' is not visible from this header. */
22 | AMGX_RC AMGX_API AMGX_eigensolver_pagerank_setup(AMGX_eigensolver_handle eigensolver, AMGX_vector_handle a);
23 | /* Takes the eigensolver and a vector handle 'x'; presumably x carries the initial guess and/or the result -- confirm. */
24 | AMGX_RC AMGX_API AMGX_eigensolver_solve(AMGX_eigensolver_handle eigensolver, AMGX_vector_handle x);
25 | /* Presumably releases the eigensolver behind 'obj'. */
26 | AMGX_RC AMGX_API AMGX_eigensolver_destroy(AMGX_eigensolver_handle obj);
27 | 
28 | #if defined(__cplusplus)
29 | }//extern "C"
30 | #endif
31 | 


--------------------------------------------------------------------------------
/include/cusp/detail/functional.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file functional.h
 6 |  *  \brief Defines templated functors and traits analogous to what
 7 |  *         is found in stl and boost's functional.
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include <cusp/detail/config.h>
13 | 
14 | #include <thrust/functional.h>
15 | 
16 | namespace cusp
17 | {
18 | namespace detail
19 | {
20 | 
21 | template<typename T>
22 |   struct zero_function
23 | {
24 |   __host__ __device__ T operator()(const T &x) const {return T(0);} // ignores x and always returns T(0)
25 | }; // end zero_function
26 | 
27 | template<typename T>
28 |   struct identity_function
29 | {
30 |   __host__ __device__ const T& operator()(const T &x) const {return x;} // returns a reference to its argument, not a copy
31 | }; // end identity_function
32 | 
33 | } // end namespace detail
34 | } // end namespace cusp
35 | 
36 | // Add identity to amgx::thrust namespace for backward compatibility
37 | namespace amgx
38 | {
39 | namespace thrust
40 | {
41 |   template<typename T>
42 |   using identity = cusp::detail::identity_function<T>; // alias template: amgx::thrust::identity<T> names cusp::detail::identity_function<T>
43 | }
44 | }
45 | 
46 | 


--------------------------------------------------------------------------------
/src/configs/PBICGSTAB_CLASSICAL_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_vis_data": 0, 
 6 |             "solver": "AMG", 
 7 |             "smoother": {
 8 |                 "scope": "jacobi", 
 9 |                 "solver": "BLOCK_JACOBI", 
10 |                 "monitor_residual": 0, 
11 |                 "print_solve_stats": 0
12 |             }, 
13 |             "print_solve_stats": 0, 
14 |             "presweeps": 1, 
15 |             "interpolator": "D2",
16 |             "max_iters": 1, 
17 |             "monitor_residual": 0, 
18 |             "store_res_history": 0, 
19 |             "scope": "amg", 
20 |             "max_levels": 50, 
21 |             "cycle": "V", 
22 |             "postsweeps": 1
23 |         }, 
24 |         "solver": "PBICGSTAB", 
25 |         "print_solve_stats": 1, 
26 |         "obtain_timings": 1, 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "convergence": "RELATIVE_INI", 
30 |         "scope": "main", 
31 |         "tolerance" : 1e-06, 
32 |         "norm": "L2"
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/configs/V-cheby-aggres-L1-trunc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "interpolator":"D2",
 7 |         "interp_max_elements": 4,
 8 |         "aggressive_levels": 1,
 9 |         "print_solve_stats": 1, 
10 |         "obtain_timings": 1, 
11 |         "max_iters": 100, 
12 |         "coarse_solver": "NOSOLVER",
13 |         "monitor_residual": 1, 
14 |         "convergence": "RELATIVE_INI", 
15 |         "scope": "main", 
16 |         "max_levels": 50, 
17 |         "error_scaling":3,
18 |         "cycle": "V", 
19 |         "smoother": 
20 |         {
21 |             "solver": "CHEBYSHEV",
22 |             "preconditioner" : 
23 |             {
24 |                 "solver": "JACOBI_L1",
25 |                 "max_iters": 1
26 |             },
27 |             "max_iters": 1,
28 |             "chebyshev_polynomial_order" : 2,
29 |             "chebyshev_lambda_estimate_mode" : 2
30 |         },
31 |         "tolerance" : 1e-06, 
32 |         "norm": "L2", 
33 |         "presweeps": 0, 
34 |         "postsweeps": 1
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/configs/PCG_F.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "F", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCG", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCG_V.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "V", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCG", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCG_W.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "W", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCG", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/agg_cheb4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "print_grid_stats": 1, 
 6 |         "algorithm": "AGGREGATION", 
 7 |         "obtain_timings": 1, 
 8 |         "error_scaling": 3,
 9 |         "solver": "AMG", 
10 |         "smoother": 
11 |         {
12 |             "solver": "CHEBYSHEV",
13 |             "preconditioner" : 
14 |             {
15 |                 "solver": "JACOBI_L1",
16 |                 "max_iters": 1
17 |             },
18 |             "max_iters": 1,
19 |             "chebyshev_polynomial_order" : 4,
20 |             "chebyshev_lambda_estimate_mode" : 2
21 |         },
22 |         "presweeps": 0, 
23 |         "postsweeps": 1, 
24 |         "print_solve_stats": 1, 
25 |         "selector": "SIZE_8", 
26 |         "coarsest_sweeps": 1, 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "min_coarse_rows": 2, 
30 |         "scope": "main", 
31 |         "max_levels": 50, 
32 |         "convergence": "RELATIVE_INI", 
33 |         "tolerance" : 1e-06, 
34 |         "norm": "L2",
35 |         "cycle": "V"
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/include/classical/strength/all.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <classical/strength/strength_base.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Strength-of-connection policy that reports every connection as strong.
12 | template <class T_Config>
13 | class Strength_All : public Strength_Base<T_Config>
14 | {
15 |         typedef T_Config TConfig;
16 |         typedef typename TConfig::MatPrec ValueType;
17 |         typedef typename TConfig::IndPrec IndexType;
18 |     public:
19 |         Strength_All(AMG_Config &cfg, const std::string &cfg_scope) : Strength_Base<T_Config>(cfg, cfg_scope) {} // forwards both arguments to the base class
20 |         __host__ __device__
21 |         bool strongly_connected(ValueType val, ValueType threshold, ValueType diagonal)
22 |         {
23 |             return true; // unconditional: val, threshold and diagonal are not consulted
24 |         }
25 | };
26 | // Factory whose create() builds a Strength_All<T_Config> from (cfg, cfg_scope).
27 | template<class T_Config>
28 | class Strength_All_StrengthFactory : public StrengthFactory<T_Config>
29 | {
30 |     public:
31 |         Strength<T_Config> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Strength_All<T_Config>(cfg, cfg_scope); } // allocated with raw new; presumably the caller owns the result - TODO confirm
32 | };
33 | 
34 | } // namespace amgx
35 | 


--------------------------------------------------------------------------------
/external/rapidjson/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2011 Milo Yip
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/include/miscmath.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | { // NOTE(review): declared ahead of the #includes that follow, presumably so those headers can see it - confirm
 9 | template <class Matrix>  typename Matrix::value_type estimate_largest_eigen_value(Matrix &A);
10 | } // namespace amgx
11 | 
12 | #include <norm.h>
13 | #include <multiply.h>
14 | #include <blas.h>
15 | 
16 | namespace amgx
17 | {
18 | // Estimates the largest eigenvalue of A using 20 fixed rounds of scale-then-multiply, starting from the all-ones vector.
19 | template <class Matrix>
20 | typename Matrix::value_type estimate_largest_eigen_value(Matrix &A)
21 | {
22 |     typedef typename Matrix::TConfig TConfig;
23 |     typedef typename Matrix::value_type ValueTypeA; // not referenced in this function
24 |     typedef typename TConfig::VecPrec ValueTypeB;
25 |     typedef Vector<TConfig> VVector;
26 |     VVector x(A.get_num_rows()), y(A.get_num_rows());
27 |     fill(x, 1); // start vector: every entry set to 1
28 |     // Each round: divide x by its LMAX norm, call multiply(A, x, y) (presumably y = A*x), then swap x and y.
29 |     for (int i = 0; i < 20; i++)
30 |     {
31 |         ValueTypeB Lmax = get_norm(A, x, LMAX);
32 |         scal(x, ValueTypeB(1) / Lmax); // NOTE(review): no guard against Lmax == 0
33 |         multiply(A, x, y);
34 |         x.swap(y);
35 |     }
36 |     // After the last swap, x holds the final multiply output and y the scaled vector it was computed from.
37 |     ValueTypeB retval = get_norm(A, x, L2) / get_norm(A, y, L2);
38 |     return retval; // converted from ValueTypeB to Matrix::value_type on return
39 | }
40 | 
41 | } // namespace amgx
42 | 


--------------------------------------------------------------------------------
/src/configs/PCGF_CLASSICAL_F_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "F", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCGF", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCGF_CLASSICAL_V_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "V", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCGF", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCGF_CLASSICAL_W_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "W", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCGF", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCG_CLASSICAL_F_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "max_iters": 1, 
17 |             "interpolator": "D2",
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "F", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCG", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/PCG_CLASSICAL_W_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "presweeps": 1, 
16 |             "interpolator": "D2",
17 |             "max_iters": 1, 
18 |             "monitor_residual": 0, 
19 |             "store_res_history": 0, 
20 |             "scope": "amg", 
21 |             "max_levels": 50, 
22 |             "cycle": "W", 
23 |             "postsweeps": 1
24 |         }, 
25 |         "solver": "PCG", 
26 |         "print_solve_stats": 1, 
27 |         "obtain_timings": 1, 
28 |         "max_iters": 100, 
29 |         "monitor_residual": 1, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/include/cusp/detail/csr_matrix.inl:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cusp/convert.h>
 6 | 
 7 | namespace cusp
 8 | {
 9 | 
10 | //////////////////
11 | // Constructors //
12 | //////////////////
13 |         
14 | // construct from a different matrix type; the conversion is delegated to cusp::convert(matrix, *this)
15 | template <typename IndexType, typename ValueType, class MemorySpace>
16 | template <typename MatrixType>
17 | csr_matrix<IndexType,ValueType,MemorySpace>
18 |     ::csr_matrix(const MatrixType& matrix)
19 |     {
20 |         cusp::convert(matrix, *this);
21 |     }
22 | 
23 | //////////////////////
24 | // Member Functions //
25 | //////////////////////
26 | 
27 | // assignment from another matrix: converts `matrix` into *this via cusp::convert and returns *this
28 | template <typename IndexType, typename ValueType, class MemorySpace>
29 | template <typename MatrixType>
30 |     csr_matrix<IndexType,ValueType,MemorySpace>&
31 |     csr_matrix<IndexType,ValueType,MemorySpace>
32 |     ::operator=(const MatrixType& matrix)
33 |     {
34 |         cusp::convert(matrix, *this);
35 |         
36 |         return *this;
37 |     }
38 | 
39 | } // end namespace cusp
40 | 
41 | 


--------------------------------------------------------------------------------
/src/configs/V-cheby-smoother.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag":1,
 4 |     "solver": {
 5 |         "scope": "main",
 6 |         "print_grid_stats": 1, 
 7 |         "solver": "AMG", 
 8 |         "scaling":"DIAGONAL_SYMMETRIC",
 9 |         "interpolator": "D2",
10 |         "aggressive_levels": 0,
11 |         "interp_max_elements": 4,
12 |         "coarse_solver": "NOSOLVER",
13 |         "print_solve_stats": 1, 
14 |         "obtain_timings": 1, 
15 |         "max_iters": 100, 
16 |         "monitor_residual": 1, 
17 |         "convergence": "RELATIVE_INI", 
18 |         "max_levels": 50, 
19 |         "cycle": "V", 
20 |         "smoother": 
21 |         {
22 |             "solver": "CHEBYSHEV",
23 |             "preconditioner" : 
24 |             {
25 |                 "solver": "NOSOLVER",
26 |                 "max_iters": 1
27 |             },
28 |             "max_iters": 1,
29 |             "chebyshev_polynomial_order" : 4,
30 |             "chebyshev_lambda_estimate_mode" : 2
31 |         },
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2", 
34 |         "presweeps": 0, 
35 |         "postsweeps":1
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/external/rapidjson/include/rapidjson/filestream.h:
--------------------------------------------------------------------------------
 1 | #ifndef RAPIDJSON_FILESTREAM_H_
 2 | #define RAPIDJSON_FILESTREAM_H_
 3 | 
 4 | #include <cstdio>
 5 | 
 6 | namespace rapidjson {
 7 | 
 8 | //! Wrapper of C file stream for input or output.
 9 | /*!
10 | 	This simple wrapper does not check the validity of the stream.
11 | 	\implements Stream
12 | */
13 | class FileStream {
14 | public:
15 | 	typedef char Ch;	//!< Character type. Only supports char.
16 | 
17 | 	FileStream(FILE* fp) : fp_(fp), count_(0) { Read(); }	//!< Stores fp and immediately reads the first character.
18 | 	char Peek() const { return current_; }	//!< Returns the buffered character without advancing.
19 | 	char Take() { char c = current_; Read(); return c; }	//!< Returns the buffered character, then reads the next one.
20 | 	size_t Tell() const { return count_; }	//!< Number of characters fetched so far, including the buffered one.
21 | 	void Put(char c) { fputc(c, fp_); }	//!< Writes c with fputc; the return value is not checked.
22 | 
23 | 	// Not implemented
24 | 	char* PutBegin() { return 0; }
25 | 	size_t PutEnd(char*) { return 0; }
26 | 
27 | private:
28 | 	void Read() {	// Fetches one character into current_; at EOF stores NUL and leaves count_ unchanged.
29 | 		RAPIDJSON_ASSERT(fp_ != 0);	// NOTE(review): this header includes only <cstdio>; RAPIDJSON_ASSERT presumably comes from a header included earlier - confirm
30 | 		int c = fgetc(fp_);
31 | 		if (c != EOF) {
32 | 			current_ = (char)c;
33 | 			count_++;
34 | 		}
35 | 		else
36 | 			current_ = '\0';
37 | 	}
38 | 
39 | 	FILE* fp_;
40 | 	char current_;
41 | 	size_t count_;
42 | };
43 | 
44 | } // namespace rapidjson
45 | 
46 | #endif // RAPIDJSON_FILESTREAM_H_
47 | 


--------------------------------------------------------------------------------
/src/configs/GMRES_AMG_D2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1,
 4 |     "exception_handling" : 1,
 5 |     "solver": {
 6 |         "scope": "main",
 7 |         "print_grid_stats": 1, 
 8 |         "store_res_history": 1, 
 9 |         "solver": "GMRES", 
10 |         "print_solve_stats": 1, 
11 |         "obtain_timings": 1, 
12 |         "preconditioner": {
13 |             "interpolator": "D2", 
14 |             "print_grid_stats": 1, 
15 |             "solver": "AMG", 
16 |             "smoother": "JACOBI_L1", 
17 |             "presweeps": 2, 
18 |             "selector": "PMIS", 
19 |             "coarsest_sweeps": 2, 
20 |             "coarse_solver": "NOSOLVER", 
21 |             "max_iters": 1, 
22 |             "interp_max_elements": 4, 
23 |             "min_coarse_rows": 2, 
24 |             "scope": "amg_solver", 
25 |             "max_levels": 50, 
26 |             "cycle": "V", 
27 |             "postsweeps": 2
28 |         }, 
29 |         "max_iters": 100, 
30 |         "monitor_residual": 1, 
31 |         "gmres_n_restart": 10, 
32 |         "convergence": "RELATIVE_INI", 
33 |         "tolerance" : 1e-06, 
34 |         "norm": "L2"
35 |    }
36 | }
37 | 


--------------------------------------------------------------------------------
/include/cusp/detail/hyb_matrix.inl:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cusp/convert.h>
 6 | #include <cusp/ell_matrix.h>
 7 | #include <cusp/coo_matrix.h>
 8 | 
 9 | namespace cusp
10 | {
11 | 
12 | //////////////////
13 | // Constructors //
14 | //////////////////
15 |         
16 | // construct from another matrix type; the conversion is delegated to cusp::convert(matrix, *this)
17 | template <typename IndexType, typename ValueType, class MemorySpace>
18 | template <typename MatrixType>
19 | hyb_matrix<IndexType,ValueType,MemorySpace>
20 |     ::hyb_matrix(const MatrixType& matrix)
21 |     {
22 |         cusp::convert(matrix, *this);
23 |     }
24 | 
25 | //////////////////////
26 | // Member Functions //
27 | //////////////////////
28 | // assignment from another matrix: converts `matrix` into *this via cusp::convert and returns *this
29 | template <typename IndexType, typename ValueType, class MemorySpace>
30 | template <typename MatrixType>
31 |     hyb_matrix<IndexType,ValueType,MemorySpace>&
32 |     hyb_matrix<IndexType,ValueType,MemorySpace>
33 |     ::operator=(const MatrixType& matrix)
34 |     {
35 |         cusp::convert(matrix, *this);
36 |         
37 |         return *this;
38 |     }
39 | 
40 | } // end namespace cusp
41 | 
42 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_L1_TRUNC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "print_grid_stats": 1, 
 4 |     "store_res_history": 1, 
 5 |     "solver": {
 6 |        "scope": "main",
 7 |        "solver": "FGMRES", 
 8 |        "print_solve_stats": 1, 
 9 |        "obtain_timings": 1, 
10 |        "preconditioner": {
11 |            "interpolator": "D2", 
12 |            "print_grid_stats": 1, 
13 |            "solver": "AMG", 
14 |            "interp_max_elements": 4, 
15 |            "smoother": {
16 |                "relaxation_factor": 1, 
17 |                "scope": "jacobi", 
18 |                "solver": "JACOBI_L1"
19 |            }, 
20 |            "presweeps": 1, 
21 |            "coarsest_sweeps": 1, 
22 |            "coarse_solver": "NOSOLVER", 
23 |            "max_iters": 1, 
24 |            "max_row_sum": 0.9, 
25 |            "scope": "amg_solver", 
26 |            "max_levels": 50, 
27 |            "postsweeps": 1, 
28 |            "cycle": "V"
29 |        }, 
30 |        "max_iters": 100, 
31 |        "monitor_residual": 1, 
32 |        "gmres_n_restart": 10, 
33 |        "convergence": "RELATIVE_INI", 
34 |        "tolerance" : 1e-06, 
35 |        "norm": "L2"
36 |    }
37 | }
38 | 


--------------------------------------------------------------------------------
/src/configs/PCG_CLASSICAL_V_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "print_grid_stats": 1, 
 6 |             "print_vis_data": 0, 
 7 |             "solver": "AMG", 
 8 |             "smoother": {
 9 |                 "scope": "jacobi", 
10 |                 "solver": "BLOCK_JACOBI", 
11 |                 "monitor_residual": 0, 
12 |                 "print_solve_stats": 0
13 |             }, 
14 |             "print_solve_stats": 0, 
15 |             "aggressive_levels": 2,
16 |             "presweeps": 1, 
17 |             "interpolator": "D2",
18 |             "max_iters": 1, 
19 |             "monitor_residual": 0, 
20 |             "store_res_history": 0, 
21 |             "scope": "amg", 
22 |             "max_levels": 50, 
23 |             "cycle": "V", 
24 |             "postsweeps": 1
25 |         }, 
26 |         "solver": "PCG", 
27 |         "print_solve_stats": 1, 
28 |         "obtain_timings": 1, 
29 |         "max_iters": 100, 
30 |         "monitor_residual": 1, 
31 |         "convergence": "RELATIVE_INI", 
32 |         "scope": "main", 
33 |         "tolerance" : 1e-06, 
34 |         "norm": "L2"
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/solvers/dummy_solver.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <solvers/dummy_solver.h>
 6 | #include <cassert>
 7 | 
 8 | #include "amgx_types/util.h"
 9 | 
10 | namespace amgx
11 | {
12 | // Setup hook with an empty body: nothing is prepared and reuse_matrix_structure is not read.
13 | template<class T_Config>
14 | void
15 | Dummy_Solver<T_Config>::solver_setup(bool reuse_matrix_structure)
16 | {
17 | }
18 | 
19 | // Does no solve work: zero-fills x when xIsZero is set, then returns the status reported by converged(b, x).
20 | template<class T_Config>
21 | AMGX_STATUS
22 | Dummy_Solver<T_Config>::solve_iteration( VVector &b, VVector &x, bool xIsZero )
23 | {
24 |     if (xIsZero)
25 |     {
26 |         thrust_wrapper::fill<T_Config::memSpace>(x.begin(), x.end(), types::util<ValueTypeB>::get_zero());
27 |         cudaCheckError();
28 |     }
29 | 
30 |     return (this->converged(b, x));
31 | }
32 | 
33 | /****************************************
34 |  * Explicit instantiations
35 |  ***************************************/
36 | #define AMGX_CASE_LINE(CASE) template class Dummy_Solver<TemplateMode<CASE>::Type>;
37 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE) // presumably expands AMGX_CASE_LINE once per supported build mode - confirm
38 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE)
39 | #undef AMGX_CASE_LINE // the helper macro is only needed for the two lines above
40 | 
41 | } // namespace amgx
42 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/elementwise.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace cusp
 8 | {
 9 | namespace detail
10 | {
11 | namespace device
12 | {
13 | 
14 | //template <typename Matrix1,
15 | //          typename Matrix2,
16 | //          typename Matrix3,
17 | //          typename BinaryFunction>
18 | //void transform_elementwise(const Matrix1& A,
19 | //                           const Matrix2& B,
20 | //                                 Matrix3& C,
21 | //                                 BinaryFunction op);
22 | // Declaration only: A and B are read-only inputs, C is the mutable output (presumably C = A + B; confirm in elementwise.inl).
23 | template <typename Matrix1,
24 |           typename Matrix2,
25 |           typename Matrix3>
26 | void add(const Matrix1& A,
27 |          const Matrix2& B,
28 |                Matrix3& C);
29 | // Declaration only: A and B are read-only inputs, C is the mutable output (presumably C = A - B; confirm in elementwise.inl).
30 | template <typename Matrix1,
31 |           typename Matrix2,
32 |           typename Matrix3>
33 | void subtract(const Matrix1& A,
34 |               const Matrix2& B,
35 |                     Matrix3& C);
36 | 
37 | } // end namespace device
38 | } // end namespace detail
39 | } // end namespace cusp
40 | 
41 | #include <cusp/detail/device/elementwise.inl>
42 | 
43 | 


--------------------------------------------------------------------------------
/include/cusp/detail/config.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file config.h
 6 |  *  \brief Defines platform configuration.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <thrust/detail/config.h>
12 | #include <thrust/version.h>
13 | 
14 | // Cusp requires Thrust v1.5.0 or newer (THRUST_VERSION >= 100500)
15 | #if THRUST_VERSION < 100500
16 | #error "Thrust v1.5.0 or newer is required"
17 | #endif // THRUST_VERSION < 100500
18 | 
19 | // decorator for deprecated features: reuse THRUST_DEPRECATED when Thrust defines it
20 | #ifdef THRUST_DEPRECATED
21 | #define CUSP_DEPRECATED THRUST_DEPRECATED
22 | #else
23 | // THRUST_DEPRECATED not available in this version, use C++14 attribute or empty macro
24 | #if __cplusplus >= 201402L
25 | #define CUSP_DEPRECATED [[deprecated]]
26 | #else
27 | #define CUSP_DEPRECATED
28 | #endif // __cplusplus >= 201402L
29 | #endif // THRUST_DEPRECATED
30 | 
31 | // hooks for profiling
32 | #if defined(CUSP_PROFILE_ENABLED)
33 | // profiling enabled
34 | #define CUSP_PROFILE_SCOPED()  PROFILE_SCOPED()
35 | #define CUSP_PROFILE_DUMP()    cusp::detail::profiler::dump()
36 | #include <cusp/detail/profiler.h>
37 | #else
38 | // profiling disabled
39 | #define CUSP_PROFILE_SCOPED()
40 | #define CUSP_PROFILE_DUMP()
41 | #endif
42 | 
43 | 


--------------------------------------------------------------------------------
/include/cusp/precond/aggregate.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/config.h>
 8 | 
 9 | #include <cusp/csr_matrix.h>
10 | #include <cusp/coo_matrix.h>
11 | 
12 | #include <thrust/count.h>
13 | 
14 | namespace cusp
15 | {
16 | namespace precond
17 | {
18 | 	template <typename IndexType, typename ValueType, typename MemorySpace, // mis_to_aggregates: C and mis are read-only inputs, aggregates is the output array
19 | 		  typename ArrayType>
20 | 	void mis_to_aggregates(const cusp::coo_matrix<IndexType,ValueType,MemorySpace>& C,
21 | 			       const ArrayType& mis,
22 | 				     ArrayType& aggregates);
23 | 	// Overload for a COO matrix in device memory; only 'aggregates' is writable.
24 | 	template <typename IndexType, typename ValueType,
25 | 		  typename ArrayType>
26 | 	void standard_aggregation(const cusp::coo_matrix<IndexType,ValueType,cusp::device_memory>& C,
27 | 					ArrayType& aggregates);
28 | 	// Overload for a CSR matrix in host memory; only 'aggregates' is writable.
29 | 	template <typename IndexType, typename ValueType,
30 | 		  typename ArrayType>
31 | 	void standard_aggregation(const cusp::csr_matrix<IndexType,ValueType,cusp::host_memory>& C,
32 | 					ArrayType& aggregates);
33 | 
34 | } // end namespace precond
35 | } // end namespace cusp
36 | 
37 | #include <cusp/precond/detail/aggregate.inl>
38 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_AGGREGATION.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "error_scaling": 0, 
 6 |             "print_grid_stats": 1, 
 7 |             "max_uncolored_percentage": 0.05, 
 8 |             "algorithm": "AGGREGATION", 
 9 |             "solver": "AMG", 
10 |             "smoother": "MULTICOLOR_DILU", 
11 |             "presweeps": 0, 
12 |             "selector": "SIZE_2", 
13 |             "coarse_solver": "DENSE_LU_SOLVER", 
14 |             "max_iters": 1, 
15 |             "postsweeps": 3, 
16 |             "min_coarse_rows": 32, 
17 |             "relaxation_factor": 0.75, 
18 |             "scope": "amg", 
19 |             "max_levels": 50, 
20 |             "matrix_coloring_scheme": "PARALLEL_GREEDY", 
21 |             "cycle": "V"
22 |         }, 
23 |         "use_scalar_norm": 1, 
24 |         "solver": "FGMRES", 
25 |         "print_solve_stats": 1, 
26 |         "obtain_timings": 1, 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "gmres_n_restart": 10, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_AGGREGATION_DILU.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "preconditioner": {
 5 |             "error_scaling": 0, 
 6 |             "print_grid_stats": 1, 
 7 |             "max_uncolored_percentage": 0.05, 
 8 |             "algorithm": "AGGREGATION", 
 9 |             "solver": "AMG", 
10 |             "smoother": "MULTICOLOR_DILU", 
11 |             "presweeps": 0, 
12 |             "selector": "SIZE_2", 
13 |             "coarse_solver": "DENSE_LU_SOLVER", 
14 |             "max_iters": 1, 
15 |             "postsweeps": 3, 
16 |             "min_coarse_rows": 32, 
17 |             "relaxation_factor": 0.75, 
18 |             "scope": "amg", 
19 |             "max_levels": 50, 
20 |             "matrix_coloring_scheme": "PARALLEL_GREEDY", 
21 |             "cycle": "V"
22 |         }, 
23 |         "use_scalar_norm": 1, 
24 |         "solver": "FGMRES", 
25 |         "print_solve_stats": 1, 
26 |         "obtain_timings": 1, 
27 |         "max_iters": 100, 
28 |         "monitor_residual": 1, 
29 |         "gmres_n_restart": 10, 
30 |         "convergence": "RELATIVE_INI", 
31 |         "scope": "main", 
32 |         "tolerance" : 1e-06, 
33 |         "norm": "L2"
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/configs/V-cheby-aggres-L1-trunc-userLambda.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "print_grid_stats": 1, 
 5 |         "solver": "AMG", 
 6 |         "interpolator":"D2",
 7 |         "interp_max_elements": 4,
 8 |         "aggressive_levels": 1,
 9 |         "print_solve_stats": 1, 
10 |         "obtain_timings": 1, 
11 |         "max_iters": 100, 
12 |         "coarse_solver": "NOSOLVER",
13 |         "monitor_residual": 1, 
14 |         "convergence": "RELATIVE_INI", 
15 |         "scope": "main", 
16 |         "max_levels": 50, 
17 |         "error_scaling":3,
18 |         "cycle": "V", 
19 |         "smoother": 
20 |         {
21 |             "solver": "CHEBYSHEV",
22 |             "preconditioner" : 
23 |             {
24 |                 "solver": "JACOBI_L1",
25 |                 "max_iters": 1
26 |             },
27 |             "max_iters": 1,
28 |             "chebyshev_polynomial_order" : 2,
29 |             "chebyshev_lambda_estimate_mode" : 3,
30 |             "cheby_max_lambda" : 0.92,
31 |             "cheby_min_lambda" : 0.07
32 |         },
33 |         "tolerance" : 1e-06, 
34 |         "norm": "L2", 
35 |         "presweeps": 0, 
36 |         "postsweeps": 1
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/amgx_c_common.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include "amgx_c_common.h"
 6 | 
 7 | namespace amgx
 8 | {
 9 | // Maps an internal AMGX_ERROR code onto the C API AMGX_RC type by carrying over its integer value.
10 | AMGX_RC getCAPIerror_x(AMGX_ERROR err)
11 | {
12 |     return static_cast<AMGX_RC>(static_cast<int>(err));
13 | }
14 | // Maps a C API AMGX_RC code back onto the internal AMGX_ERROR type by carrying over its integer value.
15 | AMGX_ERROR getAMGXerror(AMGX_RC err)
16 | {
17 |     return static_cast<AMGX_ERROR>(static_cast<int>(err));
18 | }
19 | 
20 | // Terminates with code err: MPI_Abort when MPI is compiled in and initialized, plain exit() otherwise.
21 | void amgx_error_exit(Resources *rsc, int err)
22 | {
23 | #ifdef AMGX_WITH_MPI
24 |     int mpi_up = 0;
25 |     MPI_Initialized(&mpi_up);
26 | 
27 |     if (!mpi_up)
28 |     {
29 |         // MPI was never initialized: leave through the regular process exit.
30 |         exit(err);
31 |     }
32 |     else if (rsc != NULL)
33 |     {
34 |         // Abort on the communicator held by the resources object.
35 |         MPI_Abort(*(rsc->getMpiComm()), err);
36 |     }
37 |     else
38 |     {
39 |         // No resources object was given: abort on the world communicator.
40 |         MPI_Abort(MPI_COMM_WORLD, err);
41 |     }
42 | 
43 | #else
44 |     exit(err);
45 | #endif
46 | }
47 | // Returns the single MemCArrManager instance, a function-local static constructed on the first call.
48 | MemCArrManager &get_c_arr_mem_manager(void)
49 | {
50 |     static MemCArrManager manager;
51 |     return manager;
52 | }
53 | 
54 | 
55 | } // namespace amgx


--------------------------------------------------------------------------------
/include/classical/strength/ahat.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <classical/strength/strength_base.h>
 8 | 
 9 | namespace amgx
10 | {
11 | 
12 | template <class T_Config> // Strength policy derived from Strength_Base; this class only adds the comparison below
13 | class Strength_Ahat : public Strength_Base<T_Config>
14 | {
15 |         typedef T_Config TConfig;
16 |         typedef typename TConfig::MatPrec ValueType; // value type used for val, threshold and diagonal
17 |         typedef typename TConfig::IndPrec IndexType; // NOTE(review): not referenced inside this class
18 |     public:
19 |         Strength_Ahat(AMG_Config &cfg, const std::string &cfg_scope) : Strength_Base<T_Config>(cfg, cfg_scope) {} // forwards both arguments to the base class
20 |         __host__ __device__
21 |         bool strongly_connected(ValueType val, ValueType threshold, ValueType diagonal)
22 |         {
23 |             // True when -val >= -threshold; 'diagonal' is unused (disabled variant: (diagonal<0.0) ? val>threshold : val<threshold).
24 |             return  -val >= -threshold;
25 |         }
26 | };
27 | // Factory that creates Strength_Ahat objects through the StrengthFactory<T_Config> interface.
28 | template<class T_Config>
29 | class Strength_Ahat_StrengthFactory: public StrengthFactory<T_Config>
30 | {
31 |     public:
32 |         Strength<T_Config> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Strength_Ahat<T_Config>(cfg, cfg_scope); } // allocated with new; NOTE(review): the caller presumably owns and deletes it
33 | };
34 | 
35 | } // namespace amgx
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/include/classical/interpolators/common.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <classical/selectors/selector.h>
 7 | #include <classical/interpolators/interpolator.h>
 8 | 
 9 | namespace amgx
10 | {
11 | 
12 | /*
13 |  * holds general routines common to both distance1 and distance2
14 |  */
15 | // Unary predicate on int: true for zero and positive values, false for negative ones.
16 | struct is_non_neg
17 | {
18 |     __host__ __device__
19 |     bool operator()(const int &value)
20 |     {
21 |         return 0 <= value;
22 |     }
23 | };
24 | // Device-only helper: returns true when x >= T(0) (zero counts as non-negative), false otherwise.
25 | template< typename T >
26 | __device__ __forceinline__
27 | bool
28 | sign( T x ) 
29 | { 
30 | 	return x >= T(0); 
31 | }
32 | 
33 | // CUDA kernel declarations only; the definitions are not part of this header.
34 | __global__
35 | void coarseMarkKernel(int *cf_map, int *mark, const int numEntries);
36 | // Same parameter list as coarseMarkKernel; NOTE(review): the exact update of cf_map/mark is not visible here.
37 | __global__
38 | void modifyCoarseMapKernel(int *cf_map, int *mark, const int numEntries);
39 | // Reads cf_map and the per-row C_hat start/end arrays (all const) and writes nonZerosPerRow.
40 | __global__
41 | void nonZerosPerRowKernel(const int num_rows, const int *cf_map, const int *C_hat_start,
42 |                           const int *C_hat_end, int *nonZerosPerRow);
43 | // Variant that takes a single C_hat size array instead of separate start/end arrays.
44 | __global__
45 | void nonZerosPerRowSizeKernel(const int num_rows, const int *cf_map, const int *C_hat_size,
46 |                               int *nonZerosPerRow);
47 | 
48 | } // namespace amgx
49 | 


--------------------------------------------------------------------------------
/include/cusp/memory.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file memory.h
 6 |  *  \brief Memory spaces and allocators
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | #include <thrust/iterator/iterator_traits.h>
13 | 
14 | namespace cusp
15 | {
16 |     using host_memory = amgx::thrust::host_system_tag; // cusp memory-space tags are aliases of the thrust system tags
17 |     using device_memory = amgx::thrust::device_system_tag;
18 |     using any_memory = amgx::thrust::any_system_tag;
19 | // Forward declaration only; NOTE(review): the definition presumably comes from cusp/detail/memory.inl, included below.
20 |   template<typename T, typename MemorySpace>
21 |   struct default_memory_allocator;
22 |   // Forward declaration; the second and third memory spaces default to any_memory.
23 |   template <typename MemorySpace1, typename MemorySpace2=any_memory, typename MemorySpace3=any_memory>
24 |   struct minimum_space;
25 | 
26 | 
27 | } // end namespace cusp
28 | 
29 | #include <cusp/detail/memory.inl>
30 | #include <basic_types.h>
31 | 
32 | namespace cusp
33 | {
34 |     template <typename T> struct CuspMemMap; // primary template is declared but not defined: only the three tags below have a mapping
35 |     template <> struct CuspMemMap<host_memory> { static const int value = AMGX_host; }; // host tag -> AMGX_host
36 |     template <> struct CuspMemMap<device_memory> { static const int value = AMGX_device; }; // device tag -> AMGX_device
37 |     template <> struct CuspMemMap<any_memory> { static const int value = AMGX_host; }; // any_memory also maps to AMGX_host
38 | } // end namespace cusp
39 | 
40 | 


--------------------------------------------------------------------------------
/include/memory_info.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | namespace amgx
 8 | {
 9 | class MemoryInfo // Static helpers around cudaMemGetInfo; sizes are converted from bytes by dividing by 1024 three times (GiB)
10 | {
11 |     public:
12 |         static float getTotalMemory() // total memory reported by cudaMemGetInfo, in GiB
13 |         {
14 |             size_t free = 0;  // zero-initialized: the result of cudaMemGetInfo is not checked, so a failed call must not leave these indeterminate
15 |             size_t total = 0;
16 |             cudaMemGetInfo(&free, &total);
17 |             return total / 1024.0 / 1024 / 1024;
18 |         }
19 |         // Free memory in GiB. NOTE(review): the size_t return type truncates the value to whole GiB, unlike the float getters.
20 |         static size_t getFreeMemory()
21 |         {
22 |             size_t free = 0;  // see getTotalMemory: 0 is returned if the query fails
23 |             size_t total = 0;
24 |             cudaMemGetInfo(&free, &total);
25 |             return free / 1024.0 / 1024 / 1024;
26 |         }
27 |         // Largest (total - free) value recorded so far by updateMaxMemoryUsage, in GiB.
28 |         static float getMaxMemoryUsage()
29 |         {
30 |             return max_allocated / 1024.0 / 1024 / 1024;
31 |         }
32 |         // Samples the current usage (total - free) and raises max_allocated if the sample is larger.
33 |         static void updateMaxMemoryUsage()
34 |         {
35 |             size_t free = 0;  // a failed query yields allocated == 0, which never raises the maximum
36 |             size_t total = 0;
37 |             cudaMemGetInfo(&free, &total);
38 |             size_t allocated = total - free;
39 | 
40 |             if (allocated > max_allocated)
41 |             {
42 |                 max_allocated = allocated;
43 |             }
44 |         }
45 |     private:
46 |         static size_t max_allocated; // declared here only; the definition must live in a source file
47 | };
48 | }
49 | 


--------------------------------------------------------------------------------
/src/tests/version_test.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include "unit_test.h"
 6 | #include "version.h"
 7 | #include "amgx_c.h"
 8 | 
 9 | namespace amgx
10 | 
11 | {
12 | 
13 | // parameter is used as test name
14 | DECLARE_UNITTEST_BEGIN(CAPIVersionCheck);
15 | // Checks that AMGX_get_api_version reports the major/minor values of the __AMGX_API_VERSION_* macros.
16 | void run()
17 | {
18 |     int major, minor;
19 |     AMGX_get_api_version(&major, &minor);
20 |     UNITTEST_ASSERT_EQUAL(major, __AMGX_API_VERSION_MAJOR);
21 |     UNITTEST_ASSERT_EQUAL(minor, __AMGX_API_VERSION_MINOR);
22 | }
23 | 
24 | DECLARE_UNITTEST_END(CAPIVersionCheck);
25 | 
26 | 
27 | // if you want to be able run this test for all available configs you can write this:
28 | //#define AMGX_CASE_LINE(CASE) SampleTest <TemplateMode<CASE>::Type>  TemplateTest_##CASE;
29 | //  AMGX_FORALL_BUILDS(AMGX_CASE_LINE)
30 | //#undef AMGX_CASE_LINE
31 | 
32 | // or run for all device configs
33 | //#define AMGX_CASE_LINE(CASE) SampleTest <TemplateMode<CASE>::Type>  TemplateTest_##CASE;
34 | //  AMGX_FORALL_BUILDS_DEVICE(AMGX_CASE_LINE)
35 | //#undef AMGX_CASE_LINE
36 | 
37 | // or you can specify several desired configs
38 | CAPIVersionCheck<TemplateMode<AMGX_mode_dDDI>::Type>  CAPIVersionCheck_dDDI;
39 | 
40 | 
41 | } //namespace amgx
42 | 


--------------------------------------------------------------------------------
/include/cusp/detail/dispatch/transpose.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | #pragma once
 5 | #include <cusp/detail/host/transpose.h>
 6 | #include <cusp/detail/device/transpose.h>
 7 | 
 8 | namespace cusp
 9 | {
10 | namespace detail
11 | {
12 | namespace dispatch
13 | {
14 | // Overloads selected by the memory-space tag (third argument); each forwards A and At with their format tags.
15 | ////////////////
16 | // Host Paths //
17 | ////////////////
18 | template <typename MatrixType1,   typename MatrixType2>
19 | void transpose(const MatrixType1& A, MatrixType2& At,
20 |                cusp::host_memory)
21 | {
22 |     cusp::detail::host::transpose(A, At,
23 |                                   typename MatrixType1::format(),
24 |                                   typename MatrixType2::format());
25 | }
26 | 
27 | //////////////////
28 | // Device Paths //
29 | //////////////////
30 | template <typename MatrixType1,   typename MatrixType2>
31 | void transpose(const MatrixType1& A, MatrixType2& At,
32 |                cusp::device_memory)
33 | {
34 |     cusp::detail::device::transpose(A, At,
35 |                                     typename MatrixType1::format(),
36 |                                     typename MatrixType2::format());
37 | }
38 | 
39 | } // end namespace dispatch
40 | } // end namespace detail
41 | } // end namespace cusp
42 | 
43 | 


--------------------------------------------------------------------------------
/src/operators/deflated_multiply_operator.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | namespace amgx
 6 | {
 7 | 
 8 | template <class T_Config> class Operator;
 9 | 
10 | }
11 | 
12 | #include <operators/deflated_multiply_operator.h>
13 | #include <blas.h>
14 | 
15 | namespace amgx
16 | {
17 | // Applies the deflated operator to v and stores the result in res; helper semantics are hedged where not visible here.
18 | template <typename TConfig>
19 | void DeflatedMultiplyOperator<TConfig>::apply(const Vector<TConfig> &v, Vector<TConfig> &res, ViewType view)
20 | {
21 |     Operator<TConfig> &A = *m_A;
22 |     int offset, size;
23 |     A.getOffsetAndSizeForView(view, &offset, &size); // offset/size for the requested view, as reported by the wrapped operator
24 |     copy(v, *m_work, offset, size); // work vector starts from v (presumably limited to the offset/size range -- confirm in blas.h)
25 |     ValueTypeVec xtv = dot(A, *m_x, *m_work);
26 |     axpy(*m_x, *m_work, types::util<ValueTypeVec>::invert(xtv), offset, size); // presumably work += invert(xtv) * x, removing the x component if invert() negates -- TODO confirm
27 |     A.apply(*m_work, res, OWNED); // NOTE(review): always passes OWNED here rather than the caller's view
28 |     axpy(*m_work, res, types::util<ValueTypeVec>::invert(m_mu), offset, size); // presumably res += invert(m_mu) * work (shift by m_mu) -- TODO confirm
29 |     ValueTypeVec xtres = dot(A, *m_x, res);
30 |     axpy(*m_x, res, types::util<ValueTypeVec>::invert(xtres), offset, size); // same x-based correction, now applied to res
31 | }
32 | 
33 | #define AMGX_CASE_LINE(CASE) template class DeflatedMultiplyOperator<TemplateMode<CASE>::Type>;
34 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE)
35 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE)
36 | #undef AMGX_CASE_LINE
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/ci/containers/x86_64-ubuntu18.04-gnu7-cuda10.2.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | """
 6 | AmgX base image: x86_64-ubuntu18.04-gnu-cuda10.2
 7 | """
 8 | 
 9 | Stage0 += comment(__doc__, reformat=False)
10 | Stage0 += baseimage(image='nvidia/cuda:10.2-devel-ubuntu18.04')
11 | 
12 | # Last compiler supported for Ubuntu 18.04 by CUDA 10.2
13 | # https://docs.nvidia.com/cuda/archive/10.2/cuda-installation-guide-linux/index.html#system-requirements
14 | compiler = gnu()
15 | Stage0 += compiler
16 | 
17 | # Current minimum version required by AMGX
18 | Stage0 += cmake(eula=True, version='3.7.0')
19 | 
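20 | # MPI stack: Mellanox OFED, GDRCopy, KNEM and UCX, followed by OpenMPI 4.0.3 built with CUDA, InfiniBand, PMIx and UCX support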
21 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0')
22 | Stage0 += gdrcopy(ldconfig=True, version='2.0')
23 | Stage0 += knem(ldconfig=True, version='1.1.3')
24 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True)
25 | Stage0 += openmpi(
26 |     cuda=True,
27 |     infiniband=True,
28 |     version='4.0.3',
29 |     pmix=True,
30 |     ucx=True,
31 |     toolchain=compiler.toolchain
32 | )
33 | Stage0 += environment(multinode_vars = {
34 |     'OMPI_MCA_pml': 'ucx',
35 |     'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib',
36 |     'UCX_MEMTYPE_CACHE': 'n',
37 |     'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm'
38 | })
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/compilation-issue-report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Compilation issue report
 3 | about: Configuration error with CMake, or build error.
 4 | title: "[Build]"
 5 | labels: build issues
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the issue**
11 | 
12 | A clear and concise description of what the issue is.
13 | 
14 | **Environment information:**
15 |  - OS: [e.g. `Windows 10`, `Ubuntu 22.04`]
16 |  - Compiler version: [e.g. `gcc 9.3.0`, `MSVC 14.31`]
17 |  - CMake version: [e.g. `3.23`]
18 |  - CUDA used for AMGX compilation: [e.g. `CUDA 11.7.0`]
19 |  - MPI version (if applicable): [e.g. `OpenMPI 4.0.3`, `MS-MPI v10.1.2`]
20 |  - AMGX version or commit hash [e.g. `v2.3.0`, `34232979e993d349a03486f7892830a1209b2fc9`]
21 |  - Any related environment variables information
22 | 
23 | **Configuration information** 
24 | 
25 | Provide your `cmake` command line that was used for configuration and its full output.
26 | 
27 | **Compilation information**
28 | 
29 | Provide your make command
30 | 
31 | **Issue information** 
32 | 
33 | Provide any error messages from CMake or your compiler. It also helps greatly to attach the output of the `make` command rerun with `VERBOSE=1`, which shows the exact host compiler command that produces the error.
34 | 
35 | **Additional context**
36 | 
37 | Add any other context about the problem here.
38 | 


--------------------------------------------------------------------------------
/ci/containers/x86_64-ubuntu18.04-gnu8-cuda11.0.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | """
 6 | AmgX base image: x86_64-ubuntu18.04-gnu-cuda11.0
 7 | """
 8 | 
 9 | Stage0 += comment(__doc__, reformat=False)
10 | Stage0 += baseimage(image='nvidia/cuda:11.0-devel-ubuntu18.04')
11 | 
12 | # Last compiler supported for Ubuntu 18.04 by CUDA 11.0
13 | # https://docs.nvidia.com/cuda/archive/11.0/cuda-installation-guide-linux/index.html#system-requirements
14 | compiler = gnu(version='8')
15 | Stage0 += compiler
16 | 
17 | # Current minimum version required by AMGX
18 | Stage0 += cmake(eula=True, version='3.7.0')
19 | 
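20 | # MPI stack: Mellanox OFED, GDRCopy, KNEM and UCX, followed by OpenMPI 4.0.3 built with CUDA, InfiniBand, PMIx and UCX support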
21 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0')
22 | Stage0 += gdrcopy(ldconfig=True, version='2.0')
23 | Stage0 += knem(ldconfig=True, version='1.1.3')
24 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True)
25 | Stage0 += openmpi(
26 |     cuda=True,
27 |     infiniband=True,
28 |     version='4.0.3',
29 |     pmix=True,
30 |     ucx=True,
31 |     toolchain=compiler.toolchain
32 | )
33 | Stage0 += environment(multinode_vars = {
34 |     'OMPI_MCA_pml': 'ucx',
35 |     'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib',
36 |     'UCX_MEMTYPE_CACHE': 'n',
37 |     'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm'
38 | })
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: 'AMGX crashes, APIs return unexpected errors, etc. '
 4 | title: "[Issue]"
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the issue**
11 | 
12 | A clear and concise description of what the issue is.
13 | 
14 | **Environment information:**
15 |  - OS: [e.g. `Windows 10`, `Ubuntu 22.04`]
16 |  - CUDA runtime: [e.g. `CUDA 11.7.0`]
17 |  - MPI version (if applicable): [e.g. `OpenMPI 4.0.3`, `MS-MPI v10.1.2`]
18 |  - AMGX version or commit hash [e.g. `v2.3.0`, `34232979e993d349a03486f7892830a1209b2fc9`]
19 |  - NVIDIA driver: [e.g. 517.40]
20 |  - NVIDIA GPU: [e.g. NVIDIA V100]
21 |  - Any related environment variables information
22 | 
23 | **AMGX solver configuration** 
24 | 
25 | If you used AMGX solver configuration to initialize a solver - provide it here.
26 | 
27 | **Matrix Data**
28 | 
29 | It would be very helpful if you can provide matrix data that reproduces the error. If you can share it (matrix, right hand side, initial solution) - describe how to get it or provide download links for this data.
30 | 
31 | **Reproduction steps** 
32 | 
33 | If your AMGX workflow differs from the AMGX examples, provide a minimal reproducible example for the reported issue.
34 | 
35 | **Additional context**
36 | 
37 | Add any other context about the problem here.
38 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_AGGRESSIVE_L1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "scope": "main",
 5 |         "print_grid_stats": 1, 
 6 |         "store_res_history": 1, 
 7 |         "solver": "FGMRES", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |             "interpolator": "D2", 
12 |             "print_grid_stats": 1, 
13 |             "aggressive_levels": 1, 
14 |             "solver": "AMG", 
15 |             "smoother": {
16 |                 "relaxation_factor": 1, 
17 |                 "scope": "jacobi", 
18 |                 "solver": "JACOBI_L1"
19 |             }, 
20 |             "presweeps": 1, 
21 |             "selector": "PMIS", 
22 |             "coarsest_sweeps": 1, 
23 |             "coarse_solver": "NOSOLVER", 
24 |             "max_iters": 1, 
25 |             "max_row_sum": 0.9, 
26 |             "strength_threshold": 0.25, 
27 |             "min_coarse_rows": 2, 
28 |             "scope": "amg_solver", 
29 |             "max_levels": 50, 
30 |             "cycle": "V", 
31 |             "postsweeps": 1
32 |         }, 
33 |         "max_iters": 100, 
34 |         "monitor_residual": 1, 
35 |         "gmres_n_restart": 10, 
36 |         "convergence": "RELATIVE_INI", 
37 |         "tolerance" : 1e-06, 
38 |         "norm": "L2"
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/include/eigensolvers/qr.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <matrix.h>
 8 | #include <stack>
 9 | 
10 | namespace amgx
11 | {
12 | 
13 | template <typename TConfig>
14 | class HouseholderQR // QR helper bound to a matrix; this header only declares the interface
15 | {
16 |     public:
17 |         typedef Matrix<TConfig> TMatrix;
18 |         typedef Vector<TConfig> TVector;
19 |         // Variants of TConfig with the memory space set to host and to device.
20 |         typedef typename TConfig::template setMemSpace<AMGX_host  >::Type TConfig_h;
21 |         typedef typename TConfig::template setMemSpace<AMGX_device>::Type TConfig_d;
22 | 
23 |         typedef Vector<TConfig_h> Vector_h;
24 |         typedef Vector<TConfig_d> Vector_d;
25 | 
26 |         typedef typename TConfig::VecPrec ValueTypeVec;
27 |         // Public interface: construct from a matrix reference, then call QR_decomposition on a vector (non-const reference).
28 |         HouseholderQR(TMatrix &A);
29 |         void QR_decomposition(TVector &V);
30 |     private:
31 |         void QR(TVector &V);
32 |         void QR(TVector &V, TVector &R);
33 |         void send_vector(TVector &V, int destination);
34 |         void receive_vector(TVector &V, int source);
35 |         void inverse_phase(TVector &V, TVector &R, int root);
36 |     private:
37 |         TMatrix &m_A; // reference member: the referenced matrix must outlive this object
38 |         Vector_h m_tau; // always a host vector (Vector_h), independent of TConfig's memory space
39 |         TVector m_work;
40 |         bool m_use_R_inverse;
41 |         std::stack<TVector> m_local_comms_stack;
42 | };
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_L1_AGGRESSIVE_HMIS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "scope": "main",
 5 |         "print_grid_stats": 1, 
 6 |         "store_res_history": 1, 
 7 |         "solver": "FGMRES", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |             "interpolator": "D2", 
12 |             "print_grid_stats": 1, 
13 |             "aggressive_levels": 1, 
14 |             "solver": "AMG", 
15 |             "smoother": {
16 |                 "relaxation_factor": 1, 
17 |                 "scope": "jacobi", 
18 |                 "solver": "JACOBI_L1"
19 |             }, 
20 |             "presweeps": 1, 
21 |             "selector": "HMIS", 
22 |             "coarsest_sweeps": 1, 
23 |             "coarse_solver": "NOSOLVER", 
24 |             "max_iters": 1, 
25 |             "max_row_sum": 0.9, 
26 |             "strength_threshold": 0.25, 
27 |             "min_coarse_rows": 2, 
28 |             "scope": "amg_solver", 
29 |             "max_levels": 50, 
30 |             "cycle": "V", 
31 |             "postsweeps": 1
32 |         }, 
33 |         "max_iters": 100, 
34 |         "monitor_residual": 1, 
35 |         "gmres_n_restart": 10, 
36 |         "convergence": "RELATIVE_INI", 
37 |         "tolerance" : 1e-06, 
38 |         "norm": "L2"
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/src/configs/PCG_AGGREGATION_JACOBI.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1, 
 4 |     "solver": {
 5 |         "preconditioner": {
 6 |             "print_grid_stats": 1, 
 7 |             "algorithm": "AGGREGATION", 
 8 |             "print_vis_data": 0, 
 9 |             "solver": "AMG", 
10 |             "smoother": {
11 |                 "relaxation_factor": 0.8, 
12 |                 "scope": "jacobi", 
13 |                 "solver": "BLOCK_JACOBI", 
14 |                 "monitor_residual": 0, 
15 |                 "print_solve_stats": 0
16 |             }, 
17 |             "print_solve_stats": 0, 
18 |             "presweeps": 0, 
19 |             "interpolator": "D2",
20 |             "selector": "SIZE_2", 
21 |             "coarse_solver": "NOSOLVER", 
22 |             "max_iters": 1, 
23 |             "monitor_residual": 0, 
24 |             "store_res_history": 0, 
25 |             "scope": "amg", 
26 |             "max_levels": 50, 
27 |             "postsweeps": 3, 
28 |             "cycle": "V"
29 |         }, 
30 |         "solver": "PCG", 
31 |         "print_solve_stats": 1, 
32 |         "obtain_timings": 1, 
33 |         "max_iters": 100, 
34 |         "monitor_residual": 1, 
35 |         "convergence": "RELATIVE_INI", 
36 |         "scope": "main", 
37 |         "tolerance" : 1e-06, 
38 |         "norm": "L2"
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/include/norm.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <getvalue.h>
 7 | #include <error.h>
 8 | #include <types.h>
 9 | #include <basic_types.h>
10 | #include <vector.h>
11 | 
12 | #include <amgx_types/util.h>
13 | 
14 | namespace amgx
15 | {
16 | 
17 | /**********************************************************
18 |  * Returns the norm of a vector
19 |  *********************************************************/
20 | template<class VectorType, class MatrixType>
21 | typename types::PODTypes<typename VectorType::value_type>::type get_norm(const MatrixType &A, const VectorType &r, const NormType norm_type, typename types::PODTypes<typename VectorType::value_type>::type norm_factor = 1.0);
22 | // Block overload: takes an extra block_size, returns void and writes into block_nrm; norm_factor defaults to 1.0.
23 | template <class VectorType, class MatrixType, class PlainVectorType>
24 | void get_norm(const MatrixType &A, const VectorType &r, const int block_size, const NormType norm_type, PlainVectorType &block_nrm, typename types::PODTypes<typename VectorType::value_type>::type norm_factor = 1.0);
25 | 
26 | template <class VectorType, class MatrixType>
27 | void compute_norm_factor(MatrixType &A, VectorType &b, VectorType &x, const NormType normType, typename types::PODTypes<typename VectorType::value_type>::type &normFactor);
28 | 
29 | } // namespace amgx
30 | 
31 | 


--------------------------------------------------------------------------------
/include/cusp/detail/convert.inl:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cusp/detail/dispatch/convert.h>
 6 | 
 7 | #include <cusp/copy.h>
 8 | 
 9 | namespace cusp
10 | {
11 | namespace detail
12 | {
13 | // Format-tag dispatch for cusp::convert: the two trailing unnamed parameters are the source and destination format tags.
14 | // same format: both tags have the same type T1, so the conversion is a plain cusp::copy
15 | template <typename SourceType, typename DestinationType,
16 |           typename T1>
17 | void convert(const SourceType& src, DestinationType& dst,
18 |              T1, T1)
19 | {
20 |   cusp::copy(src, dst);
21 | }
22 | 
23 | // different formats: forward to the dispatch layer, additionally tagging the call with each side's memory_space
24 | template <typename SourceType, typename DestinationType,
25 |           typename T1, typename T2>
26 | void convert(const SourceType& src, DestinationType& dst,
27 |              T1, T2)
28 | {
29 |   cusp::detail::dispatch::convert(src, dst,
30 |       typename SourceType::memory_space(),
31 |       typename DestinationType::memory_space());
32 | }
33 | 
34 | } // end namespace detail
35 | 
36 | /////////////////
37 | // Entry Point //
38 | /////////////////
39 | template <typename SourceType, typename DestinationType>
40 | void convert(const SourceType& src, DestinationType& dst)
41 | {
42 |   CUSP_PROFILE_SCOPED();
43 |   // Pass default-constructed format tags of both containers; cusp::detail::convert is overloaded on them.
44 |   cusp::detail::convert(src, dst,
45 |       typename SourceType::format(),
46 |       typename DestinationType::format());
47 | }
48 | 
49 | } // end namespace cusp
50 | 
51 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_PMIS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1,
 4 |     "solver": {
 5 |         "scope": "main",
 6 |         "print_grid_stats": 1, 
 7 |         "store_res_history": 1, 
 8 |         "obtain_timings": 1,
 9 |         "solver": "GMRES", 
10 |         "print_solve_stats": 1, 
11 |         "preconditioner": {
12 |             "interpolator": "D2", 
13 |             "solver": "AMG", 
14 |             "cycle": "V", 
15 |             "smoother": {
16 |                 "relaxation_factor": 1, 
17 |                 "scope": "jacobi", 
18 |                 "solver": "JACOBI_L1"
19 |             }, 
20 |             "presweeps": 2, 
21 |             "postsweeps": 2,
22 |             "selector": "PMIS", 
23 |             "coarsest_sweeps": 2, 
24 |             "coarse_solver": "NOSOLVER", 
25 |             "max_iters": 1, 
26 |             "max_row_sum": 0.9, 
27 |             "min_coarse_rows": 2, 
28 |             "scope": "amg_solver", 
29 |             "max_levels": 50, 
30 |             "print_grid_stats": 1, 
31 |             "aggressive_levels": 1, 
32 |             "interp_max_elements": 4 
33 |         }, 
34 |         "max_iters": 100, 
35 |         "monitor_residual": 1, 
36 |         "gmres_n_restart": 10, 
37 |         "convergence": "RELATIVE_INI", 
38 |         "tolerance" : 1e-06, 
39 |         "norm": "L2"
40 |     } 
41 | }
42 | 


--------------------------------------------------------------------------------
/include/aggregation/selectors/dummy.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <aggregation/selectors/agg_selector.h>
 7 | #include <matrix.h>
 8 | 
 9 | namespace amgx
10 | {
11 | namespace aggregation
12 | {
13 | // Forward declaration of the class template defined directly below.
14 | template <class T_Config> class DUMMY_Selector;
15 | // Selector subclass; only declared in this header, the constructor and setAggregates are defined elsewhere.
16 | template <class T_Config>
17 | class DUMMY_Selector : public Selector<T_Config>
18 | {
19 |     public:
20 |         typedef T_Config TConfig;
21 |         typedef typename T_Config::MatPrec ValueType;
22 |         typedef typename T_Config::IndPrec IndexType;
23 |         typedef typename T_Config::MemSpace MemorySpace;
24 |         typedef typename Matrix<T_Config>::IVector IVector;
25 | 
26 |         // Constructor: receives the configuration object and the name of the scope to use with it
27 |         DUMMY_Selector(AMG_Config &cfg, const std::string &cfg_scope);
28 |         // aggregates, aggregates_global and num_aggregates are non-const references, i.e. outputs for matrix A (NOTE(review): exact contents depend on the definition).
29 |         void setAggregates( Matrix<T_Config> &A,
30 |                             IVector &aggregates, IVector &aggregates_global, int &num_aggregates);
31 | 
32 |     private:
33 |         int aggregate_size; // presumably the requested aggregate size read from cfg -- confirm in the constructor
34 | 
35 | };
36 | // Factory: create() returns a DUMMY_Selector allocated with plain `new`, so the pointer must be deleted by whoever ends up holding it.
37 | template<class T_Config>
38 | class DUMMY_SelectorFactory : public SelectorFactory<T_Config>
39 | {
40 |     public:
41 |         Selector<T_Config> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new DUMMY_Selector<T_Config>(cfg, cfg_scope); }
42 | };
43 | }
44 | }
45 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/arch.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/config.h>
 8 | 
 9 | ///TODO: FIX ME:
10 | ///
11 | ///#if THRUST_VERSION >= 100600
12 | ///#include <thrust/system/cuda/detail/arch.h>
13 | ///#else
14 | ///#include <thrust/detail/backend/cuda/arch.h>
15 | ///#endif
16 | 
17 | #include <cuda_runtime_api.h>
18 | 
19 | namespace cusp
20 | {
21 | namespace detail
22 | {
23 | namespace device
24 | {
25 | namespace arch
26 | {
27 | 
28 | template <typename KernelFunction>
29 | size_t max_active_blocks(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes)
30 | {
31 | #if THRUST_VERSION >= 100600
32 |   /// Queries the CUDA runtime occupancy API for `kernel` with CTA_SIZE threads and dynamic_smem_bytes of dynamic shared memory per block. Old Thrust equivalent: return amgx::thrust::system::cuda::detail::arch::max_active_blocks(kernel, CTA_SIZE, dynamic_smem_bytes);
33 |   int numBlocks = 0; // result slot for the query; the query's cudaError_t return value is not inspected
34 |   cudaOccupancyMaxActiveBlocksPerMultiprocessor ( &numBlocks, kernel, CTA_SIZE, dynamic_smem_bytes );//NEW Thrust: THRUST_VERSION >= 100802
35 |   return numBlocks; // blocks per multiprocessor as reported by the query, converted to size_t
36 | #else
37 |   return amgx::thrust::detail::backend::cuda::arch::max_active_blocks(kernel, CTA_SIZE, dynamic_smem_bytes);//Ancient Thrust: This SHOULD trigger error
38 | #endif
39 | }
40 | 
41 | } // end namespace arch
42 | } // end namespace device
43 | } // end namespace detail
44 | } // end namespace cusp
45 | 
46 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_AGGRESSIVE_L1_TRUNC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "scope": "main",
 5 |         "print_grid_stats": 1, 
 6 |         "store_res_history": 1, 
 7 |         "solver": "FGMRES", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |             "interpolator": "D2", 
12 |             "solver": "AMG", 
13 |             "print_grid_stats": 1, 
14 |             "aggressive_levels": 1, 
15 |             "interp_max_elements": 4, 
16 |             "smoother": {
17 |                 "relaxation_factor": 1, 
18 |                 "scope": "jacobi", 
19 |                 "solver": "JACOBI_L1"
20 |             }, 
21 |             "presweeps": 2, 
22 |             "selector": "PMIS", 
23 |             "coarsest_sweeps": 2, 
24 |             "coarse_solver": "NOSOLVER", 
25 |             "max_iters": 1, 
26 |             "max_row_sum": 0.9, 
27 |             "strength_threshold": 0.25, 
28 |             "min_coarse_rows": 2, 
29 |             "scope": "amg_solver", 
30 |             "max_levels": 50, 
31 |             "cycle": "V", 
32 |             "postsweeps": 2
33 |         }, 
34 |         "max_iters": 100, 
35 |         "monitor_residual": 1, 
36 |         "gmres_n_restart": 10, 
37 |         "convergence": "RELATIVE_INI", 
38 |         "tolerance" : 1e-06, 
39 |         "norm": "L2"
40 |      }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_CLASSICAL_AGGRESSIVE_HMIS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "scope": "main",
 5 |         "print_grid_stats": 1, 
 6 |         "store_res_history": 1, 
 7 |         "solver": "FGMRES", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |             "interpolator": "D2", 
12 |             "solver": "AMG", 
13 |             "print_grid_stats": 1, 
14 |             "aggressive_levels": 1, 
15 |             "interp_max_elements": 4, 
16 |             "smoother": {
17 |                 "relaxation_factor": 1, 
18 |                 "scope": "jacobi", 
19 |                 "solver": "JACOBI_L1"
20 |             }, 
21 |             "presweeps": 2, 
22 |             "selector": "HMIS", 
23 |             "coarsest_sweeps": 2, 
24 |             "coarse_solver": "NOSOLVER", 
25 |             "max_iters": 1, 
26 |             "max_row_sum": 0.9, 
27 |             "strength_threshold": 0.25, 
28 |             "min_coarse_rows": 2, 
29 |             "scope": "amg_solver", 
30 |             "max_levels": 50, 
31 |             "cycle": "V", 
32 |             "postsweeps": 2
33 |         }, 
34 |         "max_iters": 100, 
35 |         "monitor_residual": 1, 
36 |         "gmres_n_restart": 100, 
37 |         "convergence": "RELATIVE_INI", 
38 |         "tolerance" : 1e-06, 
39 |         "norm": "L2"
40 |      }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/configs/FGMRES_CLASSICAL_AGGRESSIVE_PMIS.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "solver": {
 4 |         "scope": "main",
 5 |         "print_grid_stats": 1, 
 6 |         "store_res_history": 1, 
 7 |         "solver": "FGMRES", 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |             "interpolator": "D2", 
12 |             "solver": "AMG", 
13 |             "print_grid_stats": 1, 
14 |             "aggressive_levels": 1, 
15 |             "interp_max_elements": 4, 
16 |             "smoother": {
17 |                 "relaxation_factor": 1, 
18 |                 "scope": "jacobi", 
19 |                 "solver": "JACOBI_L1"
20 |             }, 
21 |             "presweeps": 2, 
22 |             "selector": "PMIS", 
23 |             "coarsest_sweeps": 2, 
24 |             "coarse_solver": "NOSOLVER", 
25 |             "max_iters": 1, 
26 |             "max_row_sum": 0.9, 
27 |             "strength_threshold": 0.25, 
28 |             "min_coarse_rows": 2, 
29 |             "scope": "amg_solver", 
30 |             "max_levels": 50, 
31 |             "cycle": "V", 
32 |             "postsweeps": 2
33 |         }, 
34 |         "max_iters": 100, 
35 |         "monitor_residual": 1, 
36 |         "gmres_n_restart": 100, 
37 |         "convergence": "RELATIVE_INI", 
38 |         "tolerance" : 1e-06, 
39 |         "norm": "L2"
40 |      }
41 | }
42 | 


--------------------------------------------------------------------------------
/include/cycles/fixed_cycle.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <basic_types.h>
 7 | 
 8 | namespace amgx
 9 | {
10 | template< class T_Config, template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > class CycleDispatcher > class FixedCycle;
11 | }
12 | 
13 | #include <cycles/cycle.h>
14 | #include <amg_level.h>
15 | 
16 | namespace amgx
17 | {
18 | // Cycle subclass parameterised by a CycleDispatcher template (itself templated on vector/matrix/index precision); cycle() is only declared here.
19 | template< class T_Config, template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec > class CycleDispatcher >
20 | class FixedCycle: public Cycle<T_Config>
21 | {
22 |     public:
23 |         static const AMGX_VecPrecision vecPrec = T_Config::vecPrec;
24 |         static const AMGX_MatPrecision matPrec = T_Config::matPrec;
25 |         static const AMGX_IndPrecision indPrec = T_Config::indPrec;
26 |         typedef AMG<vecPrec, matPrec, indPrec> AMG_Class;
27 |         typedef T_Config TConfig;
28 |         typedef Vector<TConfig> VVector;
29 |         typedef typename T_Config::MatPrec ValueTypeA;
30 |         typedef typename T_Config::template setMemSpace<AMGX_host  >::Type TConfig_h;
31 |         typedef Vector<TConfig_h> Vector_h;
32 | 
33 |         // Runs the cycle for `level` of `amg` on the vectors b and x (NOTE(review): presumably right-hand side and solution -- confirm in the definition).
34 |         void cycle( AMG_Class *amg, AMG_Level<T_Config> *level, VVector &b, VVector &x );
35 |         virtual ~FixedCycle() {};
36 | };
37 | 
38 | } // namespace amgx
39 | 


--------------------------------------------------------------------------------
/include/aggregation/selectors/serial_greedy.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <aggregation/selectors/agg_selector.h>
 7 | #include <matrix.h>
 8 | 
 9 | namespace amgx
10 | {
11 | namespace aggregation
12 | {
13 | // Forward declaration of the class template defined directly below.
14 | template <class T_Config> class SerialGreedySelector;
15 | // Selector subclass; only declared in this header, the constructor and setAggregates are defined elsewhere.
16 | template <class T_Config>
17 | class SerialGreedySelector : public Selector<T_Config>
18 | {
19 |     public:
20 |         typedef T_Config TConfig;
21 |         typedef typename T_Config::MatPrec ValueType;
22 |         typedef typename T_Config::IndPrec IndexType;
23 |         typedef typename T_Config::MemSpace MemorySpace;
24 |         typedef typename Matrix<T_Config>::IVector IVector;
25 | 
26 |         // Constructor: receives the configuration object and the name of the scope to use with it
27 |         SerialGreedySelector(AMG_Config &cfg, const std::string &cfg_scope);
28 |         // aggregates, aggregates_global and num_aggregates are non-const references, i.e. outputs for matrix A (NOTE(review): exact contents depend on the definition).
29 |         void setAggregates( Matrix<T_Config> &A,
30 |                             IVector &aggregates, IVector &aggregates_global, int &num_aggregates);
31 | 
32 |     private:
33 |         int aggregate_size; // presumably the requested aggregate size read from cfg -- confirm in the constructor
34 |         int edge_weight_component; // presumably selects which block component provides edge weights -- confirm in the definition
35 | 
36 | };
37 | // Factory: create() returns a SerialGreedySelector allocated with plain `new`, so the pointer must be deleted by whoever ends up holding it.
38 | template<class T_Config>
39 | class SerialGreedySelectorFactory : public SelectorFactory<T_Config>
40 | {
41 |     public:
42 |         Selector<T_Config> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new SerialGreedySelector<T_Config>(cfg, cfg_scope); }
43 | };
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/misc.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <misc.h>
 6 | #include <stdio.h>
 7 | #include <stdarg.h>
 8 | 
 9 | #ifdef AMGX_WITH_MPI
10 | #include <mpi.h>
11 | #endif
12 | 
13 | namespace amgx
14 | {
15 | 
16 | #define PRINT_BUF_SIZE 4096
17 | 
18 | void amgx_default_output(const char *msg, int length) // default output callback: writes msg to stdout
19 | {
20 |     printf("%s", msg); // msg is printed as a NUL-terminated string; the length argument is not used
21 | }
22 | 
23 | void amgx_dist_output(const char *msg, int length)
24 | {
25 |     // Rank used for filtering; it stays 0 in non-MPI builds and while MPI is not initialized.
26 |     int my_rank = 0;
27 | #ifdef AMGX_WITH_MPI
28 |     int mpi_is_up = 0;
29 |     MPI_Initialized(&mpi_is_up); // The rank is only queried once MPI_Init has been called.
30 |     if (mpi_is_up)
31 |     {
32 |         MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
33 |     }
34 | 
35 | #endif
36 |     // Forward the message to the amgx_output callback from rank 0 only.
37 |     if (my_rank != 0) { return; }
38 | 
39 |     amgx_output(msg, length);
40 | }
41 | 
42 | AMGX_output_callback amgx_output = amgx_default_output; // general output callback, initially the stdout printer above
43 | AMGX_output_callback error_output = amgx_default_output; // error output callback, initially the same stdout printer
44 | AMGX_output_callback amgx_distributed_output = amgx_dist_output; // callback used by amgx_printf; initially filters to MPI rank 0
45 | 
46 | int amgx_printf(const char *fmt, ...) // printf-style formatting into a fixed buffer, emitted through amgx_distributed_output
47 | {
48 |     char buffer[PRINT_BUF_SIZE]; // output longer than PRINT_BUF_SIZE - 1 characters is truncated by vsnprintf
49 |     va_list ap;
50 |     va_start(ap, fmt);
51 |     const int retval = vsnprintf(buffer, PRINT_BUF_SIZE, fmt, ap);
52 |     va_end(ap);
53 |     // A negative result means formatting failed and the buffer may not hold a valid string, so nothing is emitted then.
54 |     if (retval >= 0) { amgx_distributed_output(buffer, strlen(buffer)); }
55 |     return retval; // vsnprintf's own result: the untruncated length, or a negative value on error
56 | }
57 | 
58 | } // namespace amgx
59 | 


--------------------------------------------------------------------------------
/include/cusp/detail/dispatch/multiply.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cusp/array1d.h>
 6 | 
 7 | #include <cusp/detail/host/multiply.h>
 8 | #include <cusp/detail/device/multiply.h>
 9 | 
10 | namespace cusp
11 | {
12 | namespace detail
13 | {
14 | namespace dispatch
15 | {
16 | // Memory-space dispatch for multiply: the three unnamed tag parameters pick the overload; only the all-host and all-device combinations are provided here.
17 | ////////////////
18 | // Host Paths // all three tags are cusp::host_memory -> cusp::detail::host::multiply
19 | ////////////////
20 | template <typename LinearOperator,
21 |           typename MatrixOrVector1,
22 |           typename MatrixOrVector2>
23 | void multiply(const LinearOperator&  A,
24 |               const MatrixOrVector1& B,
25 |                     MatrixOrVector2& C,
26 |               cusp::host_memory,
27 |               cusp::host_memory,
28 |               cusp::host_memory)
29 | {
30 |     cusp::detail::host::multiply(A, B, C);
31 | }
32 | 
33 | //////////////////
34 | // Device Paths // all three tags are cusp::device_memory -> cusp::detail::device::multiply
35 | //////////////////
36 | template <typename LinearOperator,
37 |           typename MatrixOrVector1,
38 |           typename MatrixOrVector2>
39 | void multiply(const LinearOperator&  A,
40 |               const MatrixOrVector1& B,
41 |                     MatrixOrVector2& C,
42 |               cusp::device_memory,
43 |               cusp::device_memory,
44 |               cusp::device_memory)
45 | {
46 |     cusp::detail::device::multiply(A, B, C);
47 | }
48 | 
49 | } // end namespace dispatch
50 | } // end namespace detail
51 | } // end namespace cusp
52 | 
53 | 


--------------------------------------------------------------------------------
/src/convergence/absolute.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include "amg_config.h"
 6 | #include "convergence/absolute.h"
 7 | 
 8 | namespace amgx
 9 | {
10 | 
11 | template<class TConfig>
12 | AbsoluteConvergence<TConfig>::AbsoluteConvergence(AMG_Config &cfg, const std::string &cfg_scope) : Convergence<TConfig>(cfg, cfg_scope)
13 | { // nothing beyond base-class construction; the tolerance is read later in convergence_init()
14 | }
15 | 
16 | template<class TConfig>
17 | void AbsoluteConvergence<TConfig>::convergence_init()
18 | { // caches the "tolerance" parameter of this object's config scope in m_tolerance
19 |     this->m_tolerance = this->m_cfg->AMG_Config::template getParameter<double>("tolerance", this->m_cfg_scope);
20 | }
21 | 
22 | 
23 | template<class TConfig>
24 | AMGX_STATUS AbsoluteConvergence<TConfig>::convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini)
25 | {
26 |     // Converged only when every entry of nrm is strictly below the tolerance;
27 |     // nrm_ini is not consulted by the absolute criterion.
28 |     for (int blk = 0; blk < nrm.size(); blk++)
29 |     {
30 |         // Written as !(a < b) so that a NaN norm still counts as not converged.
31 |         if (!(nrm[blk] < this->m_tolerance)) { return AMGX_ST_NOT_CONVERGED; }
32 |     }
33 | 
34 |     return AMGX_ST_CONVERGED;
35 | }
36 | 
37 | /****************************************
38 |  * Explicit instantiations
39 |  ***************************************/
40 | #define AMGX_CASE_LINE(CASE) template class AbsoluteConvergence<TemplateMode<CASE>::Type>;
41 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE)
42 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE)
43 | #undef AMGX_CASE_LINE
44 | 
45 | } // end namespace
46 | 
47 | 


--------------------------------------------------------------------------------
/include/amgx_types/pod_types.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <basic_types.h>
 8 | 
 9 | // definition of base type for supported AMGX datatype which we would call PODType
10 | 
11 | namespace amgx
12 | {
13 | 
14 | namespace types
15 | {
16 | // PODTypes<T> maps a supported AMGX value type T to its underlying plain scalar type and related constants.
17 | // plain data traits (the primary template is only declared, so an unsupported T fails to compile when used)
18 | template <typename T>
19 | struct PODTypes;
20 | // float is its own plain type: one item per value
21 | template <>
22 | struct PODTypes <float>
23 | {
24 |     // raw datatype for AMGX datatype
25 |     typedef float type;
26 |     // tconfig vector precision of raw datatype (used in TConfig templates construction)
27 |     static const AMGX_VecPrecision vec_prec = AMGX_vecFloat;
28 |     // number of raw dataitems in AMGX datatype
29 |     static const int pod_items = 1;
30 | };
31 | // double is its own plain type: one item per value
32 | template <>
33 | struct PODTypes <double>
34 | {
35 |     typedef double type;
36 |     static const AMGX_VecPrecision vec_prec = AMGX_vecDouble;
37 |     static const int pod_items = 1;
38 | };
39 | // cuComplex: plain type float, two items per value (NOTE(review): presumably the real and imaginary parts)
40 | template <>
41 | struct PODTypes <cuComplex>
42 | {
43 |     typedef float type;
44 |     static const AMGX_VecPrecision vec_prec = AMGX_vecFloat;
45 |     static const int pod_items = 2;
46 | };
47 | // cuDoubleComplex: plain type double, two items per value (NOTE(review): presumably the real and imaginary parts)
48 | template <>
49 | struct PODTypes <cuDoubleComplex>
50 | {
51 |     typedef double type;
52 |     static const AMGX_VecPrecision vec_prec = AMGX_vecDouble;
53 |     static const int pod_items = 2;
54 | };
55 | 
56 | 
57 | } // namespace types
58 | 
59 | } // namespace amgx


--------------------------------------------------------------------------------
/include/aggregation/selectors/serial_bfs_selector.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | #include <aggregation/selectors/agg_selector.h>
 7 | #include <matrix.h>
 8 | 
 9 | namespace amgx
10 | {
11 | namespace aggregation
12 | {
13 | // Forward declaration of the class template defined directly below.
14 | template <class T_Config> class Serial_BFS_Selector;
15 | // Selector subclass; only declared in this header, the constructor and setAggregates are defined elsewhere.
16 | template <class T_Config>
17 | class Serial_BFS_Selector : public Selector<T_Config>
18 | {
19 |     public:
20 |         typedef T_Config TConfig;
21 |         typedef typename T_Config::MatPrec ValueType;
22 |         typedef typename T_Config::IndPrec IndexType;
23 |         typedef typename T_Config::MemSpace MemorySpace;
24 |         typedef typename Matrix<T_Config>::IVector IVector;
25 | 
26 |         // Constructor: receives the configuration object and the name of the scope to use with it
27 |         Serial_BFS_Selector(AMG_Config &cfg, const std::string &cfg_scope);
28 |         // aggregates, aggregates_global and num_aggregates are non-const references, i.e. outputs for matrix A (NOTE(review): exact contents depend on the definition).
29 |         void setAggregates( Matrix<T_Config> &A,
30 |                             IVector &aggregates, IVector &aggregates_global, int &num_aggregates);
31 | 
32 |     private:
33 |         int aggregate_size; // presumably the requested aggregate size read from cfg -- confirm in the constructor
34 |         AMG_Config coloring_cfg; // configuration object held by value (NOTE(review): presumably for a matrix coloring step -- confirm)
35 |         std::string coloring_cfg_scope; // scope name that goes with coloring_cfg
36 | };
37 | // Factory: create() returns a Serial_BFS_Selector allocated with plain `new`, so the pointer must be deleted by whoever ends up holding it.
38 | template<class T_Config>
39 | class Serial_BFS_SelectorFactory : public SelectorFactory<T_Config>
40 | {
41 |     public:
42 |         Selector<T_Config> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new Serial_BFS_Selector<T_Config>(cfg, cfg_scope); }
43 | };
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/include/cusp/precond/smooth.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/detail/config.h>
 8 | 
 9 | #include <cusp/csr_matrix.h>
10 | #include <cusp/coo_matrix.h>
11 | 
12 | namespace cusp
13 | {
14 | namespace precond
15 | {
16 | 
17 | 	//   Smoothed (final) prolongator defined by P = (I - omega/rho(K) K) * T
18 | 	//   where K = diag(S)^-1 * S and rho(K) is an approximation to the
19 | 	//   spectral radius of K. P is the output; omega defaults to 4/3 and rho_Dinv_S to 0 (NOTE(review): presumably 0 requests an internal estimate of rho -- confirm in smooth.inl).
20 | 	template <typename IndexType, typename ValueType, typename MemorySpace>
21 | 	void smooth_prolongator(const cusp::coo_matrix<IndexType,ValueType,MemorySpace>& S,
22 | 				const cusp::coo_matrix<IndexType,ValueType,MemorySpace>& T,
23 | 				      cusp::coo_matrix<IndexType,ValueType,MemorySpace>& P,
24 | 				const ValueType omega = 4.0/3.0,
25 | 				const ValueType rho_Dinv_S = 0.0);
26 | 	//   Overload for CSR matrices in host memory; same parameters and defaults as the COO version.
27 | 	template <typename IndexType, typename ValueType>
28 | 	void smooth_prolongator(const cusp::csr_matrix<IndexType,ValueType,cusp::host_memory>& S,
29 |                         	const cusp::csr_matrix<IndexType,ValueType,cusp::host_memory>& T,
30 |                               	      cusp::csr_matrix<IndexType,ValueType,cusp::host_memory>& P,
31 |                         	const ValueType omega = 4.0/3.0,
32 |                         	const ValueType rho_Dinv_S = 0.0);
33 | 
34 | } // end namespace precond
35 | } // end namespace cusp
36 | 
37 | #include <cusp/precond/detail/smooth.inl>
38 | 


--------------------------------------------------------------------------------
/ci/containers/x86_64-ubuntu18.04-llvm9-cuda11.0.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | """
 6 | AmgX base image: x86_64-ubuntu18.04-llvm-cuda11.0
 7 | """
 8 | 
 9 | Stage0 += comment(__doc__, reformat=False)  # the module docstring becomes the recipe's leading comment
10 | Stage0 += baseimage(image='nvidia/cuda:11.0-devel-ubuntu18.04')
11 | 
12 | # Last compiler supported for Ubuntu 18.04 by CUDA 11.0
13 | # https://docs.nvidia.com/cuda/archive/11.0/cuda-installation-guide-linux/index.html#system-requirements
14 | compiler = llvm(version='9')
15 | Stage0 += compiler
16 | Stage0 += shell(commands=[  # register clang-9 / clang++-9 as the cc / c++ alternatives
17 |     'update-alternatives --install /usr/bin/cc cc /usr/bin/clang-9 40',
18 |     'update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++-9 60'
19 | ])
20 | 
21 | # Current minimum version required by AMGX
22 | Stage0 += cmake(eula=True, version='3.7.0')
23 | 
24 | # MPI: Mellanox OFED, GDRCopy, KNEM and UCX first, then Open MPI configured with CUDA, InfiniBand, PMIx and UCX using the LLVM toolchain
25 | Stage0 += mlnx_ofed(version='5.0-2.1.8.0')
26 | Stage0 += gdrcopy(ldconfig=True, version='2.0')
27 | Stage0 += knem(ldconfig=True, version='1.1.3')
28 | Stage0 += ucx(gdrcopy=True, knem=True, ofed=True, cuda=True)
29 | Stage0 += openmpi(
30 |     cuda=True,
31 |     infiniband=True,
32 |     version='4.0.3',
33 |     pmix=True,
34 |     ucx=True,
35 |     toolchain=compiler.toolchain
36 | )
37 | Stage0 += environment(multinode_vars = {  # Open MPI (OMPI_MCA_*) and UCX (UCX_*) runtime settings
38 |     'OMPI_MCA_pml': 'ucx',
39 |     'OMPI_MCA_btl': '^smcuda,vader,tcp,uct,openib',
40 |     'UCX_MEMTYPE_CACHE': 'n',
41 |     'UCX_TLS': 'rc,cuda_copy,cuda_ipc,gdr_copy,sm'
42 | })


--------------------------------------------------------------------------------
/include/cusp/elementwise.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file elementwise.h
 6 |  *  \brief Elementwise operations on matrices.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | /*! \addtogroup algorithms Algorithms
17 |  *  \ingroup algorithms
18 |  *  \{
19 |  */
20 | 
21 | //// Uses Matrix1::value_type(0) and Matrix2::value_type(0) for values not present
22 | //template <typename Matrix1,
23 | //          typename Matrix2,
24 | //          typename Matrix3,
25 | //          typename BinaryFunction>
26 | //void transform_elementwise(const Matrix1& A,
27 | //                           const Matrix2& B,
28 | //                                 Matrix3& C,
29 | //                                 BinaryFunction op);
30 | 
31 | /*! \p add : Compute the sum of two matrices and store it in \p C
32 |  *  (\p A and \p B are read-only inputs; \p C is the only non-const argument). */
33 | template <typename Matrix1,
34 |           typename Matrix2,
35 |           typename Matrix3>
36 | void add(const Matrix1& A,
37 |          const Matrix2& B,
38 |                Matrix3& C);
39 | 
40 | /*! \p subtract : Compute the difference of two matrices and store it in \p C
41 |  *  (NOTE(review): presumably C = A - B given the argument order -- confirm in elementwise.inl). */
42 | template <typename Matrix1,
43 |           typename Matrix2,
44 |           typename Matrix3>
45 | void subtract(const Matrix1& A,
46 |               const Matrix2& B,
47 |                     Matrix3& C);
48 | /*! \}
49 |  */
50 | 
51 | } // end namespace cusp
52 | 
53 | #include <cusp/detail/elementwise.inl>
54 | 
55 | 


--------------------------------------------------------------------------------
/ci/README.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 3 | 
 4 | SPDX-License-Identifier: BSD-3-Clause
 5 | -->
 6 | 
 7 | Continuous integration
 8 | ===
 9 | 
10 | **WIP**: Adding continuous integration to AmgX is currently a work in progress.
11 | 
12 | * [`./ci/run.sh`](run.sh) runs the whole CI pipeline locally: it builds the
13 |   docker containers for each supported environment, builds AmgX for that
14 |   environment, and runs the AmgX tests. 
15 | * [`./ci/test.sh`](test.sh) performs a clean run of the AmgX tests.
16 | 
17 | The containers are specified using [`HPCCM`], see [`containers/`](containers).
18 | 
19 | [`HPCCM`]: https://github.com/NVIDIA/hpc-container-maker
20 | 
21 | The behavior of the CI system is configured using the following environment variables:
22 | 
23 | * `AMGX_CI_CONTAINERS="<list>"`: list of containers to test. By default all
24 |   containers are tested.
25 | * `AMGX_CI_KEEP_BUILD=0|1`: whether the build directories are preserved across
26 |   CI runs. The default is `0`, i.e., the build directories are cleaned and AmgX
27 |   is rebuilt from scratch on every run.
28 |   
29 | * `AMGX_CI_CONTAINER_FILE`: dumps the container build recipe to a file in the
30 |   current working directory: `Dockerfile_${baseimage}`.
31 | 
32 | For example, to only test the `x86_64-ubuntu18.04-gnu7-cuda10.2.py` container,
33 | preserving the build directory (e.g. during development):
34 | 
35 | ```shell
36 | AMGX_CI_CONTAINERS="x86_64-ubuntu18.04-gnu7-cuda10.2.py" AMGX_CI_KEEP_BUILD=1 ./ci/run.sh
37 | ```
38 | 


--------------------------------------------------------------------------------
/external/rapidjson/include/rapidjson/stringbuffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef RAPIDJSON_STRINGBUFFER_H_
 2 | #define RAPIDJSON_STRINGBUFFER_H_
 3 | 
 4 | #include "rapidjson.h"
 5 | #include "internal/stack.h"
 6 | 
 7 | namespace rapidjson {
 8 | 
 9 | //! Represents an in-memory output stream.
10 | /*!
11 | 	\tparam Encoding Encoding of the stream.
12 | 	\tparam Allocator type for allocating memory buffer.
13 | 	\implements Stream
14 | */
15 | template <typename Encoding, typename Allocator = CrtAllocator>
16 | struct GenericStringBuffer {
17 | 	typedef typename Encoding::Ch Ch;
18 | 	//! Creates the buffer on top of an internal stack built from the given allocator and initial capacity.
19 | 	GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
20 | 	//! Appends one character by pushing it onto the stack.
21 | 	void Put(Ch c) { *stack_.template Push<Ch>() = c; }
22 | 	//! Discards the buffered content (delegates to the stack's Clear()).
23 | 	void Clear() { stack_.Clear(); }
24 | 	//! Returns the buffered characters as a null-terminated string of the encoding's character type Ch (const Ch* instead of const char*, so encodings whose Ch is not char compile too).
25 | 	const Ch* GetString() const {
26 | 		// Push and pop a null terminator. This relies on Pop() leaving the written slot intact; stack_ is mutable so this works in a const method.
27 | 		*stack_.template Push<Ch>() = '\0';
28 | 		stack_.template Pop<Ch>(1);
29 | 
30 | 		return stack_.template Bottom<Ch>();
31 | 	}
32 | 	//! Size of the buffered content as reported by the stack (NOTE(review): presumably in bytes rather than characters -- confirm in internal::Stack).
33 | 	size_t Size() const { return stack_.GetSize(); }
34 | 
35 | 	static const size_t kDefaultCapacity = 256;
36 | 	mutable internal::Stack<Allocator> stack_;
37 | };
38 | 
39 | typedef GenericStringBuffer<UTF8<> > StringBuffer;
40 | 
41 | //! Implement specialized version of PutN() with memset() for better performance: for the UTF-8 string buffer, n characters are reserved with a single Push and filled with c in one memset() call.
42 | template<>
43 | inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
44 | 	memset(stream.stack_.Push<char>(n), c, n * sizeof(c));
45 | }
46 | 
47 | } // namespace rapidjson
48 | 
49 | #endif // RAPIDJSON_STRINGBUFFER_H_
50 | 


--------------------------------------------------------------------------------
/src/tests/truncate_count_test.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include "unit_test.h"
 6 | #include <matrix.h>
 7 | #include <truncate.h>
 8 | #include "test_utils.h"
 9 | #include "util.h"
10 | #include <cusp/gallery/poisson.h>
11 | 
12 | namespace amgx
13 | {
14 | 
15 | DECLARE_UNITTEST_BEGIN(truncateCountTest);
16 | // Runs countTruncElements() on a 5-point Poisson matrix and checks the returned counts and row sums.
17 | void run()
18 | {
19 |     typedef Vector<typename TConfig::template setVecPrec<AMGX_vecInt>::Type> IVector;  // vector type with integer precision, used for the counts
20 |     const int N = 100;  // passed twice to poisson5pt below
21 |     Matrix<TConfig> A;
22 |     A.addProps(CSR);
23 |     MatrixCusp<TConfig, cusp::csr_format> Aw(&A);  // NOTE(review): presumably a cusp-compatible view so the gallery call fills A - confirm
24 |     cusp::gallery::poisson5pt(Aw, N, N);
25 |     IVector count(A.get_num_rows(), 0);
26 |     VVector x(A.get_num_rows(), 4.), new_row_sum(A.get_num_rows(), 0.);  // x starts at 4 everywhere, new_row_sum at 0
27 |     const double trunc_factor = 0.5;
28 |     countTruncElements(A, trunc_factor, x, count, new_row_sum);
29 |     int new_count = amgx::thrust::reduce(count.begin(), count.end());  // sum of all entries of count
30 |     this->PrintOnFail("truncateCountTest: new nnz should = num rows");
31 |     UNITTEST_ASSERT_TRUE(A.get_num_rows() == new_count);
32 |     // Every entry of new_row_sum must equal 4 to within 1e-6.
33 |     for (int i = 0; i < new_row_sum.size(); i++)
34 |     {
35 |         this->PrintOnFail("truncateCountTest: new_row_sum[i] should = 4 for all i");
36 |         UNITTEST_ASSERT_TRUE(fabs(new_row_sum[i] - 4.) <= 1e-6);
37 |     }
38 | }
39 | 
40 | DECLARE_UNITTEST_END(truncateCountTest);
41 | // Single instance of the test, for the AMGX_mode_dDDI mode.
42 | truncateCountTest<TemplateMode<AMGX_mode_dDDI>::Type> truncateCountTest_instance_mode_dDDI;
43 | 
44 | } // end namespace amgx
45 | 


--------------------------------------------------------------------------------
/src/cycles/v_cycle.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cycles/v_cycle.h>
 6 | 
 7 | namespace amgx
 8 | {
 9 | // Overload taking TConfig_h arguments: hands amg, level, b and x straight to V_Cycle<TConfig_h>.
10 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
11 | void V_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_h> *level, Vector<TConfig_h> &b, Vector<TConfig_h> &x ) const
12 | {
13 |     V_Cycle<TConfig_h>( amg, level, b, x );
14 | }
15 | // Overload taking TConfig_d arguments: hands amg, level, b and x straight to V_Cycle<TConfig_d>.
16 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
17 | void V_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_d> *level, Vector<TConfig_d> &b, Vector<TConfig_d> &x ) const
18 | {
19 |     V_Cycle<TConfig_d>( amg, level, b, x );
20 | }
21 | 
22 | /****************************************
23 |  * Explicit instantiations
24 |  ***************************************/
25 | template class V_CycleDispatcher<AMGX_vecDouble, AMGX_matDouble, AMGX_indInt>;
26 | template class V_CycleDispatcher<AMGX_vecFloat, AMGX_matFloat, AMGX_indInt>;
27 | template class V_CycleDispatcher<AMGX_vecDouble, AMGX_matFloat, AMGX_indInt>;
28 | 
29 | template class V_CycleDispatcher<AMGX_vecComplex, AMGX_matComplex, AMGX_indInt>;
30 | template class V_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matComplex, AMGX_indInt>;
31 | template class V_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matDoubleComplex, AMGX_indInt>;
32 | } // namespace amgx
33 | 
34 | 


--------------------------------------------------------------------------------
/src/thread_manager.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cassert>
 6 | #include "thread_manager.h"
 7 | #include "vector.h"
 8 | 
 9 | namespace amgx
10 | {
11 | 
12 | // ///////////////////////////////////////////////////////////////////////////////////////////////////////////
13 | // ThreadWorker: the constructor stores its two arguments; every other member defined here has an empty body.
14 | ThreadWorker::ThreadWorker(ThreadManager *manager, int skills) :
15 |     m_manager(manager),
16 |     m_skills(skills)
17 | {
18 | }
19 | 
20 | ThreadWorker::~ThreadWorker()
21 | {
22 | }
23 | 
24 | float ThreadWorker::estimate_workload()
25 | {
26 |     return 0.0;  // always reports zero
27 | }
28 | 
29 | void ThreadWorker::push_task(AsyncTask *task)  // no-op: the task is ignored
30 | {
31 | }
32 | 
33 | void ThreadWorker::wait_empty()  // no-op
34 | {
35 | }
36 | 
37 | void ThreadWorker::run()  // no-op
38 | {
39 | }
40 | 
41 | // ///////////////////////////////////////////////////////////////////////////////////////////////////////////
42 | // ThreadManager: every member defined in this file has an empty body, so all arguments are ignored.
43 | void ThreadManager::setup_streams( int num_streams, bool priority, bool serialize )
44 | {
45 | }
46 | 
47 | void ThreadManager::join_threads()
48 | {
49 | }
50 | 
51 | void ThreadManager::wait_threads()
52 | {
53 | }
54 | 
55 | void ThreadManager::spawn_threads(size_t pool_size,
56 |                                   size_t max_alloc_size)
57 | {
58 | }
59 | 
60 | void ThreadManager::push_work(AsyncTask *task, bool use_cnp)  // the task is not executed or queued here
61 | {
62 | }
63 | 
64 | // ///////////////////////////////////////////////////////////////////////////////////////////////////////////
65 | // InitTask::exec: empty body, performs no work.
66 | void InitTask::exec()
67 | {
68 | }
69 | 
70 | }   // namespace amgx
71 | 
72 | 


--------------------------------------------------------------------------------
/examples/Makefile.cray:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2011-2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | # Compiler wrappers (cc / CC) and CUDA / MPI toolkit locations
 6 | cc = cc
 7 | CC = CC
 8 | COMMON_L = -ldl -lamgxsh -L../lib -Wl,-rpath=../lib
 9 | CUDA_DIR = $(CRAY_CUDATOOLKIT_DIR)
10 | CUDA_L = -lcudart -L$(CUDA_DIR)/lib64
11 | CUDA_I = -I$(CUDA_DIR)/include
12 | MPI_DIR = $(CRAY_MPICH2_DIR)
13 | MPI_L = -lmpich -L$(MPI_DIR)/lib
14 | MPI_I = -I$(MPI_DIR)/include
15 | 
16 | # Examples
17 | amgx_capi:
18 | 	$(cc) -O2 amgx_capi.c $(CUDA_I) -o amgx_capi $(CUDA_L) $(COMMON_L)
19 | 
20 | amgx_mpi_capi:
21 | 	$(cc) -O2 amgx_mpi_capi.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi $(CUDA_L) $(MPI_L) $(COMMON_L)
22 | 
23 | amgx_capi_dynamic:
24 | 	$(cc) -O2 amgx_capi.c -o amgx_capi_dynamic $(CUDA_I) -DAMGX_DYNAMIC_LOADING $(CUDA_L) $(COMMON_L)
25 | 
26 | amgx_mpi_capi_dynamic:
27 | 	$(cc) -O2 amgx_mpi_capi.c -o amgx_mpi_capi_dynamic $(CUDA_I) $(MPI_I) -DAMGX_DYNAMIC_LOADING $(CUDA_L) $(COMMON_L) $(MPI_L)
28 | 
29 | amgx_mpi_capi_agg:
30 | 	$(cc) -O2 amgx_mpi_capi_agg.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi_agg $(CUDA_L) $(COMMON_L) $(MPI_L)
31 | 
32 | amgx_mpi_capi_cla:
33 | 	$(cc) -O2 amgx_mpi_capi_cla.c $(CUDA_I) $(MPI_I) -o amgx_mpi_capi_cla $(CUDA_L) $(COMMON_L) $(MPI_L)
34 | 
35 | # All
36 | # 'all' and 'clean' do not create files of those names; declaring them phony
37 | # keeps a stray file called "all" or "clean" from disabling the rule.
38 | .PHONY: all clean
39 | all: amgx_capi amgx_mpi_capi amgx_capi_dynamic amgx_mpi_capi_dynamic amgx_mpi_capi_agg amgx_mpi_capi_cla
40 | 
41 | # Clean
42 | clean:
43 | 	rm -f amgx_capi
44 | 	rm -f amgx_mpi_capi
45 | 	rm -f amgx_capi_dynamic
46 | 	rm -f amgx_mpi_capi_dynamic
47 | 	rm -f amgx_mpi_capi_cla
48 | 	rm -f amgx_mpi_capi_agg
49 | 	rm -f *.o
50 | 


--------------------------------------------------------------------------------
/LICENSES/BSD-3-Clause.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 - 2024 NVIDIA CORPORATION. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions
 5 | are met:
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 2. Redistributions in binary form must reproduce the above copyright
10 |    notice, this list of conditions and the following disclaimer in the
11 |    documentation and/or other materials provided with the distribution.
12 | 3. Neither the name of NVIDIA CORPORATION nor the names of its
13 |    contributors may be used to endorse or promote products derived
14 |    from this software without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/include/cusp/transpose.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file transpose.h
 6 |  *  \brief Matrix transpose
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | 
16 | /*! \addtogroup algorithms Algorithms
17 |  *  \ingroup algorithms
18 |  *  \{
19 |  */
20 | 
21 | /*! \p transpose : transpose a matrix
22 |  *
23 |  * \param A input matrix
24 |  * \param At output matrix (transpose of A)
25 |  *
26 |  * \tparam MatrixType1 matrix
27 |  * \tparam MatrixType2 matrix
28 |  *
29 |  *  The following code snippet demonstrates how to use \p transpose.
30 |  *
31 |  *  \code
32 |  *  #include <cusp/transpose.h>
33 |  *  #include <cusp/array2d.h>
34 |  *  #include <cusp/print.h>
35 |  *  
36 |  *  int main(void)
37 |  *  {
38 |  *      // initialize a 2x3 matrix
39 |  *      cusp::array2d<float, cusp::host_memory> A(2,3);
40 |  *      A(0,0) = 10;  A(0,1) = 20;  A(0,2) = 30;
41 |  *      A(1,0) = 40;  A(1,1) = 50;  A(1,2) = 60;
42 |  *  
43 |  *      // print A
44 |  *      cusp::print(A);
45 |  *  
46 |  *      // compute the transpose
47 |  *      cusp::array2d<float, cusp::host_memory> At;
48 |  *      cusp::transpose(A, At);
49 |  *  
50 |  *      // print A^T
51 |  *      cusp::print(At);
52 |  *  
53 |  *      return 0;
54 |  *  }
55 |  *  \endcode
56 |  */
57 | template <typename MatrixType1, typename MatrixType2>
58 | void transpose(const MatrixType1& A, MatrixType2& At);
59 | 
60 | /*! \}
61 |  */
62 | 
63 | } // end namespace cusp
64 | 
65 | #include <cusp/detail/transpose.inl>
66 | 
67 | 


--------------------------------------------------------------------------------
/examples/generate_poisson.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cusp/coo_matrix.h>
 6 | #include <cusp/gallery/poisson.h>
 7 | #include <cusp/io/matrix_market.h>
 8 | #include <fstream>
 9 | #include <cstdlib>
10 | 
11 | // Returns the integer that follows argv[i] and advances i onto it.
12 | // Stops with an error when the command line ends first, so that atoi() is
13 | // never handed argv[argc] (a null pointer) or an element past it.
14 | static int next_int_arg(int argc, char **argv, int &i, const char *what)
15 | {
16 |   if (i + 1 >= argc)
17 |   {
18 |     printf("Error missing value for %s\n", what);
19 |     exit(EXIT_FAILURE);
20 |   }
21 | 
22 |   return atoi(argv[++i]);
23 | }
24 | 
25 | // Builds the Poisson matrix selected on the command line and writes it as a
26 | // MatrixMarket file.
27 | //   -p <points> <x> <y> [<z>]   stencil size (5, 7, 9 or 27) and grid dimensions;
28 | //                               <z> is read only for the 7- and 27-point stencils
29 | //   -o <file>                   output file name (default: output.mtx)
30 | // Without -p the default-constructed matrix A is written.
31 | int main(int argc, char **argv)
32 | {
33 |   cusp::coo_matrix<int,double,cusp::host_memory> A;
34 |   const char *fname = NULL;
35 |   // check command line arguments
36 |   for (int i=1; i < argc; i++)
37 |   {
38 |     if (strncmp(argv[i],"-p",100) == 0)
39 |     {
40 |       int points = next_int_arg(argc, argv, i, "-p <points>");
41 |       int x = next_int_arg(argc, argv, i, "-p <x>");
42 |       int y = next_int_arg(argc, argv, i, "-p <y>");
43 |       int z;
44 | 
45 |       switch(points)
46 |       {
47 |         case 5:
48 |           cusp::gallery::poisson5pt(A,x,y);
49 |           break;
50 |         case 7:
51 |           z = next_int_arg(argc, argv, i, "-p <z>");
52 |           cusp::gallery::poisson7pt(A,x,y,z);
53 |           break;
54 |         case 9:
55 |           cusp::gallery::poisson9pt(A,x,y);
56 |           break;
57 |         case 27:
58 |           z = next_int_arg(argc, argv, i, "-p <z>");
59 |           cusp::gallery::poisson27pt(A,x,y,z);
60 |           break;
61 |         default:
62 |           printf("Error invalid number of poisson points specified, valid numbers are 5, 7, 9, 27\n");
63 |           exit(EXIT_FAILURE);  // non-zero status so callers can detect the failure
64 |       }
65 |     }
66 |     else if (strncmp(argv[i],"-o",100) == 0)
67 |     {
68 |       // argv[argc] is a null pointer, so a trailing "-o" leaves fname null
69 |       // and the default name below is used.
70 |       i++;
71 |       fname = argv[i];
72 |     }
73 |   }
74 | 
75 |   // output
76 |   if (fname == NULL)
77 |   {
78 |     fname = "output.mtx";
79 |   }
80 | 
81 |   cusp::io::write_matrix_market_file(A,fname);
82 | }
83 | 
84 | 


--------------------------------------------------------------------------------
/src/operators/solve_operator.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | namespace amgx
 6 | {
 7 | 
 8 | template <class T_Config> class Operator;
 9 | 
10 | }
11 | 
12 | #include <operators/solve_operator.h>
13 | #include <solvers/solver.h>
14 | #include <blas.h>
15 | 
16 | #include "amgx_types/util.h"
17 | 
18 | namespace amgx
19 | {
20 | // Destructor: deletes m_solver. NOTE(review): implies this object owns the solver - confirm no other owner exists.
21 | template <typename TConfig>
22 | SolveOperator<TConfig>::~SolveOperator()
23 | {
24 |     delete m_solver;
25 | }
26 | 
27 | template <typename TConfig>
28 | void SolveOperator<TConfig>::apply(const Vector<TConfig> &v, Vector<TConfig> &res, ViewType view)
29 | {
30 |     // Ask the wrapped operator which slice of the vector the view covers.
31 |     int view_offset, view_size;
32 |     this->m_A->getOffsetAndSizeForView(view, &view_offset, &view_size);
33 |     // Zero that slice of res so the solve starts from a zero initial solution.
34 |     typedef typename Vector<TConfig>::value_type value_type;
35 |     fill(res, types::util<value_type>::get_zero(), view_offset, view_size);
36 |     // solve() is handed a non-const reference, hence the const_cast on v.
37 |     Vector<TConfig> &rhs = const_cast<Vector<TConfig>&>(v);
38 |     if (m_solver->solve(rhs, res, false) != AMGX_ST_CONVERGED)
39 |     {
40 |         FatalError("OperatorSolve: solver did not converge.", AMGX_ERR_CONFIGURATION);
41 |     }
42 | }
43 | // setup(): asserts that both m_A and m_solver are set, then runs the solver's setup on *m_A.
44 | template <typename TConfig>
45 | void SolveOperator<TConfig>::setup()
46 | {
47 |     assert(m_A);
48 |     assert(m_solver);
49 |     m_solver->setup(*m_A, false);  // NOTE(review): the meaning of the 'false' flag is defined by the solver's setup() - confirm
50 | }
51 | 
52 | #define AMGX_CASE_LINE(CASE) template class SolveOperator<TemplateMode<CASE>::Type>;
53 | AMGX_FORALL_BUILDS(AMGX_CASE_LINE)
54 | AMGX_FORCOMPLEX_BUILDS(AMGX_CASE_LINE)
55 | #undef AMGX_CASE_LINE
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/include/cusp/relaxation/jacobi.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file jacobi.h
 6 |  *  \brief Jacobi relaxation.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | #include <cusp/linear_operator.h>
14 | 
15 | namespace cusp
16 | {
17 | namespace relaxation
18 | {
19 | // Jacobi relaxation class; only declarations here (definitions presumably in cusp/relaxation/detail/jacobi.inl).
20 | template <typename ValueType, typename MemorySpace>
21 | class jacobi
22 | {
23 |     ValueType default_omega;  // NOTE(review): presumably the weight used when operator() is called without omega - confirm
24 |     cusp::array1d<ValueType,MemorySpace> diagonal;  // NOTE(review): presumably the diagonal of A - confirm
25 |     cusp::array1d<ValueType,MemorySpace> temp;  // NOTE(review): presumably scratch storage - confirm
26 | 
27 | public:
28 |     jacobi();
29 |     // Construct from a matrix A; omega defaults to 1.0.
30 |     template <typename MatrixType>
31 |     jacobi(const MatrixType& A, ValueType omega=1.0);
32 |     
33 |     // ignores initial x
34 |     template<typename MatrixType, typename VectorType1, typename VectorType2>
35 |     void presmooth(const MatrixType& A, const VectorType1& b, VectorType2& x);
36 |    
37 |     // smooths initial x
38 |     template<typename MatrixType, typename VectorType1, typename VectorType2>
39 |     void postsmooth(const MatrixType& A, const VectorType1& b, VectorType2& x);
40 |     // Call operator without an explicit omega.
41 |     template <typename MatrixType, typename VectorType1, typename VectorType2>
42 |     void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x);
43 |     // Call operator with a caller-supplied omega.
44 |     template <typename MatrixType, typename VectorType1, typename VectorType2>
45 |     void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x, ValueType omega);
46 | };
47 | 
48 | } // end namespace relaxation
49 | } // end namespace cusp
50 | 
51 | #include <cusp/relaxation/detail/jacobi.inl>
52 | 
53 | 


--------------------------------------------------------------------------------
/ci/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env sh
 2 | 
 3 | # SPDX-FileCopyrightText: 2024 NVIDIA CORPORATION. All Rights Reserved.
 4 | #
 5 | # SPDX-License-Identifier: BSD-3-Clause
 6 | 
 7 | # Supports the following environment variables:
 8 | #
 9 | # AMGX_CI_CONTAINERS: list of containers to run. Default: all containers.
10 | # AMGX_CI_KEEP_BUILD: preserves build directory. Default: build directories
11 | # are cleaned up each time.
12 | # AMGX_CI_CONTAINER_FILE: if set, the generated container recipe is saved as
13 | # Dockerfile_<container base name>. Default: the recipe is echoed to stderr.
14 | set -ex
15 | # Lint the CI scripts first when shellcheck is installed.
16 | if command -v shellcheck ; then
17 |     shellcheck ci/*.sh
18 | fi
19 | 
20 | CONTAINERS=$(ls ci/containers)
21 | if [ -n "${AMGX_CI_CONTAINERS}" ]; then
22 |     CONTAINERS="${AMGX_CI_CONTAINERS}"
23 | fi
24 | 
25 | KEEP_BUILD=0
26 | if [ -n "${AMGX_CI_KEEP_BUILD}" ]; then
27 |     KEEP_BUILD="${AMGX_CI_KEEP_BUILD}"
28 | fi
29 | # Default tee target for the recipe: stderr.
30 | CONTAINER_FILE=/dev/fd/2
31 | 
32 | for CONTAINER in $CONTAINERS; do
33 |     BASE_NAME=$(basename "${CONTAINER}" .py)
34 |     BASE_IMG="amgx:base_${BASE_NAME}"
35 |     BUILD_DIR="build_${BASE_NAME}"
36 |     RECIPE="ci/containers/${CONTAINER}"
37 |     if ! test -f "${RECIPE}"; then
38 |         echo "Container at \"${RECIPE}\" does not exist"
39 |         exit 1
40 |     fi
41 | 
42 |     if [ -n "${AMGX_CI_CONTAINER_FILE}" ]; then
43 |         CONTAINER_FILE="Dockerfile_${BASE_NAME}"
44 |     fi
45 |     # Build the image, then run ci/test.sh in it as the invoking user (plain id -u / id -g also works when USER is unset).
46 |     hpccm --recipe "${RECIPE}" --format=docker \
47 |         | tee "${CONTAINER_FILE}" | \
48 |         docker build -t "${BASE_IMG}" -
49 |     nvidia-docker \
50 |         run \
51 |         -v "$(pwd -LP)":/amgx \
52 |         -u "$(id -u)":"$(id -g)" \
53 |         "${BASE_IMG}" \
54 |         bash -c "cd /amgx/ && AMGX_CI_KEEP_BUILD=${KEEP_BUILD} ./ci/test.sh ${BUILD_DIR}"
55 | done
56 | 


--------------------------------------------------------------------------------
/src/configs/AMG_CLASSICAL_AGGRESSIVE_CHEB_L1_TRUNC.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "config_version": 2, 
 3 |     "determinism_flag": 1,
 4 |     "solver":{
 5 |         "scope": "main",
 6 |         "solver": "PCG", 
 7 |         "store_res_history": 1, 
 8 |         "print_solve_stats": 1, 
 9 |         "obtain_timings": 1, 
10 |         "preconditioner": {
11 |            "print_grid_stats": 1, 
12 |            "scope": "amg_solver", 
13 |            "interpolator": "D2", 
14 |            "solver": "AMG", 
15 |            "max_levels": 50, 
16 |            "selector": "PMIS", 
17 |            "cycle": "V", 
18 |            "presweeps": 0, 
19 |            "postsweeps": 3,
20 |            "coarsest_sweeps": 2, 
21 |            "min_coarse_rows": 2, 
22 |            "coarse_solver": "NOSOLVER", 
23 |            "max_iters": 1, 
24 |            "max_row_sum": 0.9, 
25 |            "strength_threshold": 0.25, 
26 |            "error_scaling":3,
27 |            "print_grid_stats": 1, 
28 |            "aggressive_levels": 1, 
29 |            "interp_max_elements": 4, 
30 |            "smoother": {
31 |               "relaxation_factor": 0.91, 
32 |               "scope": "jacobi", 
33 |               "solver": "CHEBYSHEV",
34 |               "preconditioner" : 
35 |               {
36 |                   "solver": "JACOBI_L1",
37 |                   "max_iters": 1
38 |               },
39 |               "chebyshev_polynomial_order": 2,
40 |               "chebyshev_lambda_estimate_mode": 2
41 |             } 
42 |          },
43 |         "max_iters": 100, 
44 |         "monitor_residual": 1, 
45 |         "convergence": "RELATIVE_INI", 
46 |         "tolerance" : 1e-06, 
47 |         "norm": "L2"
48 |     } 
49 | }
50 | 


--------------------------------------------------------------------------------
/include/eigensolvers/eigenvector_solver.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <matrix.h>
 8 | #include <eigensolvers/eigensolver.h>
 9 | 
10 | namespace amgx
11 | {
12 | // EigenVectorSolver: declares setup() on an operator and solve() taking an eigenvalue and an eigenvector.
13 | template <class TConfig>
14 | class EigenVectorSolver
15 | {
16 |     public:
17 |         typedef Matrix<TConfig> MMatrix;
18 |         typedef Vector<TConfig> VVector;
19 |         // The same configuration with the memory space set to host / device.
20 |         typedef typename TConfig::template setMemSpace<AMGX_host  >::Type TConfig_h;
21 |         typedef typename TConfig::template setMemSpace<AMGX_device>::Type TConfig_d;
22 |         // Matrix types for the host / device configurations.
23 |         typedef Matrix<TConfig_h> Matrix_h;
24 |         typedef Matrix<TConfig_d> Matrix_d;
25 |         // Vector types for the host / device configurations.
26 |         typedef Vector<TConfig_h> Vector_h;
27 |         typedef Vector<TConfig_d> Vector_d;
28 |         // Matrix, vector and index precision types taken from the configuration.
29 |         typedef typename TConfig::MatPrec ValueTypeMat;
30 |         typedef typename TConfig::VecPrec ValueTypeVec;
31 |         typedef typename TConfig::IndPrec IndType;
32 | 
33 |         EigenVectorSolver(AMG_Config &cfg, const std::string &cfg_scope);
34 |         ~EigenVectorSolver();
35 | 
36 |         void setup(Operator<TConfig> &A);
37 |         AMGX_STATUS solve(ValueTypeVec eigenvalue, VVector &eigenvector);  // NOTE(review): presumably eigenvalue is input and eigenvector is output - confirm
38 |     private:
39 |         AMG_Config m_cfg;
40 |         Operator<TConfig> *m_A;  // raw pointer; NOTE(review): ownership is not visible in this header - confirm
41 |         EigenSolver<TConfig> *m_solver;  // raw pointer; NOTE(review): ownership likewise to be confirmed
42 | };
43 | // Factory with a public create(name) and a private create_inverse_iteration() helper; both return a raw pointer.
44 | template <class TConfig>
45 | class EigenVectorSolverFactory
46 | {
47 |     public:
48 |         static EigenVectorSolver<TConfig> *create(std::string &name);  // NOTE(review): presumably selects the implementation by name - confirm
49 |     private:
50 |         static EigenVectorSolver<TConfig> *create_inverse_iteration();
51 | };
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/src/cycles/w_cycle.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cycles/w_cycle.h>
 6 | 
 7 | namespace amgx
 8 | {
 9 | // Overload taking TConfig_h arguments: invokes W_Cycle<TConfig_h> twice in a row with the same amg, level, b and x.
10 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
11 | void W_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_h> *level, Vector<TConfig_h> &b, Vector<TConfig_h> &x ) const
12 | {
13 |     W_Cycle<TConfig_h>( amg, level, b, x );
14 |     W_Cycle<TConfig_h>( amg, level, b, x );  // second invocation; NOTE(review): presumably what makes this a W rather than a V cycle - confirm
15 | }
16 | // Overload taking TConfig_d arguments: opens a CPU profiler marker, then invokes W_Cycle<TConfig_d> twice in a row.
17 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
18 | void W_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_d> *level, Vector<TConfig_d> &b, Vector<TConfig_d> &x  ) const
19 | {
20 |     AMGX_CPU_PROFILER( "W_Cycle::dispatch " );  // only this overload is profiled; the TConfig_h one is not
21 |     W_Cycle<TConfig_d>( amg, level, b, x );
22 |     W_Cycle<TConfig_d>( amg, level, b, x );  // second invocation with the same arguments
23 | }
24 | 
25 | /****************************************
26 |  * Explicit instantiations
27 |  ***************************************/
28 | template class W_CycleDispatcher<AMGX_vecDouble, AMGX_matDouble, AMGX_indInt>;
29 | template class W_CycleDispatcher<AMGX_vecFloat, AMGX_matFloat, AMGX_indInt>;
30 | template class W_CycleDispatcher<AMGX_vecDouble, AMGX_matFloat, AMGX_indInt>;
31 | 
32 | template class W_CycleDispatcher<AMGX_vecComplex, AMGX_matComplex, AMGX_indInt>;
33 | template class W_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matComplex, AMGX_indInt>;
34 | template class W_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matDoubleComplex, AMGX_indInt>;
35 | 
36 | } // namespace amgx
37 | 


--------------------------------------------------------------------------------
/src/cycles/f_cycle.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <cycles/f_cycle.h>
 6 | #include <cycles/w_cycle.h>
 7 | #include <cycles/v_cycle.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Overload taking TConfig_h arguments: invokes W_Cycle<TConfig_h> and then V_Cycle<TConfig_h> with the same arguments.
12 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
13 | void F_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_h> *level, Vector<TConfig_h> &b, Vector<TConfig_h> &x ) const
14 | {
15 |     W_Cycle<TConfig_h>( amg, level, b, x );  // W first ...
16 |     V_Cycle<TConfig_h>( amg, level, b, x );  // ... then V
17 | }
18 | // Overload taking TConfig_d arguments: invokes W_Cycle<TConfig_d> and then V_Cycle<TConfig_d> with the same arguments.
19 | template< AMGX_VecPrecision t_vecPrec, AMGX_MatPrecision t_matPrec, AMGX_IndPrecision t_indPrec >
20 | void F_CycleDispatcher<t_vecPrec, t_matPrec, t_indPrec>::dispatch( AMG_Class *amg, AMG_Level<TConfig_d> *level, Vector<TConfig_d> &b, Vector<TConfig_d> &x ) const
21 | {
22 |     W_Cycle<TConfig_d>( amg, level, b, x );  // W first ...
23 |     V_Cycle<TConfig_d>( amg, level, b, x );  // ... then V
24 | }
25 | 
26 | /****************************************
27 |  * Explicit instantiations
28 |  ***************************************/
29 | template class F_CycleDispatcher<AMGX_vecDouble, AMGX_matDouble, AMGX_indInt>;
30 | template class F_CycleDispatcher<AMGX_vecFloat, AMGX_matFloat, AMGX_indInt>;
31 | template class F_CycleDispatcher<AMGX_vecDouble, AMGX_matFloat, AMGX_indInt>;
32 | 
33 | template class F_CycleDispatcher<AMGX_vecComplex, AMGX_matComplex, AMGX_indInt>;
34 | template class F_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matComplex, AMGX_indInt>;
35 | template class F_CycleDispatcher<AMGX_vecDoubleComplex, AMGX_matDoubleComplex, AMGX_indInt>;
36 | 
37 | } // namespace amgx
38 | 


--------------------------------------------------------------------------------
/include/cusp/detail/host/reference/ell.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #ifndef __ELL_H__
 6 | #define __ELL_H__
 7 | 
 8 | ////////////////////////////////////////////////////////////////////////////////
 9 | //! Compute y += A*x for a sparse ELL matrix A and column vectors x and y
10 | //! @param num_rows             number of rows in A
11 | //! @param num_cols             number of columns in A
12 | //! @param num_entries_per_row  number columns in each row (smaller rows are zero padded)
13 | //! @param stride               separation between row entries (stride >= num_rows, for alignment)
14 | //! @param Aj                   ELL column indices (entries outside [0, num_cols), e.g. -1 padding, are skipped)
15 | //! @param Ax                   ELL nonzero values
16 | //! @param x                    column vector
17 | //! @param y                    column vector
18 | ////////////////////////////////////////////////////////////////////////////////
19 | template <typename IndexType, typename ValueType>
20 | void ell_matvec(const IndexType num_rows,
21 |                 const IndexType num_cols,
22 |                 const IndexType num_entries_per_row,
23 |                 const IndexType stride,
24 |                 const IndexType * Aj, 
25 |                 const ValueType * Ax, 
26 |                 const ValueType * x,
27 |                       ValueType * y)
28 | {
29 |     for(IndexType n = 0; n < num_entries_per_row; n++){
30 |         const IndexType * Aj_n = Aj + n * stride;
31 |         const ValueType * Ax_n = Ax + n * stride;
32 |         for(IndexType i = 0; i < num_rows; i++){
33 |             const IndexType col = Aj_n[i];
34 |             if (col < 0 || col >= num_cols) continue;  // never index x with a padding / out-of-range column
35 |             y[i] += Ax_n[i] * x[col];
36 |         }
37 |     }
38 | }
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/examples/convert.c:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include "amgx_c.h"
 6 | #include <string>
 7 | 
 8 | // Tool for conversion MatrixMarket files to binary files (mainly for faster reading or storing on disk)
 9 | // compilation: g++ convert.c -o convert -lamgxsh -L../lib -Wl,-rpath=../lib
10 | // run: convert <MMfile>
11 | 
12 | // Converts the system stored in the file argv[1] to binary form: reads it with
13 | // AMGX_read_system and writes it to "<argv[1]>.bin" with AMGX_write_system
14 | // (the config string selects matrix_writer=binary).
15 | int main(int argc, char *argv[])
16 | {
17 |     AMGX_config_handle cfg;
18 |     AMGX_matrix_handle A;
19 |     AMGX_vector_handle b, x;
20 |     AMGX_resources_handle rsrc;
21 |     AMGX_Mode mode = AMGX_mode_hDDI;
22 | 
23 |     // argv[0] is the program name, so the file name is present only when
24 |     // argc >= 2; without it argv[1] is a null pointer.
25 |     if (argc < 2)
26 |     {
27 |         printf("Specify matrix file as first argument");
28 |         exit(2);
29 |     }
30 | 
31 |     AMGX_SAFE_CALL(AMGX_initialize());
32 |     AMGX_SAFE_CALL(AMGX_config_create(&cfg, "config_version=2, matrix_writer=binary"));
33 |     AMGX_resources_create_simple(&rsrc, cfg);
34 |     AMGX_matrix_create(&A, rsrc, mode);
35 |     AMGX_vector_create(&x, rsrc, mode);
36 |     AMGX_vector_create(&b, rsrc, mode);
37 |     std::string arg = argv[1];
38 |     int n, bsize_x, bsize_y, sol_size, sol_bsize;
39 |     // Stop on a read error rather than carrying on with whatever the handles hold.
40 |     AMGX_SAFE_CALL(AMGX_read_system(A, b, x, arg.c_str()));
41 |     AMGX_matrix_get_size(A, &n, &bsize_x, &bsize_y);
42 |     AMGX_vector_get_size(x, &sol_size, &sol_bsize);
43 | 
44 |     if (sol_size == 0 || sol_bsize == 0)
45 |     {
46 |         printf("Initializing solution with 0\n");
47 |         AMGX_vector_set_zero(x, n, bsize_x);
48 |     }
49 | 
50 |     arg = arg + ".bin";
51 |     AMGX_SAFE_CALL(AMGX_write_system(A, b, x, arg.c_str()));
52 |     // Release the vectors and the matrix before the resources they were created with.
53 |     AMGX_vector_destroy(x);
54 |     AMGX_vector_destroy(b);
55 |     AMGX_matrix_destroy(A);
56 |     AMGX_resources_destroy(rsrc);
57 |     AMGX_SAFE_CALL(AMGX_config_destroy(cfg));
58 |     AMGX_SAFE_CALL(AMGX_finalize());
59 | }
60 | 


--------------------------------------------------------------------------------
/include/cusp/graph/maximal_independent_set.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file maximal_independent_set.h
 6 |  *  \brief Maximal independent set of a graph
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | namespace cusp
14 | {
15 | namespace graph
16 | {
17 | /*! \addtogroup algorithms Algorithms
18 |  *  \ingroup algorithms
19 |  *  \{
20 |  */
21 | 
22 | /*! \p maximal_independent_set : computes a maximal independent set (MIS)
23 |  * of a graph.  The MIS is a set of vertices such that (1) no two vertices
24 |  * are adjacent and (2) it is not possible to add another vertex to the
25 |  * set without violating the first property.  The MIS(k) is a generalization
26 |  * of the MIS with the property that no two vertices in the set are joined
27 |  * by a path of \p k edges or less.  The standard MIS is therefore a MIS(1).
28 |  *
29 |  * The MIS(k) is represented by an array of {0,1} values.  Specifically,  
30 |  * <tt>stencil[i]</tt> is 1 if vertex \p i is a member of the MIS(k) and
31 |  * 0 otherwise.
32 |  *
33 |  * \param A symmetric matrix that represents a graph
34 |  * \param stencil array to hold the MIS(k)
35 |  * \param k radius of independence
36 |  *
37 |  * \tparam Matrix matrix
38 |  * \tparam Array array
39 |  *
40 |  *  \see http://en.wikipedia.org/wiki/Maximal_independent_set
41 |  */
42 |     
43 | template <typename Matrix, typename Array>
44 | size_t maximal_independent_set(const Matrix& A, Array& stencil, size_t k = 1);
45 | 
46 | /*! \}
47 |  */
48 | 
49 | 
50 | } // end namespace graph
51 | } // end namespace cusp
52 | 
53 | #include <cusp/graph/detail/maximal_independent_set.inl>
54 | 
55 | 


--------------------------------------------------------------------------------
/src/classical/interpolators/common.cu:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #include <types.h>
 6 | #include <classical/interpolators/common.h>
 7 | 
 8 | namespace amgx
 9 | {
10 | 
11 | /*
12 |  * Row-wise entry count: 1 if cf_map[row] >= 0, 0 if it equals STRONG_FINE,
13 |  * otherwise the length of the row's [C_hat_start, C_hat_end) range.
14 |  */
15 | __global__
16 | void nonZerosPerRowKernel(const int num_rows, const int *cf_map, const int *C_hat_start,
17 |                           const int *C_hat_end, int *nonZerosPerRow)
18 | {
19 |     // Each thread starts at its global index and advances by gridDim.x * blockDim.x.
20 |     for (int row = threadIdx.x + blockIdx.x * blockDim.x; row < num_rows; row += gridDim.x * blockDim.x)
21 |     {
22 |         const int tag = cf_map[row];
23 |         int count;
24 | 
25 |         if (tag >= 0)
26 |         {
27 |             count = 1;
28 |         }
29 |         else
30 |         {
31 |             // tag is negative here: STRONG_FINE rows stay empty, all others take their range length.
32 |             count = (tag == STRONG_FINE) ? 0 : C_hat_end[row] - C_hat_start[row];
33 |         }
34 | 
35 |         nonZerosPerRow[row] = count;
36 |     }
37 | }
38 | 
39 | __global__
40 | void nonZerosPerRowSizeKernel(const int num_rows, const int *cf_map,
41 |                               const int *C_hat_size, int *nonZerosPerRow)
42 | {
43 |     // Per-row entry count where the row length comes directly from C_hat_size.
44 |     // Each thread starts at its global index and advances by gridDim.x * blockDim.x.
45 |     for (int row = threadIdx.x + blockIdx.x * blockDim.x; row < num_rows; row += gridDim.x * blockDim.x)
46 |     {
47 |         const int tag = cf_map[row];
48 |         int count;
49 | 
50 |         if (tag >= 0)
51 |         {
52 |             count = 1;
53 |         }
54 |         else
55 |         {
56 |             // tag is negative here: STRONG_FINE rows stay empty, all others use C_hat_size.
57 |             count = (tag == STRONG_FINE) ? 0 : C_hat_size[row];
58 |         }
59 | 
60 |         nonZerosPerRow[row] = count;
61 |     }
62 | }
63 | 
64 | 
65 | 
66 | } // namespace amgx
67 | 


--------------------------------------------------------------------------------
/include/convergence/relative_ini.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <convergence/convergence.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Convergence check derived from Convergence<TConfig>.  NOTE(review): the name suggests a test relative to the initial norm - confirm in the implementation.
12 | template<typename TConfig>
13 | class RelativeIniConvergence : public Convergence<TConfig>
14 | {
15 |     public:
16 |         static const AMGX_VecPrecision vecPrec = TConfig::vecPrec;
17 |         static const AMGX_MatPrecision matPrec = TConfig::matPrec;
18 |         static const AMGX_IndPrecision indPrec = TConfig::indPrec;
19 |         typedef Vector<TemplateConfig<AMGX_host, vecPrec, matPrec, indPrec> > Vector_h;
20 |         typedef typename TConfig::VecPrec ValueTypeB;
21 |         typedef typename types::PODTypes<ValueTypeB>::type PODValueTypeB;
22 |         typedef typename TConfig::template setMemSpace<AMGX_host>::Type TConfig_h;
23 |         typedef typename TConfig::template setMemSpace<AMGX_device>::Type TConfig_d;
24 |         typedef Vector<typename TConfig::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec;
25 |         typedef Vector<typename TConfig_h::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec_h;
26 |         RelativeIniConvergence(AMG_Config &amg, const std::string &cfg_scope);
27 |         // Declared here only; the bodies of the members below are not part of this header.
28 |         void convergence_init();
29 |         // Takes nrm and nrm_ini as host-memory vectors at the POD vector precision and reports an AMGX_STATUS.
30 |         AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini);
31 | 
32 | };
33 | // Factory: create() returns a RelativeIniConvergence<TConfig> allocated with new, as a raw Convergence<TConfig> pointer.
34 | template<typename TConfig>
35 | class RelativeIniConvergenceFactory : public ConvergenceFactory<TConfig>
36 | {
37 |     public:
38 |         Convergence<TConfig> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new RelativeIniConvergence<TConfig>(cfg, cfg_scope); }
39 | };
40 | 
41 | } // end namespace amgx
42 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/spmv/coo_serial.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <thrust/device_ptr.h>
 8 | 
 9 | namespace cusp
10 | {
11 | namespace detail
12 | {
13 | namespace device
14 | {
15 | 
16 | // Single-thread COO SpMV kernel: accumulates y[I[n]] += V[n] * x[J[n]] for
17 | // every stored entry, strictly in order.  Far too slow for real workloads;
18 | // intended for tests, *extremely* small matrices, or the few entries left
19 | // over at the end of a larger matrix.
20 | 
21 | template <typename IndexType, typename ValueType>
22 | __global__ void spmv_coo_serial_kernel(const IndexType num_entries,
23 |                                        const IndexType * I, const IndexType * J,
24 |                                        const ValueType * V, const ValueType * x,
25 |                                        ValueType * y)
26 | {
27 |     IndexType entry = 0;
28 |     while (entry < num_entries)
29 |     {
30 |         const IndexType row = I[entry];
31 |         y[row] += V[entry] * x[J[entry]];
32 |         ++entry;
33 |     }
34 | }
35 | 
36 | 
37 | // Host-side launcher: takes raw pointers to A's row/column/value arrays and
38 | // hands them, together with A.num_entries, to the serial COO kernel.
39 | template <typename Matrix, typename ValueType>
40 | void spmv_coo_serial_device(const Matrix& A, const ValueType* x, ValueType* y)
41 | {
42 |     typedef typename Matrix::index_type IndexType;
43 | 
44 |     const ValueType * values = amgx::thrust::raw_pointer_cast(&A.values[0]);
45 |     const IndexType * cols   = amgx::thrust::raw_pointer_cast(&A.column_indices[0]);
46 |     const IndexType * rows   = amgx::thrust::raw_pointer_cast(&A.row_indices[0]);
47 | 
48 |     // <<<1,1>>>: one block of one thread; the kernel itself loops over all entries.
49 |     spmv_coo_serial_kernel<IndexType,ValueType> <<<1,1>>>
50 |         (A.num_entries, rows, cols, values, x, y);
51 | }
52 | 
53 | } // end namespace device
54 | } // end namespace detail
55 | } // end namespace cusp
56 | 
57 | 


--------------------------------------------------------------------------------
/include/cusp/precond/detail/diagonal.inl:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file diagonal.inl
 6 |  *  \brief Inline file for diagonal.h
 7 |  */
 8 | 
 9 | #include <cusp/blas.h>
10 | #include <cusp/detail/format_utils.h>
11 | 
12 | #include <thrust/functional.h>
13 | #include <thrust/transform.h>
14 | 
15 | namespace cusp
16 | {
17 | namespace precond
18 | {
19 | namespace detail
20 | {
21 |     // Unary functor returning T(1.0) / v; no zero check is performed.
22 |     template <typename T>
23 |     struct reciprocal
24 |     {
25 |         __host__ __device__ T operator()(const T& v) const // const: callable on const functor objects
26 |         {
27 |             return T(1.0) / v;
28 |         }
29 |     };
30 | 
31 | } // end namespace detail
32 | 
33 | 
34 | // constructor: caches the element-wise reciprocals of A's main diagonal
35 | template <typename ValueType, typename MemorySpace>
36 |     template<typename MatrixType>
37 |     diagonal<ValueType,MemorySpace>
38 |     ::diagonal(const MatrixType& A)
39 |         : linear_operator<ValueType,MemorySpace>(A.num_rows, A.num_cols, A.num_rows)
40 |     {
41 |         // extract the main diagonal of A into diagonal_reciprocals
42 |         cusp::detail::extract_diagonal(A, diagonal_reciprocals);
43 |         // NOTE(review): no guard against zero diagonal entries is visible here
44 |         // invert the entries in place (the transform writes back to the same range)
45 |         thrust_wrapper::transform(diagonal_reciprocals.begin(), diagonal_reciprocals.end(),
46 |                           diagonal_reciprocals.begin(), detail::reciprocal<ValueType>());
47 |     }
48 |         
49 | // linear operator: passes diagonal_reciprocals, x and y to cusp::blas::xmy (presumably y = diagonal_reciprocals .* x element-wise - confirm against cusp/blas.h)
50 | template <typename ValueType, typename MemorySpace>
51 |     template <typename VectorType1, typename VectorType2>
52 |     void diagonal<ValueType, MemorySpace>
53 |     ::operator()(const VectorType1& x, VectorType2& y) const
54 |     {
55 |         cusp::blas::xmy(diagonal_reciprocals, x, y);
56 |     }
57 | 
58 | } // end namespace precond
59 | } // end namespace cusp
60 | 
61 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/generalized_spmv/coo_serial.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/coo_matrix.h>
 8 | 
 9 | namespace cusp
10 | {
11 | namespace detail
12 | {
13 | namespace device
14 | {
15 | 
16 | // Serial COO SpMV: a single thread walks all num_nonzeros entries and adds
17 | // V[n] * x[J[n]] into y[I[n]].  Incredibly slow by design, so only useful
18 | // for testing, *extremely* small matrices, or a few elements at the end of
19 | // a larger matrix.
20 | 
21 | template <typename IndexType, typename ValueType>
22 | __global__ void spmv_coo_serial_kernel(const IndexType num_nonzeros,
23 |                                        const IndexType * I, const IndexType * J,
24 |                                        const ValueType * V, const ValueType * x,
25 |                                        ValueType * y)
26 | {
27 |     // y is accumulated into; nothing in this kernel clears it first.
28 |     for (IndexType pos = 0; pos < num_nonzeros; ++pos)
29 |     {
30 |         const IndexType row = I[pos];
31 |         const IndexType col = J[pos];
32 |         y[row] += V[pos] * x[col];
33 |     }
34 | }
35 | 
36 | 
37 | // Launches the single-thread COO kernel (<<<1,1>>>) on raw pointers into coo's row/column/value arrays.
38 | template <typename IndexType, typename ValueType>
39 | void spmv_coo_serial_device(const coo_matrix<IndexType,ValueType,cusp::device_memory>& coo,
40 |                             const ValueType * d_x, ValueType * d_y)
41 | {
42 |     const IndexType * I = amgx::thrust::raw_pointer_cast(&coo.row_indices[0]);
43 |     const IndexType * J = amgx::thrust::raw_pointer_cast(&coo.column_indices[0]);
44 |     const ValueType * V = amgx::thrust::raw_pointer_cast(&coo.values[0]);
45 | 
46 |     // Pass the raw pointers extracted above and the entry count under the name the sibling spmv/ header uses (num_entries).
47 |     spmv_coo_serial_kernel<IndexType,ValueType> <<<1,1>>> (coo.num_entries, I, J, V, d_x, d_y);
48 | }
49 | 
50 | } // end namespace device
51 | } // end namespace detail
52 | } // end namespace cusp
53 | 
54 | 


--------------------------------------------------------------------------------
/include/cusp/detail/device/generalized_spmv/hyb.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <cusp/hyb_matrix.h>
 8 | 
 9 | #include <cusp/detail/device/spmv/ell.h>
10 | #include <cusp/detail/device/spmv/coo.h>
11 | 
12 | namespace cusp
13 | {
14 | namespace detail
15 | {
16 | namespace device
17 | {
18 | 
19 | // SpMV for the hybrid ELL/COO matrix format: runs spmv on hyb.ell first and then on hyb.coo, passing the same x and y to both.
20 | template <typename IndexType, typename ValueType>
21 | void spmv_hyb(const cusp::hyb_matrix<IndexType, ValueType, cusp::device_memory>& hyb, 
22 |               const ValueType * x, 
23 |                     ValueType * y)
24 | {
25 |     cusp::detail::device::spmv(hyb.ell, x, y);
26 |     cusp::detail::device::spmv(hyb.coo, x, y);
27 | }
28 | 
29 | // Hybrid ELL/COO SpMV through the spmv_tex entry points: ELL part first, then COO part.
30 | template <typename IndexType, typename ValueType>
31 | void spmv_hyb_tex(const cusp::hyb_matrix<IndexType, ValueType, cusp::device_memory>& hyb,
32 |                   const ValueType * x, ValueType * y)
33 | {
34 |     cusp::detail::device::spmv_tex(hyb.ell, x, y);
35 |     cusp::detail::device::spmv_tex(hyb.coo, x, y);
36 | }
37 | 
38 | // spmv overload for device-memory hyb_matrix: forwards to spmv_hyb.
39 | template <typename IndexType, typename ValueType>
40 | void spmv(const cusp::hyb_matrix<IndexType, ValueType, cusp::device_memory>& hyb, 
41 |           const ValueType * x, 
42 |                 ValueType * y)
43 | {
44 |     spmv_hyb(hyb, x, y);
45 | }
46 | 
47 | // spmv_tex overload for device-memory hyb_matrix: forwards to spmv_hyb_tex.
48 | template <typename IndexType, typename ValueType>
49 | void spmv_tex(const cusp::hyb_matrix<IndexType, ValueType, cusp::device_memory>& hyb,
50 |               const ValueType * x, ValueType * y)
51 | {
52 |     spmv_hyb_tex(hyb, x, y);
53 | }
54 | 
55 | } // end namespace device
56 | } // end namespace detail
57 | } // end namespace cusp
58 | 
59 | 


--------------------------------------------------------------------------------
/include/convergence/absolute.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <convergence/convergence.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Convergence check derived from Convergence<TConfig>.  NOTE(review): the name suggests an absolute-norm criterion - confirm in the implementation.
12 | template<class TConfig>
13 | class AbsoluteConvergence : public Convergence<TConfig>
14 | {
15 |     public:
16 |         static const AMGX_VecPrecision vecPrec = TConfig::vecPrec;
17 |         static const AMGX_MatPrecision matPrec = TConfig::matPrec;
18 |         static const AMGX_IndPrecision indPrec = TConfig::indPrec;
19 |         typedef Vector<TemplateConfig<AMGX_host, vecPrec, matPrec, indPrec> > Vector_h;
20 |         typedef typename TConfig::VecPrec ValueTypeB;
21 |         typedef typename TConfig::MatPrec ValueTypeA;
22 |         typedef typename types::PODTypes<ValueTypeB>::type PODValueTypeB;
23 |         typedef typename TConfig::template setMemSpace<AMGX_host>::Type TConfig_h;
24 |         typedef typename TConfig::template setMemSpace<AMGX_device>::Type TConfig_d;
25 |         typedef Vector<typename TConfig::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec;
26 |         typedef Vector<typename TConfig_h::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec_h;
27 |         AbsoluteConvergence(AMG_Config &amg, const std::string &cfg_scope);
28 |         // Declared here only; the bodies of the members below are not part of this header.
29 |         void convergence_init();
30 |         // Takes nrm and nrm_ini as host-memory vectors at the POD vector precision and reports an AMGX_STATUS.
31 |         AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini);
32 | };
33 | // Factory: create() returns an AbsoluteConvergence<TConfig> allocated with new, as a raw Convergence<TConfig> pointer.
34 | template<class TConfig>
35 | class AbsoluteConvergenceFactory : public ConvergenceFactory<TConfig>
36 | {
37 |     public:
38 |         Convergence<TConfig> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new AbsoluteConvergence<TConfig>(cfg, cfg_scope); }
39 | };
40 | 
41 | } // end namespace amgx
42 | 


--------------------------------------------------------------------------------
/include/cusp/relaxation/polynomial.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file polynomial.h
 6 |  *  \brief polynomial relaxation.
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | #include <cusp/linear_operator.h>
14 | 
15 | namespace cusp
16 | {
17 | namespace relaxation
18 | {
19 | // Polynomial relaxation scheme exposing presmooth/postsmooth and two call operators.
20 | template <typename ValueType, typename MemorySpace>
21 | class polynomial
22 | {
23 |     cusp::array1d<ValueType, host_memory> default_coefficients; // always in host memory, regardless of MemorySpace
24 |     cusp::array1d<ValueType, MemorySpace> residual;
25 |     cusp::array1d<ValueType, MemorySpace> h;
26 |     cusp::array1d<ValueType, MemorySpace> y;
27 | 
28 | public:
29 |     polynomial();
30 | 
31 |     template <typename MatrixType, typename VectorType>
32 |     polynomial(const MatrixType& A, const VectorType& coefficients);
33 | 
34 |     // ignores initial x
35 |     template<typename MatrixType, typename VectorType1, typename VectorType2>
36 |     void presmooth(const MatrixType& A, const VectorType1& b, VectorType2& x);
37 |    
38 |     // smooths initial x
39 |     template<typename MatrixType, typename VectorType1, typename VectorType2>
40 |     void postsmooth(const MatrixType& A, const VectorType1& b, VectorType2& x);
41 |     // const overload: no coefficient vector is passed in
42 |     template <typename MatrixType, typename VectorType1, typename VectorType2>
43 |     void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x) const;
44 |     // non-const overload: the caller supplies the coefficient vector as the last argument
45 |     template <typename MatrixType, typename VectorType1, typename VectorType2, typename VectorType3>
46 |     void operator()(const MatrixType& A, const VectorType1& b, VectorType2& x, VectorType3& coefficients);
47 | };
48 | 
49 | } // end namespace relaxation
50 | } // end namespace cusp
51 | 
52 | #include <cusp/relaxation/detail/polynomial.inl>
53 | 
54 | 


--------------------------------------------------------------------------------
/include/convergence/relative_max.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2011 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <convergence/convergence.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Convergence check derived from Convergence<TConfig>.  NOTE(review): the name suggests a test relative to a maximum norm - confirm in the implementation.
12 | template<class TConfig>
13 | class RelativeMaxConvergence : public Convergence<TConfig>
14 | {
15 |     public:
16 |         static const AMGX_VecPrecision vecPrec = TConfig::vecPrec;
17 |         static const AMGX_MatPrecision matPrec = TConfig::matPrec;
18 |         static const AMGX_IndPrecision indPrec = TConfig::indPrec;
19 |         typedef Vector<TemplateConfig<AMGX_host, vecPrec, matPrec, indPrec> > Vector_h;
20 |         typedef typename TConfig::VecPrec ValueTypeB;
21 |         typedef typename TConfig::template setMemSpace<AMGX_host>::Type TConfig_h;
22 |         typedef typename TConfig::template setMemSpace<AMGX_device>::Type TConfig_d;
23 |         typedef typename types::PODTypes<ValueTypeB>::type PODValueTypeB;
24 |         typedef Vector<typename TConfig::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec;
25 |         typedef Vector<typename TConfig_h::template setVecPrec<types::PODTypes<ValueTypeB>::vec_prec>::Type> PODVec_h;
26 | 
27 |         RelativeMaxConvergence(AMG_Config &amg, const std::string &cfg_scope);
28 |         // Declared here only; the bodies of the members below are not part of this header.
29 |         void convergence_init();
30 |         // Takes nrm and nrm_ini as host-memory vectors at the POD vector precision and reports an AMGX_STATUS.
31 |         AMGX_STATUS convergence_update_and_check(const PODVec_h &nrm, const PODVec_h &nrm_ini);
32 |         // Host-memory state; by its name presumably the largest norms seen so far (TODO confirm).
33 |     private:
34 |         PODVec_h _max_nrm;
35 | };
36 | // Factory: create() returns a RelativeMaxConvergence<TConfig> allocated with new, as a raw Convergence<TConfig> pointer.
37 | template<class TConfig>
38 | class RelativeMaxConvergenceFactory : public ConvergenceFactory<TConfig>
39 | {
40 |     public:
41 |         Convergence<TConfig> *create(AMG_Config &cfg, const std::string &cfg_scope) { return new RelativeMaxConvergence<TConfig>(cfg, cfg_scope); }
42 | };
43 | 
44 | } // end namespace amgx
45 | 
46 | 


--------------------------------------------------------------------------------
/include/cusp/linear_operator.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | /*! \file linear_operator.h
 6 |  *  \brief Abstract interface for iterative solvers
 7 |  */
 8 | 
 9 | #pragma once
10 | 
11 | #include <cusp/detail/config.h>
12 | 
13 | #include <cusp/format.h>
14 | #include <cusp/blas.h>
15 | #include <cusp/exception.h>
16 | #include <cusp/detail/matrix_base.h>
17 | 
18 | namespace cusp
19 | {
20 | // Operator base class: a matrix_base tagged cusp::unknown_format whose constructors only forward their arguments to it.
21 | template <typename ValueType, typename MemorySpace, typename IndexType=int>
22 | class linear_operator : public cusp::detail::matrix_base<IndexType,ValueType,MemorySpace,cusp::unknown_format>
23 | {
24 |   typedef cusp::detail::matrix_base<IndexType,ValueType,MemorySpace,cusp::unknown_format> Parent;
25 |  public:
26 |   linear_operator()
27 |       : Parent() {}
28 |   // rows and columns only
29 |   linear_operator(IndexType num_rows, IndexType num_cols)
30 |       : Parent(num_rows, num_cols) {}
31 |   // rows and columns plus an explicit entry count
32 |   linear_operator(IndexType num_rows, IndexType num_cols, IndexType num_entries)
33 |       : Parent(num_rows, num_cols, num_entries) {}
34 | }; // linear_operator
35 | // Identity operator: applying it copies x into y via cusp::blas::copy.
36 | template <typename ValueType, typename MemorySpace, typename IndexType=int>
37 | class identity_operator : public linear_operator<ValueType,MemorySpace,IndexType>
38 | {
39 |     // Parent has to carry IndexType as well; without it the alias names the real base class only when IndexType is int.
40 |     typedef linear_operator<ValueType,MemorySpace,IndexType> Parent;
41 |     public:
42 |     identity_operator() 
43 |         : Parent() {}
44 |     
45 |     identity_operator(IndexType num_rows, IndexType num_cols)
46 |         : Parent(num_rows, num_cols) {}
47 | 
48 |     template <typename VectorType1,
49 |               typename VectorType2>
50 |     void operator()(const VectorType1& x, VectorType2& y) const
51 |     {
52 |         cusp::blas::copy(x, y);
53 |     }
54 | }; // identity_operator
55 | 
56 | } // end namespace cusp
57 | 
58 | 


--------------------------------------------------------------------------------
/examples/install_makefiles_mpi/Makefile:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: 2011 - 2024 NVIDIA CORPORATION. All Rights Reserved.
 2 | #
 3 | # SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | # CUDA Toolkit location
 6 | INC_D = -I/usr/local/cuda/include
 7 | LIB_D = -L/usr/local/cuda/lib64
 8 | LIB_L = -lcusparse -lcublas -lcudart -ldl
 9 | 
10 | # Examples
11 | amgx_capi:
12 | 	gcc -O2 -std=c99 amgx_capi.c -c $(INC_D)
13 | 	g++ -O2 amgx_capi.o -o amgx_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -Wl,-rpath=../lib
14 | 
15 | amgx_mpi_capi:
16 | 	mpicc -O2 -std=c99 amgx_mpi_capi.c -c $(INC_D) 
17 | 	mpic++ -O2 amgx_mpi_capi.o -o amgx_mpi_capi $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib
18 | 
19 | amgx_capi_dynamic:
20 | 	gcc -O2 -std=c99 amgx_capi.c -c -o amgx_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING
21 | 	g++ -O2 amgx_capi_dynamic.o -o amgx_capi_dynamic $(LIB_D) $(LIB_L) -Wl,-rpath=../lib
22 | 
23 | amgx_mpi_capi_dynamic:
24 | 	mpicc -O2 -std=c99 amgx_mpi_capi.c -c -o amgx_mpi_capi_dynamic.o $(INC_D) -DAMGX_DYNAMIC_LOADING
25 | 	mpic++ -O2 amgx_mpi_capi_dynamic.o -o amgx_mpi_capi_dynamic $(LIB_D) $(LIB_L) -lmpi -Wl,-rpath=../lib
26 | 
27 | amgx_mpi_capi_agg:
28 | 	mpicc -O2 -std=c99 amgx_mpi_capi_agg.c -c $(INC_D) 
29 | 	mpic++ -O2 amgx_mpi_capi_agg.o -o amgx_mpi_capi_agg $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib
30 | 
31 | amgx_mpi_capi_cla:
32 | 	mpicc -O2 -std=c99 amgx_mpi_capi_cla.c -c $(INC_D) 
33 | 	mpic++ -O2 amgx_mpi_capi_cla.o -o amgx_mpi_capi_cla $(LIB_D) $(LIB_L) -L../lib -lamgxsh -lmpi -Wl,-rpath=../lib
34 | 
35 | # All (all and clean are phony: they never correspond to files on disk)
36 | all: amgx_capi amgx_mpi_capi amgx_capi_dynamic amgx_mpi_capi_dynamic amgx_mpi_capi_agg amgx_mpi_capi_cla
37 | .PHONY: all clean
38 | # Clean
39 | clean:
40 | 	rm -f amgx_capi
41 | 	rm -f amgx_mpi_capi
42 | 	rm -f amgx_capi_dynamic
43 | 	rm -f amgx_mpi_capi_dynamic
44 | 	rm -f amgx_mpi_capi_cla
45 | 	rm -f amgx_mpi_capi_agg
46 | 	rm -f *.o
47 | 


--------------------------------------------------------------------------------
/external/rapidjson/readme.txt:
--------------------------------------------------------------------------------
 1 | rapidjson v0.11
 2 | 
 3 | Copyright (c) 2011 Milo Yip (miloyip@gmail.com)
 4 | 
 5 | http://code.google.com/p/rapidjson/
 6 | 
 7 | 16 Nov 2012
 8 | 
 9 | 1. Introduction
10 | Rapidjson is a JSON parser and generator for C++. It was inspired by rapidxml http://rapidxml.sourceforge.net/
11 | Rapidjson is small but complete. It supports both SAX and DOM style API. The SAX parser is only about five hundred lines of code.
12 | Rapidjson is fast. Its performance can be comparable to strlen(). It also optionally supports SSE2/SSE4.1 for acceleration.
13 | Rapidjson is self-contained. It does not depend on external libraries such as BOOST. It does not even depend on the STL.
14 | Rapidjson is memory friendly. Each JSON value costs exactly 16/20 bytes for 32/64-bit machines (excluding text string). By default it uses a fast memory allocator, and the parser allocates memory compactly during parsing. 
15 | 
16 | For the full features please refer to the user guide.
17 | 
18 | JSON(JavaScript Object Notation) is a light-weight data exchange format.
19 | More information about JSON can be obtained at
20 | http://json.org/
21 | http://www.ietf.org/rfc/rfc4627.txt
22 | 
23 | 2. Installation
24 | 
25 | Rapidjson is a header-only C++ library. Just copy the rapidjson/include/rapidjson folder to system or project's include path.
26 | 
27 | To build the tests and examples,
28 | 1. obtain premake4 http://industriousone.com/premake/download
29 | 2. Copy premake4 executable to rapidjson/build
30 | 3. Run rapidjson/build/premake.bat on Windows, rapidjson/build/premake on Linux or other platforms
31 | 4. On Windows, build the solution at rapidjson/build/vs2008/ or /vs2010/
32 | 5. On other platforms, run GNU make at rapidjson/build/gmake/ (e.g., make -f test.make config=release32, make -f example.make config=debug32)
33 | 6. On success, the executables are generated in rapidjson/bin
34 | 


--------------------------------------------------------------------------------
/include/eigensolvers/subspace_iteration_eigensolver.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <eigensolvers/eigensolver.h>
 8 | 
 9 | namespace amgx
10 | {
11 | // Eigensolver derived from EigenSolver<TConfig>; by its name a subspace-iteration method (see the implementation for the algorithm).
12 | template <class TConfig>
13 | class SubspaceIteration_EigenSolver : public EigenSolver<TConfig>
14 | {
15 |     public:
16 |         typedef EigenSolver<TConfig> Base;
17 |         // Aliases pulled in from the base class.
18 |         typedef typename Base::TConfig_h TConfig_h;
19 |         typedef typename Base::VVector VVector;
20 |         typedef typename Base::MMatrix MMatrix;
21 |         typedef typename Base::Vector_h Vector_h;
22 |         typedef typename Base::Matrix_h Matrix_h;
23 |         typedef typename Base::ValueTypeMat ValueTypeMat;
24 |         typedef typename Base::ValueTypeVec ValueTypeVec;
25 | 
26 |         SubspaceIteration_EigenSolver(AMG_Config &cfg, const std::string &cfg_scope);
27 |         ~SubspaceIteration_EigenSolver();
28 |         // Setup and solve entry points; only declared in this header.
29 |         void solver_setup();
30 |         void solver_pagerank_setup(VVector &a);
31 |         void solve_init(VVector &x);
32 |         bool solve_iteration(VVector &x);
33 |         void solve_finalize();
34 |     private:
35 |         void orthonormalize(VVector &V);
36 |     private:
37 |         VVector m_X;
38 |         VVector m_V;
39 |         VVector m_H;
40 |         VVector m_R;
41 |         int m_subspace_size;
42 |         int m_wanted_count;
43 |         ValueTypeVec m_initial_residual;
44 | };
45 | // Factory: create() returns a SubspaceIteration_EigenSolver<TConfig> allocated with new; the tmng argument is not used.
46 | template<class TConfig>
47 | class SubspaceIteration_EigenSolverFactory : public EigenSolverFactory<TConfig>
48 | {
49 |     public:
50 |         EigenSolver<TConfig> *create(AMG_Config &cfg, const std::string &cfg_scope, ThreadManager *tmng)
51 |         {
52 |             return new SubspaceIteration_EigenSolver<TConfig>(cfg, cfg_scope);
53 |         }
54 | };
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/include/eigensolvers/arnoldi_eigensolver.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2013 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <eigensolvers/eigensolver.h>
 8 | #include <vector>
 9 | #include <cusp/array2d.h>
10 | 
11 | namespace amgx
12 | {
13 | // Eigensolver derived from EigenSolver<TConfig>; by its name an Arnoldi method (see the implementation for the algorithm).
14 | template <class TConfig>
15 | class Arnoldi_EigenSolver : public EigenSolver<TConfig>
16 | {
17 |     public:
18 |         typedef EigenSolver<TConfig> Base;
19 |         // Aliases pulled in from the base class.
20 |         typedef typename Base::TConfig_h TConfig_h;
21 |         typedef typename Base::VVector VVector;
22 |         typedef typename Base::MMatrix MMatrix;
23 |         typedef typename Base::Vector_h Vector_h;
24 |         typedef typename Base::Matrix_h Matrix_h;
25 |         typedef typename Base::ValueTypeMat ValueTypeMat;
26 |         typedef typename Base::ValueTypeVec ValueTypeVec;
27 | 
28 |         Arnoldi_EigenSolver(AMG_Config &cfg, const std::string &cfg_scope);
29 |         ~Arnoldi_EigenSolver();
30 |         // Setup and solve entry points; only declared in this header.
31 |         void solver_setup();
32 |         void solver_pagerank_setup(VVector &a);
33 |         void solve_init(VVector &x);
34 |         bool solve_iteration(VVector &x);
35 |         void solve_finalize();
36 |         // NOTE(review): m_V_vectors holds raw pointers; presumably free_allocated() releases them - confirm.
37 |     private:
38 |         void free_allocated();
39 |     private:
40 |         int m_krylov_size;
41 |         std::vector<VVector *> m_V_vectors;
42 |         Vector_h m_H;
43 |         Vector_h m_H_tmp;
44 |         Vector_h m_ritz_eigenvalues;
45 |         Vector_h m_ritz_eigenvectors;
46 |         ValueTypeVec m_beta;
47 | };
48 | // Factory: create() returns an Arnoldi_EigenSolver<TConfig> allocated with new; the tmng argument is not used.
49 | template<class TConfig>
50 | class Arnoldi_EigenSolverFactory : public EigenSolverFactory<TConfig>
51 | {
52 |     public:
53 |         EigenSolver<TConfig> *create(AMG_Config &cfg, const std::string &cfg_scope, ThreadManager *tmng)
54 |         {
55 |             return new Arnoldi_EigenSolver<TConfig>(cfg, cfg_scope);
56 |         }
57 | };
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/include/cusp/detail/host/reference/dia.h:
--------------------------------------------------------------------------------
 1 | // SPDX-FileCopyrightText: 2008 - 2025 NVIDIA CORPORATION. All Rights Reserved.
 2 | //
 3 | // SPDX-License-Identifier: BSD-3-Clause
 4 | 
 5 | #ifndef __DIA_H__
 6 | #define __DIA_H__
 7 | 
 8 | #include <algorithm>
 9 | 
10 | 
11 | /*
12 |  * Compute Y += A*X for DIA matrix A and dense vectors X,Y
13 |  *
14 |  * The index type I may be any signed integer type, not only int.
15 |  * Input Arguments:
16 |  *   I  n_row            - number of rows in A
17 |  *   I  n_col            - number of columns in A
18 |  *   I  n_diags          - number of diagonals
19 |  *   I  L                - length of each diagonal
20 |  *   I  offsets[n_diags] - diagonal offsets 
21 |  *   T  diags[n_diags,L] - nonzeros 
22 |  *   T  Xx[n_col]        - input vector
23 |  *
24 |  * Output Arguments:
25 |  *   T  Yx[n_row]        - output vector 
26 |  *
27 |  * Note:
28 |  *   Output array Yx must be preallocated
29 |  *   Negative offsets correspond to lower diagonals
30 |  *   Positive offsets correspond to upper diagonals
31 |  *   Yx is accumulated into (+=), never cleared
32 |  */
33 | template <class I, class T>
34 | void dia_matvec(const I n_row,
35 |                 const I n_col,
36 |                 const I n_diags,
37 |                 const I L,
38 |                 const I offsets[],
39 |                 const T diags[],
40 |                 const T Xx[],
41 |                       T Yx[])
42 | {
43 |     for(I i = 0; i < n_diags; i++){
44 |         const I k = offsets[i];  //diagonal offset
45 |         // Explicit <I>: with mixed argument types (int literal vs. I) std::max/std::min cannot deduce their template argument.
46 |         const I i_start = std::max<I>(0, -k);
47 |         const I j_start = std::max<I>(0, k);
48 |         const I j_end   = std::min<I>(std::min<I>(n_row + k, n_col), L);
49 | 
50 |         const I N = j_end - j_start;  //number of elements to process (the loop below is skipped when N <= 0)
51 | 
52 |         const T * diag = diags + i*L + j_start;
53 |         const T * x = Xx + j_start;
54 |               T * y = Yx + i_start;
55 | 
56 |         for(I n = 0; n < N; n++){
57 |             y[n] += diag[n] * x[n]; 
58 |         }
59 |     }
60 | }
61 | 
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------