├── requirements.txt
├── test
    ├── CMakeLists.txt
    └── gtest
    │   ├── CMakeLists.txt
    │   ├── add_test.cpp
    │   ├── util.hpp
    │   ├── transpose_test.cpp
    │   └── triangular_solve_test.cpp
├── include
    ├── spblas
    │   ├── vendor
    │   │   ├── armpl
    │   │   │   ├── armpl.hpp
    │   │   │   ├── algorithms.hpp
    │   │   │   ├── detail
    │   │   │   │   ├── detail.hpp
    │   │   │   │   ├── create_matrix_handle.hpp
    │   │   │   │   └── export_matrix_handle.hpp
    │   │   │   ├── types.hpp
    │   │   │   ├── operation_state_t.hpp
    │   │   │   ├── triangular_solve_impl.hpp
    │   │   │   └── multiply_impl.hpp
    │   │   ├── cusparse
    │   │   │   ├── cusparse.hpp
    │   │   │   ├── multiply.hpp
    │   │   │   ├── type_validation.hpp
    │   │   │   ├── detail
    │   │   │   │   ├── abstract_operation_state.hpp
    │   │   │   │   ├── get_transpose.hpp
    │   │   │   │   ├── cusparse_tensors.hpp
    │   │   │   │   └── spmv_state_t.hpp
    │   │   │   ├── operation_state_t.hpp
    │   │   │   ├── cuda_allocator.hpp
    │   │   │   ├── types.hpp
    │   │   │   ├── exception.hpp
    │   │   │   └── spmv_impl.hpp
    │   │   ├── rocsparse
    │   │   │   ├── rocsparse.hpp
    │   │   │   ├── multiply.hpp
    │   │   │   ├── type_validation.hpp
    │   │   │   ├── detail
    │   │   │   │   ├── abstract_operation_state.hpp
    │   │   │   │   ├── get_transpose.hpp
    │   │   │   │   ├── rocsparse_tensors.hpp
    │   │   │   │   ├── spmv_state_t.hpp
    │   │   │   │   └── spmv_impl.hpp
    │   │   │   ├── operation_state_t.hpp
    │   │   │   ├── hip_allocator.hpp
    │   │   │   ├── types.hpp
    │   │   │   └── exception.hpp
    │   │   ├── aoclsparse
    │   │   │   ├── detail
    │   │   │   │   ├── detail.hpp
    │   │   │   │   └── create_matrix_handle.hpp
    │   │   │   ├── aoclsparse.hpp
    │   │   │   ├── algorithms.hpp
    │   │   │   ├── types.hpp
    │   │   │   ├── operation_state_t.hpp
    │   │   │   ├── spmv_impl.hpp
    │   │   │   ├── spmm_impl.hpp
    │   │   │   └── triangular_solve_impl.hpp
    │   │   └── onemkl_sycl
    │   │   │   ├── onemkl_sycl.hpp
    │   │   │   ├── algorithms.hpp
    │   │   │   ├── detail
    │   │   │       ├── detail.hpp
    │   │   │       ├── get_queue.hpp
    │   │   │       ├── execution_policy.hpp
    │   │   │       ├── get_pointer_device.hpp
    │   │   │       ├── get_matrix_handle.hpp
    │   │   │       └── create_matrix_handle.hpp
    │   │   │   ├── types.hpp
    │   │   │   ├── spmv_impl.hpp
    │   │   │   ├── operation_state_t.hpp
    │   │   │   ├── spmm_impl.hpp
    │   │   │   └── triangular_solve_impl.hpp
    │   ├── views
    │   │   ├── view_base.hpp
    │   │   ├── matrix_opt.hpp
    │   │   ├── scaled_view.hpp
    │   │   ├── views.hpp
    │   │   ├── csc_view.hpp
    │   │   ├── csr_view.hpp
    │   │   ├── matrix_opt_impl.hpp
    │   │   └── inspectors.hpp
    │   ├── algorithms
    │   │   ├── detail
    │   │   │   ├── spgemm
    │   │   │   │   ├── spgemm.hpp
    │   │   │   │   ├── spgemm_innerproduct.hpp
    │   │   │   │   └── spgemm_outerproduct.hpp
    │   │   │   └── sparse_dot_product.hpp
    │   │   ├── scale.hpp
    │   │   ├── scaled.hpp
    │   │   ├── transpose.hpp
    │   │   ├── scaled_impl.hpp
    │   │   ├── add.hpp
    │   │   ├── algorithms.hpp
    │   │   ├── triangular_solve.hpp
    │   │   ├── transposed.hpp
    │   │   ├── scale_impl.hpp
    │   │   ├── multiply.hpp
    │   │   ├── transpose_impl.hpp
    │   │   ├── triangular_solve_impl.hpp
    │   │   ├── multiply_impl.hpp
    │   │   └── add_impl.hpp
    │   ├── detail
    │   │   ├── detail.hpp
    │   │   ├── concepts.hpp
    │   │   ├── mdspan.hpp
    │   │   ├── triangular_types.hpp
    │   │   ├── tuple_concept.hpp
    │   │   ├── ranges.hpp
    │   │   ├── types.hpp
    │   │   ├── index.hpp
    │   │   ├── tag_invoke.hpp
    │   │   ├── operation_info_t.hpp
    │   │   ├── log.hpp
    │   │   └── view_inspectors.hpp
    │   ├── spblas.hpp
    │   ├── backend
    │   │   ├── backend.hpp
    │   │   ├── concepts.hpp
    │   │   ├── algorithms.hpp
    │   │   ├── hash_accumulator.hpp
    │   │   ├── csr_builder.hpp
    │   │   ├── spa_accumulator.hpp
    │   │   └── cpos.hpp
    │   └── concepts.hpp
    └── CMakeLists.txt
├── .clang-format
├── examples
    ├── cusparse
    │   ├── CMakeLists.txt
    │   ├── util.hpp
    │   └── cusparse_simple_spmv.cpp
    ├── rocsparse
    │   ├── CMakeLists.txt
    │   ├── util.hpp
    │   └── rocsparse_simple_spmv.cpp
    ├── device
    │   ├── CMakeLists.txt
    │   └── device_spmv.cpp
    ├── CMakeLists.txt
    ├── simple_spmv.cpp
    ├── spmm_csc.cpp
    ├── simple_spmm.cpp
    ├── matrix_opt_example.cpp
    ├── simple_sptrsv.cpp
    └── simple_spgemm.cpp
├── .pre-commit-config.yaml
├── notes
    ├── spgemm.cpp
    ├── spmv.hpp
    ├── matrices.hpp
    └── matrix_data_structure_notes.hpp
├── .github
    ├── pull_request_template.md
    └── workflows
    │   └── ci.yml
└── LICENSE


/requirements.txt:
--------------------------------------------------------------------------------
1 | pre-commit
2 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(gtest)
2 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/armpl.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "algorithms.hpp"
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/cusparse.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "multiply.hpp"
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/multiply.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "spmv_impl.hpp"
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/rocsparse.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "multiply.hpp"
4 | 


--------------------------------------------------------------------------------
/include/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | add_library(spblas INTERFACE)
3 | target_include_directories(spblas INTERFACE .)
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/detail/detail.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "create_matrix_handle.hpp"
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/onemkl_sycl.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "algorithms.hpp"
4 | #include <cstdint>
5 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/multiply.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <spblas/vendor/rocsparse/detail/spmv_impl.hpp>
4 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/algorithms.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "multiply_impl.hpp"
4 | 
5 | #include "triangular_solve_impl.hpp"
6 | 


--------------------------------------------------------------------------------
/include/spblas/views/view_base.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | namespace spblas {
4 | 
5 | class view_base {};
6 | 
7 | } // namespace spblas
8 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/detail/detail.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "armpl.hpp"
4 | #include "create_matrix_handle.hpp"
5 | #include "export_matrix_handle.hpp"
6 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/detail/spgemm/spgemm.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "spgemm_gustavsons.hpp"
4 | #include "spgemm_innerproduct.hpp"
5 | #include "spgemm_outerproduct.hpp"
6 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/algorithms.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "spgemm_impl.hpp"
4 | #include "spmm_impl.hpp"
5 | #include "spmv_impl.hpp"
6 | #include "triangular_solve_impl.hpp"
7 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | BasedOnStyle: LLVM
3 | PointerAlignment: Left
4 | ColumnLimit: 80
5 | AlwaysBreakTemplateDeclarations: Yes
6 | AllowShortFunctionsOnASingleLine: Empty
7 | SpaceAfterCStyleCast: Yes
8 | ---
9 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/detail.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "create_matrix_handle.hpp"
4 | #include "execution_policy.hpp"
5 | #include "get_matrix_handle.hpp"
6 | #include "get_queue.hpp"
7 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | using index_t = std::int32_t;
 8 | using offset_t = index_t;
 9 | 
10 | } // namespace spblas
11 | 


--------------------------------------------------------------------------------
/examples/cusparse/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | function(add_cuda_example example_name)
2 |   add_executable(${example_name} ${example_name}.cpp)
3 |   target_link_libraries(${example_name} spblas fmt)
4 | endfunction()
5 | 
6 | add_cuda_example(cusparse_simple_spmv)
7 | 


--------------------------------------------------------------------------------
/examples/rocsparse/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | function(add_rocm_example example_name)
2 |   add_executable(${example_name} ${example_name}.cpp)
3 |   target_link_libraries(${example_name} spblas fmt)
4 | endfunction()
5 | 
6 | add_rocm_example(rocsparse_simple_spmv)
7 | 


--------------------------------------------------------------------------------
/include/spblas/detail/detail.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <spblas/detail/index.hpp>
4 | #include <spblas/detail/mdspan.hpp>
5 | #include <spblas/detail/ranges.hpp>
6 | #include <spblas/detail/tag_invoke.hpp>
7 | #include <spblas/detail/types.hpp>
8 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | #include <spblas/vendor/armpl/detail/armpl.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | using index_t = armpl_int_t;
 9 | using offset_t = index_t;
10 | 
11 | } // namespace spblas
12 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/aoclsparse.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "algorithms.hpp"
13 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/scale.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | template <typename Scalar, matrix M>
 8 | void scale(Scalar alpha, M&& m);
 9 | 
10 | template <typename Scalar, vector V>
11 | void scale(Scalar alpha, V&& v);
12 | 
13 | } // namespace spblas
14 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/scaled.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | template <typename Scalar, matrix M>
 8 | auto scaled(Scalar alpha, M&& m);
 9 | 
10 | template <typename Scalar, vector V>
11 | auto scaled(Scalar alpha, V&& v);
12 | 
13 | } // namespace spblas
14 | 


--------------------------------------------------------------------------------
/include/spblas/views/matrix_opt.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/concepts.hpp>
 4 | #include <spblas/backend/cpos.hpp>
 5 | #include <spblas/concepts.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | // Matrix optimization data for a tensor `T`.
10 | template <typename T>
11 | class matrix_opt;
12 | 
13 | } // namespace spblas
14 | 


--------------------------------------------------------------------------------
/include/spblas/views/scaled_view.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/concepts.hpp>
 4 | #include <spblas/backend/cpos.hpp>
 5 | #include <spblas/concepts.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | // Scale a tensor of type `T` by a scaling factor of type `S`.
10 | template <typename S, typename T>
11 | class scaled_view;
12 | 
13 | } // namespace spblas
14 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 | - repo: https://github.com/pre-commit/mirrors-clang-format
 4 |   rev: v16.0.6
 5 |   hooks:
 6 |     - id: clang-format
 7 | 
 8 | - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |   rev: v4.4.0
10 |   hooks:
11 |     - id: trailing-whitespace
12 |     - id: end-of-file-fixer
13 |     - id: mixed-line-ending
14 |     - id: check-added-large-files
15 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/algorithms.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "spgemm_impl.hpp"
13 | #include "spmm_impl.hpp"
14 | #include "spmv_impl.hpp"
15 | #include "triangular_solve_impl.hpp"
16 | 


--------------------------------------------------------------------------------
/include/spblas/views/views.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/views/csc_view.hpp>
 4 | #include <spblas/views/csr_view.hpp>
 5 | #include <spblas/views/inspectors.hpp>
 6 | #include <spblas/views/matrix_opt.hpp>
 7 | #include <spblas/views/matrix_opt_impl.hpp>
 8 | #include <spblas/views/scaled_view.hpp>
 9 | #include <spblas/views/scaled_view_impl.hpp>
10 | #include <spblas/views/view_base.hpp>
11 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "aoclsparse.h"
13 | #include <cstdint>
14 | 
15 | namespace spblas {
16 | 
17 | using index_t = aoclsparse_int;
18 | using offset_t = aoclsparse_int;
19 | 
20 | } // namespace spblas
21 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/transpose.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/detail/operation_info_t.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | template <matrix A, matrix B>
 9 | operation_info_t transpose_inspect(A&& a, B&& b);
10 | 
11 | template <matrix A, matrix B>
12 | void transpose(operation_info_t& info, A&& a, B&& b);
13 | 
14 | template <matrix M>
15 | auto transposed(M&& m);
16 | 
17 | } // namespace spblas
18 | 


--------------------------------------------------------------------------------
/include/spblas/spblas.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #if defined(SPBLAS_ENABLE_ONEMKL_SYCL) || defined(SPBLAS_ENABLE_ARMPL) ||      \
 4 |     defined(SPBLAS_ENABLE_AOCLSPARSE) || defined(SPBLAS_ENABLE_ROCSPARSE) ||   \
 5 |     defined(SPBLAS_ENABLE_CUSPARSE)
 6 | #define SPBLAS_VENDOR_BACKEND true
 7 | #endif
 8 | 
 9 | #include <spblas/algorithms/algorithms.hpp>
10 | #include <spblas/concepts.hpp>
11 | #include <spblas/views/views.hpp>
12 | 
13 | #include <spblas/backend/backend.hpp>
14 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/scaled_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/views/scaled_view.hpp>
 5 | 
 6 | #include <utility>
 7 | 
 8 | namespace spblas {
 9 | 
10 | // Returns a `scaled_view` built from the scaling factor `alpha` and the
11 | // perfectly forwarded vector `v`.
12 | template <typename Scalar, vector V>
13 | auto scaled(Scalar alpha, V&& v) {
14 |   return scaled_view(alpha, std::forward<V>(v));
15 | }
16 | 
17 | // Returns a `scaled_view` built from the scaling factor `alpha` and the
18 | // perfectly forwarded matrix `m`.
19 | template <typename Scalar, matrix M>
20 | auto scaled(Scalar alpha, M&& m) {
21 |   return scaled_view(alpha, std::forward<M>(m));
22 | }
23 | 
24 | } // namespace spblas
25 | 


--------------------------------------------------------------------------------
/include/spblas/detail/concepts.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/cpos.hpp>
 4 | 
 5 | #include <any>
 6 | #include <concepts>
 7 | #include <iterator>
 8 | 
 9 | #include <spblas/detail/tuple_concept.hpp>
10 | 
11 | namespace spblas {
12 | 
13 | namespace __detail {
14 | // `matrix`: `__backend::size` and `__backend::shape` must both accept `M&`.
15 | template <typename M>
16 | concept matrix = requires(M& m) {
17 |   { __backend::size(m) } -> std::weakly_incrementable;
18 |   { __backend::shape(m) } -> tuple_like<std::size_t, std::size_t>;
19 | };
20 | 
21 | } // namespace __detail
22 | 
23 | } // namespace spblas
24 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/add.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/detail/operation_info_t.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | template <matrix A, matrix B, matrix C>
 9 | void add(A&& a, B&& b, C&& c);
10 | 
11 | template <vector A, vector B, vector C>
12 | void add(A&& a, B&& b, C&& c);
13 | 
14 | template <matrix A, matrix B, matrix C>
15 | operation_info_t add_inspect(A&& a, B&& b, C&& c);
16 | 
17 | template <matrix A, matrix B, matrix C>
18 | void add_inspect(operation_info_t& info, A&& a, B&& b, C&& c);
19 | 
20 | } // namespace spblas
21 | 


--------------------------------------------------------------------------------
/notes/spgemm.cpp:
--------------------------------------------------------------------------------
 1 | #include <sparse_blas/sparse_blas.hpp>
 2 | 
 3 | int main(int argc, char** argv) {
 4 |   using namespace spblas;
 5 | 
 6 |   csr_matrix<float> a(/* ... */);
 7 |   csr_matrix<float> b(/* ... */);
 8 |   csr_matrix<float> c;
 9 | 
10 |   auto info = multiply_inspect(a, b, c);
11 | 
12 |   // Allocate more memory for c based on `info`
13 | 
14 |   auto [values, rowptr, colind] = allocate_memory_for(info);
15 | 
16 |   // `info` also has implementation-specific optimization data.
17 | 
18 |   multiply_execute(info, a, b, c);
19 | 
20 |   // update_info_for_new_values(info, {a, left_operand_t});
21 | 
22 |   return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/include/spblas/detail/mdspan.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <version>
 4 | 
 5 | #if __has_include(<mdspan>)
 6 | #include <mdspan>
 7 | #endif
 8 | 
 9 | #if defined(__cpp_lib_mdspan) && __cpp_lib_mdspan >= 202207L
10 | // The standard library provides mdspan: alias `__mdspan` to `std`.
11 | namespace spblas {
12 | namespace __mdspan = std;
13 | }
14 | 
15 | #elif __has_include(<experimental/mdspan>)
16 | // Otherwise use <experimental/mdspan> and alias to `std::experimental`.
17 | #include <experimental/mdspan>
18 | 
19 | namespace spblas {
20 | namespace __mdspan = std::experimental;
21 | }
22 | 
23 | #else
24 | // Neither implementation is available: fail the build with an explanation.
25 | static_assert(false, "spblas requires mdspan.  Compile with a C++23 compiler "
26 |                      "or download the std/experimental implementation.");
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | **Summary:**
 2 |     Short summary of key additions, changes, or fixes, including the public-facing
 3 |     issue or bug being addressed, if one exists
 4 | 
 5 | **Details:**
 6 | 
 7 |  - list of key changes to aid present and future reviewers in understanding what
 8 |    is happening in this PR
 9 | 
10 | **Merge Checklist:**
11 | 
12 |  - [ ] Passing CI
13 |  - [ ] Update documentation or README.md
14 |  - [ ] Additional Test/example added (if applicable) and passing
15 |  - [ ] At least one reviewer approval
16 |  - [ ] (optional) Clang sanitizer scan run and triaged
17 |  - [ ] Clang formatter applied (verified as part of passing CI)
18 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/algorithms.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/algorithms/scale.hpp>
 4 | #include <spblas/algorithms/scale_impl.hpp>
 5 | 
 6 | // #include <spblas/algorithms/multiply.hpp>
 7 | 
 8 | #ifndef SPBLAS_VENDOR_BACKEND
 9 | #include <spblas/algorithms/multiply_impl.hpp>
10 | #include <spblas/algorithms/triangular_solve_impl.hpp>
11 | #endif
12 | 
13 | #include <spblas/algorithms/add.hpp>
14 | #include <spblas/algorithms/add_impl.hpp>
15 | 
16 | #include <spblas/algorithms/scaled.hpp>
17 | #include <spblas/algorithms/scaled_impl.hpp>
18 | 
19 | #include <spblas/algorithms/transpose.hpp>
20 | #include <spblas/algorithms/transpose_impl.hpp>
21 | 


--------------------------------------------------------------------------------
/examples/device/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | function(add_device_example example_name)
 2 |   add_executable(${example_name} ${example_name}.cpp)
 3 |   if (ENABLE_ROCSPARSE)
 4 |     set_source_files_properties(${example_name}.cpp PROPERTIES LANGUAGE HIP)
 5 |     target_link_libraries(${example_name} roc::rocthrust)
 6 |   elseif (ENABLE_CUSPARSE)
 7 |     target_link_libraries(${example_name} Thrust)
 8 |   elseif (ENABLE_ONEMKL_SYCL)
 9 |     target_link_libraries(${example_name} sycl_thrust)
10 |   else()
11 |     message(FATAL_ERROR "Device backend not found.")
12 |   endif()
13 |   target_link_libraries(${example_name} spblas fmt)
14 | endfunction()
15 | 
16 | add_device_example(device_spmv)
17 | 


--------------------------------------------------------------------------------
/examples/rocsparse/util.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <hip/hip_runtime.h>
 4 | 
 5 | #include <iostream>
 6 | 
 7 | // Evaluates `expression` once and, if the resulting hipError_t is not
 8 | // hipSuccess, writes the error code, its hipGetErrorString() text, and the
 9 | // call site to std::cerr. The error is only reported; execution continues.
10 | #define HIP_CHECK(expression)                                                  \
11 |   do {                                                                         \
12 |     const hipError_t status = (expression);                                    \
13 |     if (status != hipSuccess) {                                                \
14 |       std::cerr << "HIP error " << status << ": " << hipGetErrorString(status) \
15 |                 << " at " << __FILE__ << ":" << __LINE__ << std::endl;         \
16 |     }                                                                          \
17 |   } while (false)
18 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/triangular_solve.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/detail/operation_info_t.hpp>
 5 | 
 6 | // Exposition-only reference signature (cf. std::linalg). It is kept as a
 7 | // comment because `in-matrix` / `in-vector` / `out-vector` are not valid C++
 8 | // identifiers, so the declaration could never compile:
 9 | //
10 | //   template <class ExecutionPolicy, in-matrix InMat, class Triangle,
11 | //             class DiagonalStorage, in-vector InVec, out-vector OutVec>
12 | //   void triangular_matrix_vector_solve(ExecutionPolicy&& exec, InMat A,
13 | //                                       Triangle t, DiagonalStorage d,
14 | //                                       InVec b, OutVec x);
15 | 
16 | namespace spblas {
17 | 
18 | // Triangular solve entry point taking matrix `a`, the `uplo` triangle tag,
19 | // the `diag` diagonal-storage tag, and vectors `b` and `x`.
20 | // NOTE(review): presumably solves for `x` given right-hand side `b`; confirm
21 | // against triangular_solve_impl.hpp.
22 | template <matrix A, class Triangle, class DiagonalStorage, vector B, vector X>
23 | void triangular_solve(A&& a, Triangle uplo, DiagonalStorage diag, B&& b, X&& x);
24 | 
25 | } // namespace spblas
26 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/type_validation.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/types.hpp>
 4 | #include <spblas/vendor/cusparse/types.hpp>
 5 | 
 6 | namespace spblas {
 7 | namespace detail {
 8 | // True when T's scalar, index, and offset types are all valid for cuSPARSE.
 9 | template <typename T>
10 | inline constexpr bool has_valid_cusparse_matrix_types_v =
11 |     is_valid_cusparse_scalar_type_v<tensor_scalar_t<T>> &&
12 |     is_valid_cusparse_index_type_v<tensor_index_t<T>> &&
13 |     is_valid_cusparse_index_type_v<tensor_offset_t<T>>;
14 | // True when T's scalar type is valid for cuSPARSE.
15 | template <typename T>
16 | inline constexpr bool has_valid_cusparse_vector_types_v =
17 |     is_valid_cusparse_scalar_type_v<tensor_scalar_t<T>>;
18 | 
19 | } // namespace detail
20 | } // namespace spblas
21 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | function(add_example example_name)
 2 |   add_executable(${example_name} ${example_name}.cpp)
 3 |   target_link_libraries(${example_name} spblas fmt)
 4 | endfunction()
 5 | 
 6 | # CPU examples
 7 | if (SPBLAS_CPU_BACKEND)
 8 |   add_example(simple_spmv)
 9 |   add_example(simple_spmm)
10 |   add_example(simple_spgemm)
11 |   add_example(simple_sptrsv)
12 |   add_example(spmm_csc)
13 |   add_example(matrix_opt_example)
14 | endif()
15 | 
16 | # GPU examples
17 | if (SPBLAS_GPU_BACKEND)
18 |   add_subdirectory(device)
19 |   if (ENABLE_CUSPARSE)
20 |     add_subdirectory(cusparse)
21 |   endif()
22 |   if (ENABLE_ROCSPARSE)
23 |     add_subdirectory(rocsparse)
24 |   endif()
25 | endif()
26 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/type_validation.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/types.hpp>
 4 | #include <spblas/vendor/rocsparse/types.hpp>
 5 | 
 6 | namespace spblas {
 7 | namespace detail {
 8 | // True when T's scalar, index, and offset types are all valid for rocSPARSE.
 9 | template <typename T>
10 | inline constexpr bool has_valid_rocsparse_matrix_types_v =
11 |     is_valid_rocsparse_scalar_type_v<tensor_scalar_t<T>> &&
12 |     is_valid_rocsparse_index_type_v<tensor_index_t<T>> &&
13 |     is_valid_rocsparse_index_type_v<tensor_offset_t<T>>;
14 | // True when T's scalar type is valid for rocSPARSE.
15 | template <typename T>
16 | inline constexpr bool has_valid_rocsparse_vector_types_v =
17 |     is_valid_rocsparse_scalar_type_v<tensor_scalar_t<T>>;
18 | 
19 | } // namespace detail
20 | } // namespace spblas
21 | 


--------------------------------------------------------------------------------
/include/spblas/detail/triangular_types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace spblas {
 4 | 
 5 | struct upper_triangle_t {
 6 |   explicit upper_triangle_t() = default;
 7 | };
 8 | inline constexpr upper_triangle_t upper_triangle{};
 9 | 
10 | struct lower_triangle_t {
11 |   explicit lower_triangle_t() = default;
12 | };
13 | inline constexpr lower_triangle_t lower_triangle{};
14 | 
15 | struct implicit_unit_diagonal_t {
16 |   explicit implicit_unit_diagonal_t() = default;
17 | };
18 | inline constexpr implicit_unit_diagonal_t implicit_unit_diagonal{};
19 | 
20 | struct explicit_diagonal_t {
21 |   explicit explicit_diagonal_t() = default;
22 | };
23 | inline constexpr explicit_diagonal_t explicit_diagonal{};
24 | 
25 | } // namespace spblas
26 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/transposed.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/view_inspectors.hpp>
 4 | 
 5 | namespace spblas {
 6 | // CSR -> CSC: passes values/rowptr/colind through and swaps the two extents.
 7 | template <matrix M>
 8 |   requires(__detail::is_csr_view_v<M>)
 9 | auto transposed(M&& m) {
10 |   return csc_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
11 |       m.values(), m.rowptr(), m.colind(), {m.shape()[1], m.shape()[0]},
12 |       m.size());
13 | }
14 | // CSC -> CSR: passes values/colptr/rowind through and swaps the two extents.
15 | template <matrix M>
16 |   requires(__detail::is_csc_view_v<M>)
17 | auto transposed(M&& m) {
18 |   return csr_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
19 |       m.values(), m.colptr(), m.rowind(), {m.shape()[1], m.shape()[0]},
20 |       m.size());
21 | }
22 | 
23 | } // namespace spblas
24 | 


--------------------------------------------------------------------------------
/notes/spmv.hpp:
--------------------------------------------------------------------------------
 1 | #include <sparse_blas/sparse_blas.hpp>
 2 | 
 3 | int main(int argc, char** argv) {
 4 |   using namespace spblas;
 5 | 
 6 |   csr_matrix<float> a(/* ... */);
 7 |   dense_vector<float> x(/* ... */);
 8 |   dense_vector<float> y;
 9 | 
10 |   operation_info_t info;
11 | 
12 |   device_policy policy;
13 | 
14 |   multiply_inspect(info, policy, a, x, y);
15 |   multiply_inspect(info, policy, transposed(a), x, y);
16 | 
17 |   // Allocate more memory for y based on `info`
18 | 
19 |   while (/* ... */) {
20 |     multiply_execute(info, policy, a, x, y);
21 |     // do something with y, update x...
22 |     multiply_execute(info, policy, transposed(a), y, x);
23 |     // Maybe do some more stuff...
24 |   }
25 | 
26 |   return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/examples/cusparse/util.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cuda_runtime.h>
 4 | 
 5 | #define CUDA_CHECK(expression)                                                 \
 6 |   do {                                                                         \
 7 |     const cudaError_t status = expression;                                     \
 8 |     if (status != cudaSuccess) {                                               \
 9 |       std::cerr << "CUDA error " << status << ": "                             \
10 |                 << cudaGetErrorString(status) << " at " << __FILE__ << ":"     \
11 |                 << __LINE__ << std::endl;                                      \
12 |     }                                                                          \
13 |   } while (false)
14 | 


--------------------------------------------------------------------------------
/include/spblas/backend/backend.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/algorithms.hpp>
 4 | #include <spblas/backend/concepts.hpp>
 5 | #include <spblas/backend/cpos.hpp>
 6 | #include <spblas/backend/generate.hpp>
 7 | #include <spblas/backend/view_customizations.hpp>
 8 | 
 9 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
10 | #include <spblas/vendor/onemkl_sycl/onemkl_sycl.hpp>
11 | #endif
12 | 
13 | #ifdef SPBLAS_ENABLE_ARMPL
14 | #include <spblas/vendor/armpl/armpl.hpp>
15 | #endif
16 | 
17 | #ifdef SPBLAS_ENABLE_AOCLSPARSE
18 | #include <spblas/vendor/aoclsparse/aoclsparse.hpp>
19 | #endif
20 | 
21 | #ifdef SPBLAS_ENABLE_ROCSPARSE
22 | #include <spblas/vendor/rocsparse/rocsparse.hpp>
23 | #endif
24 | 
25 | #ifdef SPBLAS_ENABLE_CUSPARSE
26 | #include <spblas/vendor/cusparse/cusparse.hpp>
27 | #endif
28 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/detail/abstract_operation_state.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cusparse.h>
 4 | #include <memory>
 5 | 
 6 | namespace spblas {
 7 | namespace __cusparse {
 8 | 
 9 | class abstract_operation_state_t {
10 | public:
11 |   // Common state that all operations need
12 |   cusparseHandle_t handle() const {
13 |     return handle_;
14 |   }
15 | 
16 |   // Make std::default_delete a friend so unique_ptr can delete us
17 |   friend struct std::default_delete<abstract_operation_state_t>;
18 | 
19 | protected:
20 |   abstract_operation_state_t() {
21 |     cusparseCreate(&handle_);
22 |   }
23 | 
24 |   virtual ~abstract_operation_state_t() {
25 |     if (handle_) {
26 |       cusparseDestroy(handle_);
27 |     }
28 |   }
29 | 
30 |   cusparseHandle_t handle_;
31 | };
32 | 
33 | } // namespace __cusparse
34 | } // namespace spblas
35 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/scale_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/backend.hpp>
 4 | #include <spblas/concepts.hpp>
 5 | 
 6 | #include <algorithm>
 7 | 
 8 | namespace spblas {
 9 | 
10 | namespace {
11 | 
12 | template <typename Scalar, typename T>
13 |   requires(matrix<T> || vector<T>)
14 | void scale_impl_(Scalar alpha, T&& t) {
15 |   auto&& values = __backend::values(t);
16 |   std::for_each(__ranges::begin(values), __ranges::end(values),
17 |                 [&](auto&& v) { v *= alpha; });
18 | }
19 | 
20 | } // namespace
21 | 
// Scales a matrix in place by `alpha`.
// Forwards `m` to scale_impl_, which multiplies each stored value by `alpha`.
template <typename Scalar, matrix M>
void scale(Scalar alpha, M&& m) {
  scale_impl_(alpha, std::forward<M>(m));
}
26 | 
// Scales a vector in place by `alpha`.
// Forwards `v` to scale_impl_, which multiplies each stored value by `alpha`.
template <typename Scalar, vector V>
void scale(Scalar alpha, V&& v) {
  scale_impl_(alpha, std::forward<V>(v));
}
31 | 
32 | } // namespace spblas
33 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/detail/abstract_operation_state.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <memory>
 4 | #include <rocsparse/rocsparse.h>
 5 | 
 6 | namespace spblas {
 7 | namespace __rocsparse {
 8 | 
 9 | class abstract_operation_state_t {
10 | public:
11 |   // Common state that all operations need
12 |   rocsparse_handle handle() const {
13 |     return handle_;
14 |   }
15 | 
16 |   // Make std::default_delete a friend so unique_ptr can delete us
17 |   friend struct std::default_delete<abstract_operation_state_t>;
18 | 
19 | protected:
20 |   abstract_operation_state_t() {
21 |     rocsparse_create_handle(&handle_);
22 |   }
23 | 
24 |   virtual ~abstract_operation_state_t() {
25 |     if (handle_) {
26 |       rocsparse_destroy_handle(handle_);
27 |     }
28 |   }
29 | 
30 |   rocsparse_handle handle_;
31 | };
32 | 
33 | } // namespace __rocsparse
34 | } // namespace spblas
35 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/get_queue.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/vendor/onemkl_sycl/detail/execution_policy.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | namespace __mkl {
 8 | 
// Returns the queue that `policy` selects for the memory `ptr` points to, by
// delegating to parallel_policy::get_queue(T*). The queue is returned by
// value.
template <typename T>
sycl::queue get_queue(const spblas::mkl::parallel_policy& policy, T* ptr) {
  return policy.get_queue(ptr);
}
13 | 
// Returns a reference to the queue held by `policy`. `ptr` is accepted only
// so the signature matches the parallel_policy overload; it is not used.
template <typename T>
sycl::queue& get_queue(spblas::mkl::device_policy& policy, T* ptr) {
  return policy.get_queue();
}
18 | 
19 | } // namespace __mkl
20 | 
21 | } // namespace spblas
22 | 
23 | #if __has_include(<thrust/execution_policy.h>)
24 | 
25 | #include <thrust/execution_policy.h>
26 | 
27 | namespace spblas {
28 | 
29 | namespace __mkl {
30 | 
// Overload for a Thrust execution policy; returns `policy.get_queue()` and
// ignores `ptr`.
// NOTE(review): in upstream Thrust `thrust::execution_policy` is a class
// template (`execution_policy<DerivedPolicy>`) and has no get_queue() member,
// so this overload probably does not compile when <thrust/execution_policy.h>
// is found, unless a SYCL-specific Thrust port is in use — confirm.
template <typename T>
sycl::queue& get_queue(thrust::execution_policy& policy, T* ptr) {
  return policy.get_queue();
}
35 | 
36 | } // namespace __mkl
37 | 
38 | } // namespace spblas
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/multiply.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/detail/operation_info_t.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
// Declarations of the multiply family; the definitions are not in this
// header.
// NOTE(review): the descriptions below are inferred from names and signatures
// only (presumably c = a * b) — confirm against the implementations.

// Matrix-vector form: operands are a matrix `a` and vectors `b` and `c`.
template <matrix A, vector B, vector C>
void multiply(A&& a, B&& b, C&& c);

// Matrix-matrix form: all three operands are matrices.
template <matrix A, matrix B, matrix C>
void multiply(A&& a, B&& b, C&& c);

// Inspect phase for the matrix-matrix form; returns a new operation_info_t.
template <matrix A, matrix B, matrix C>
operation_info_t multiply_inspect(A&& a, B&& b, C&& c);

// Inspect phase that updates a caller-provided operation_info_t.
template <matrix A, matrix B, matrix C>
void multiply_inspect(operation_info_t& info, A&& a, B&& b, C&& c);

// Compute phase for the matrix-matrix form; returns a new operation_info_t.
template <matrix A, matrix B, matrix C>
operation_info_t multiply_compute(A&& a, B&& b, C&& c);

// Compute phase that updates a caller-provided operation_info_t.
template <matrix A, matrix B, matrix C>
void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c);

// Fill phase; takes the operation_info_t produced by the earlier phases.
// NOTE(review): presumably writes the result into `c` — confirm.
template <matrix A, matrix B, matrix C>
void multiply_fill(operation_info_t& info, A&& a, B&& b, C&& c);
28 | 
29 | } // namespace spblas
30 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/detail/create_matrix_handle.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <armpl_sparse.h>
 4 | #include <spblas/detail/view_inspectors.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace __armpl {
 9 | 
10 | template <matrix M>
11 |   requires __detail::is_csr_view_v<M>
12 | armpl_spmat_t create_matrix_handle(M&& m) {
13 |   armpl_spmat_t handle;
14 |   __armpl::create_spmat_csr<tensor_scalar_t<M>>(
15 |       &handle, m.shape()[0], m.shape()[1], m.rowptr().data(), m.colind().data(),
16 |       m.values().data(), ARMPL_SPARSE_CREATE_NOCOPY);
17 |   return handle;
18 | }
19 | 
20 | template <matrix M>
21 |   requires __detail::is_csc_view_v<M>
22 | armpl_spmat_t create_matrix_handle(M&& m) {
23 |   armpl_spmat_t handle;
24 |   __armpl::create_spmat_csc<tensor_scalar_t<M>>(
25 |       &handle, m.shape()[0], m.shape()[1], m.rowind().data(), m.colptr().data(),
26 |       m.values().data(), ARMPL_SPARSE_CREATE_NOCOPY);
27 |   return handle;
28 | }
29 | 
30 | } // namespace __armpl
31 | 
32 | } // namespace spblas
33 | 


--------------------------------------------------------------------------------
/include/spblas/detail/tuple_concept.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <any>
 4 | #include <concepts>
 5 | #include <iterator>
 6 | #include <tuple>
 7 | 
 8 | namespace spblas {
 9 | 
10 | namespace __detail {
11 | 
// Satisfied when the unqualified call `get<I>(tuple)` is valid for an object
// of type T and its result is convertible to U. With the default
// U = std::any, the conversion requirement is that the result can be
// converted to std::any.
template <typename T, std::size_t I, typename U = std::any>
concept tuple_element_gettable = requires(T tuple) {
  { get<I>(tuple) } -> std::convertible_to<U>;
};
16 | 
// Satisfied when T (ignoring cv/ref qualifiers) looks like a tuple whose
// elements are convertible to Args..., position by position:
//   1. std::tuple_size<T>::type exists and std::tuple_size_v<T> is a
//      std::size_t,
//   2. the number of Args equals that tuple size,
//   3. for every index I in [0, size), get<I> on a T is valid and its result
//      is convertible to the I-th type in Args (tuple_element_gettable).
// Step 3 is evaluated by an immediately invoked lambda that expands the
// index sequence into a fold over the per-element checks.
template <typename T, typename... Args>
concept tuple_like =
    requires {
      typename std::tuple_size<std::remove_cvref_t<T>>::type;
      requires std::same_as<
          std::remove_cvref_t<
              decltype(std::tuple_size_v<std::remove_cvref_t<T>>)>,
          std::size_t>;
    } && sizeof...(Args) == std::tuple_size_v<std::remove_cvref_t<T>> &&
    []<std::size_t... I>(std::index_sequence<I...>) {
      return (tuple_element_gettable<T, I, Args> && ...);
    }(std::make_index_sequence<std::tuple_size_v<std::remove_cvref_t<T>>>());
29 | 
30 | } // namespace __detail
31 | } // namespace spblas
32 | 


--------------------------------------------------------------------------------
/include/spblas/detail/ranges.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <version>
 4 | 
 5 | #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L &&                \
 6 |     defined(__cpp_lib_ranges_zip) && __cpp_lib_ranges_zip >= 202110L
 7 | 
 8 | #include <ranges>
 9 | 
10 | namespace spblas {
11 | 
12 | namespace __ranges = ::std::ranges;
13 | 
14 | namespace __detail {
15 | 
16 | namespace __ranges {
17 | 
18 | template <typename T>
19 | concept view = ::std::ranges::view<T>;
20 | 
21 | }
22 | 
23 | } // namespace __detail
24 | 
25 | } // namespace spblas
26 | 
27 | #elif __has_include(<range/v3/all.hpp>)
28 | 
29 | #include <range/v3/all.hpp>
30 | 
31 | namespace spblas {
32 | 
33 | namespace __ranges = ::ranges;
34 | 
35 | namespace __detail {
36 | 
37 | namespace __ranges {
38 | 
39 | template <typename T>
40 | concept view = ::ranges::view_<T>;
41 | 
42 | }
43 | 
44 | } // namespace __detail
45 | 
46 | } // namespace spblas
47 | 
48 | #else
// Reached only when neither the standard ranges library (with zip support)
// nor range-v3 is available: stop compilation with a readable message.
static_assert(
    false,
    "spblas requires support for std::ranges.  Compile with C++23 or later.");
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/detail/get_transpose.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cusparse.h>
 4 | #include <spblas/detail/view_inspectors.hpp>
 5 | 
 6 | namespace spblas {
 7 | namespace __cusparse {
 8 | 
//
// Returns the cusparseOperation_t to use for `m` when its arrays are
// described to cuSPARSE in the CSR format.
//
// Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose
// and returns the cusparseOperation_t value associated with it being
// represented in the CSR format
//
//     CSR = CSR + NON_TRANSPOSE
//     CSR_transpose = CSR + TRANSPOSE
//     CSC = CSR + TRANSPOSE
//     CSC_transpose = CSR + NON_TRANSPOSE
//
// Views that wrap another view (those exposing base()) are unwrapped
// recursively until a plain CSR or CSC view is reached.
//
template <matrix M>
cusparseOperation_t get_transpose(M&& m) {
  // Only views that are, or ultimately wrap, a CSR or CSC view are accepted.
  static_assert(__detail::has_csr_base<M> || __detail::has_csc_base<M>);
  if constexpr (__detail::has_base<M>) {
    // Wrapper view: the answer is that of the wrapped view.
    return get_transpose(m.base());
  } else if constexpr (__detail::is_csr_view_v<M>) {
    return CUSPARSE_OPERATION_NON_TRANSPOSE;
  } else if constexpr (__detail::is_csc_view_v<M>) {
    return CUSPARSE_OPERATION_TRANSPOSE;
  }
}
30 | 
31 | } // namespace __cusparse
32 | } // namespace spblas
33 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/detail/get_transpose.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <rocsparse/rocsparse.h>
 4 | #include <spblas/detail/view_inspectors.hpp>
 5 | 
 6 | namespace spblas {
 7 | namespace __rocsparse {
 8 | 
//
// Returns the rocsparse_operation to use for `m` when its arrays are
// described to rocSPARSE in the CSR format.
//
// Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose
// and returns the rocsparse_operation value associated with it being
// represented in the CSR format
//
//     CSR = CSR + NON_TRANSPOSE
//     CSR_transpose = CSR + TRANSPOSE
//     CSC = CSR + TRANSPOSE
//     CSC_transpose = CSR + NON_TRANSPOSE
//
// Views that wrap another view (those exposing base()) are unwrapped
// recursively until a plain CSR or CSC view is reached.
//
template <matrix M>
rocsparse_operation get_transpose(M&& m) {
  // Only views that are, or ultimately wrap, a CSR or CSC view are accepted.
  static_assert(__detail::has_csr_base<M> || __detail::has_csc_base<M>);
  if constexpr (__detail::has_base<M>) {
    // Wrapper view: the answer is that of the wrapped view.
    return get_transpose(m.base());
  } else if constexpr (__detail::is_csr_view_v<M>) {
    return rocsparse_operation_none;
  } else if constexpr (__detail::is_csc_view_v<M>) {
    return rocsparse_operation_transpose;
  }
}
30 | 
31 | } // namespace __rocsparse
32 | } // namespace spblas
33 | 


--------------------------------------------------------------------------------
/test/gtest/CMakeLists.txt:
--------------------------------------------------------------------------------
# Builds the single GoogleTest executable `spblas-tests` and registers its
# test cases with CTest.
enable_testing()

# Start from an empty list; each enabled backend appends its sources below.
set(TEST_SOURCES)

# CPU tests
if (SPBLAS_CPU_BACKEND)
  list(APPEND TEST_SOURCES
       spmv_test.cpp
       spmm_test.cpp
       spgemm_test.cpp
       spgemm_csr_csc.cpp
       add_test.cpp
       transpose_test.cpp
       triangular_solve_test.cpp)
endif()

# GPU tests
if (SPBLAS_GPU_BACKEND)
  if (ENABLE_ROCSPARSE)
    # Compile the device test as HIP when targeting rocSPARSE.
    set_source_files_properties(device/spmv_test.cpp PROPERTIES LANGUAGE HIP)
  endif()
  list(APPEND TEST_SOURCES device/spmv_test.cpp)
endif()

# NOTE(review): if neither SPBLAS_CPU_BACKEND nor SPBLAS_GPU_BACKEND is set,
# TEST_SOURCES is empty here — confirm the parent script guarantees one of them.
add_executable(spblas-tests ${TEST_SOURCES})
target_link_libraries(spblas-tests spblas fmt GTest::gtest_main)

# Backend-specific test configuration
# At most one Thrust flavour is linked; the first enabled backend wins.
if (ENABLE_ROCSPARSE)
  target_link_libraries(spblas-tests roc::rocthrust)
elseif (ENABLE_CUSPARSE)
  target_link_libraries(spblas-tests Thrust)
elseif (ENABLE_ONEMKL_SYCL)
  target_link_libraries(spblas-tests sycl_thrust)
endif()

# Register each GoogleTest case of the executable as an individual CTest test.
include(GoogleTest)
gtest_discover_tests(spblas-tests)
39 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/operation_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "detail/abstract_operation_state.hpp"
 4 | #include <memory>
 5 | 
 6 | namespace spblas {
 7 | namespace __rocsparse {
 8 | 
 9 | class operation_state_t {
10 | public:
11 |   operation_state_t() = default;
12 |   operation_state_t(std::unique_ptr<abstract_operation_state_t>&& state)
13 |       : state_(std::move(state)) {}
14 | 
15 |   // Move-only
16 |   operation_state_t(operation_state_t&&) = default;
17 |   operation_state_t& operator=(operation_state_t&&) = default;
18 | 
19 |   // No copying
20 |   operation_state_t(const operation_state_t&) = delete;
21 |   operation_state_t& operator=(const operation_state_t&) = delete;
22 | 
23 |   // Access the underlying state
24 |   template <typename T>
25 |   T* get_state() {
26 |     return dynamic_cast<T*>(state_.get());
27 |   }
28 | 
29 |   template <typename T>
30 |   const T* get_state() const {
31 |     return dynamic_cast<const T*>(state_.get());
32 |   }
33 | 
34 | private:
35 |   std::unique_ptr<abstract_operation_state_t> state_;
36 | };
37 | 
38 | } // namespace __rocsparse
39 | } // namespace spblas
40 | 


--------------------------------------------------------------------------------
/include/spblas/backend/concepts.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/cpos.hpp>
 4 | #include <spblas/detail/types.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace __backend {
 9 | 
// Satisfied when `rows(t)` is a valid expression for an lvalue `t` of type T.
template <typename T>
concept row_iterable = requires(T& t) { rows(t); };
12 | 
// Satisfied when `columns(t)` is a valid expression for an lvalue `t` of
// type T.
template <typename T>
concept column_iterable = requires(T& t) { columns(t); };
15 | 
// Satisfied when `lookup_row(t, i)` is a valid expression for an lvalue `t`
// of type T and an index `i` of type tensor_index_t<T>.
template <typename T>
concept row_lookupable = requires(T& t) { lookup_row(t, tensor_index_t<T>{}); };
18 | 
// Satisfied when `lookup_column(t, j)` is a valid expression for an lvalue
// `t` of type T and an index `j` of type tensor_index_t<T>.
template <typename T>
concept column_lookupable =
    requires(T& t) { lookup_column(t, tensor_index_t<T>{}); };
22 | 
23 | namespace {
24 | 
25 | template <typename T>
26 | concept lookupable_matrix =
27 |     requires(T& t, tensor_index_t<T> i, tensor_index_t<T> j) {
28 |       { lookup(t, i, j) };
29 |     };
30 | 
31 | template <typename T>
32 | concept lookupable_vector = requires(T& t, tensor_index_t<T> i) {
33 |   { lookup(t, i) };
34 | };
35 | 
36 | } // namespace
37 | 
// Satisfied when T supports element lookup either with two indices
// (lookupable_matrix) or with one index (lookupable_vector).
template <typename T>
concept lookupable = lookupable_matrix<T> || lookupable_vector<T>;
40 | 
41 | } // namespace __backend
42 | 
43 | } // namespace spblas
44 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/operation_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "detail/abstract_operation_state.hpp"
 4 | #include <memory>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace __cusparse {
 9 | 
10 | class operation_state_t {
11 | public:
12 |   operation_state_t() = default;
13 |   operation_state_t(std::unique_ptr<abstract_operation_state_t>&& state)
14 |       : state_(std::move(state)) {}
15 | 
16 |   // Move-only
17 |   operation_state_t(operation_state_t&&) = default;
18 |   operation_state_t& operator=(operation_state_t&&) = default;
19 | 
20 |   // No copying
21 |   operation_state_t(const operation_state_t&) = delete;
22 |   operation_state_t& operator=(const operation_state_t&) = delete;
23 | 
24 |   // Access the underlying state
25 |   template <typename T>
26 |   T* get_state() {
27 |     return dynamic_cast<T*>(state_.get());
28 |   }
29 | 
30 |   template <typename T>
31 |   const T* get_state() const {
32 |     return dynamic_cast<const T*>(state_.get());
33 |   }
34 | 
35 | private:
36 |   std::unique_ptr<abstract_operation_state_t> state_;
37 | };
38 | 
39 | } // namespace __cusparse
40 | 
41 | } // namespace spblas
42 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/execution_policy.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/vendor/onemkl_sycl/detail/get_pointer_device.hpp>
 4 | #include <sycl/sycl.hpp>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace mkl {
 9 | 
// Stateless execution policy that does not carry a queue of its own; a queue
// is produced on request.
class parallel_policy {
public:
  parallel_policy() {}

  // Returns a queue chosen from the pointer itself, via
  // __mkl::get_pointer_queue(ptr).
  template <typename T>
  sycl::queue get_queue(T* ptr) const {
    return spblas::__mkl::get_pointer_queue(ptr);
  }

  // Returns a newly constructed queue on the default-selected device. A new
  // queue object is created on every call.
  sycl::queue get_queue() const {
    return sycl::queue(sycl::default_selector_v);
  }
};
23 | 
// Execution policy bound to a caller-supplied SYCL queue. The policy stores
// its own copy of the queue object passed to the constructor.
class device_policy {
public:
  device_policy(const sycl::queue& queue) : queue_(queue) {}

  // Access to the stored queue.
  sycl::queue& get_queue() {
    return queue_;
  }

  const sycl::queue& get_queue() const {
    return queue_;
  }

  // Device associated with the stored queue.
  sycl::device get_device() const {
    return queue_.get_device();
  }

  // Context associated with the stored queue.
  sycl::context get_context() const {
    return queue_.get_context();
  }

private:
  sycl::queue queue_;
};
47 | 
// Shared, ready-made parallel_policy instance (one object program-wide, as it
// is an inline variable).
inline parallel_policy par;
49 | 
50 | } // namespace mkl
51 | 
52 | } // namespace spblas
53 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/detail/sparse_dot_product.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <optional>
 4 | 
 5 | #include <spblas/backend/spa_accumulator.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | namespace __detail {
10 | 
11 | template <typename T, typename I, typename A, typename B>
12 | std::optional<T> sparse_dot_product(__backend::spa_accumulator<T, I>& acc,
13 |                                     A&& a, B&& b) {
14 |   acc.clear();
15 | 
16 |   for (auto&& [i, v] : a) {
17 |     acc[i] = v;
18 |   }
19 | 
20 |   T sum = 0;
21 |   bool implicit_zero = true;
22 |   for (auto&& [i, v] : b) {
23 |     if (acc.contains(i)) {
24 |       sum += acc[i] * v;
25 |       implicit_zero = false;
26 |     }
27 |   }
28 | 
29 |   if (implicit_zero) {
30 |     return {};
31 |   } else {
32 |     return sum;
33 |   }
34 | }
35 | 
// Reports whether two sparse sequences of (index, value) pairs share at least
// one index.
//
// `set` is used as scratch space: it is cleared, then filled with every index
// of `a`. The entries of `b` are then scanned, stopping at the first index
// found in the set. Values are ignored.
template <typename Set, typename A, typename B>
bool sparse_intersection(Set&& set, A&& a, B&& b) {
  set.clear();
  for (auto&& [index, value] : a) {
    set.insert(index);
  }

  bool shared_index = false;
  for (auto&& [index, value] : b) {
    if (set.contains(index)) {
      shared_index = true;
      break;
    }
  }
  return shared_index;
}
52 | 
53 | } // namespace __detail
54 | 
55 | } // namespace spblas
56 | 


--------------------------------------------------------------------------------
/include/spblas/concepts.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <concepts>
 4 | #include <spblas/detail/concepts.hpp>
 5 | #include <spblas/detail/ranges.hpp>
 6 | #include <spblas/views/inspectors.hpp>
 7 | #include <spblas/views/view_base.hpp>
 8 | 
 9 | namespace spblas {
10 | 
/*
  The following types fulfill the matrix concept:
  - Instantiations of csr_view<...>
  - Instantiations of csc_view<...>
  - Instantiations of mdspan<...> with rank 2
  - Instantiations of scaled_view<M> where M is a matrix
  - Any other type accepted by __detail::matrix
    (NOTE(review): presumably how wrapper views such as scaled_view are
    admitted — confirm in spblas/detail/concepts.hpp)
*/

template <typename M>
concept matrix = __detail::is_csr_view_v<M> || __detail::is_csc_view_v<M> ||
                 __detail::is_matrix_mdspan_v<M> || __detail::matrix<M>;
22 | 
/*
  The following types fulfill the vector concept:
  - Random access range (e.g. std::vector<...>)
  A type that also satisfies the matrix concept is never a vector.
*/

template <typename V>
concept vector = __ranges::random_access_range<V> && !matrix<V>;
30 | 
// A tensor is anything that is either a matrix or a vector.
template <typename T>
concept tensor = matrix<T> || vector<T>;
33 | 
// A view is a tensor that additionally is one of:
//   - a type derived from view_base (cv/ref qualifiers ignored),
//   - a rank-2 mdspan, or
//   - a view according to the selected ranges library.
template <typename T>
concept view = tensor<T> &&
               (std::derived_from<std::remove_cvref_t<T>, view_base> ||
                __detail::is_matrix_mdspan_v<T> || __detail::__ranges::view<T>);
38 | 
39 | } // namespace spblas
40 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/get_pointer_device.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <sycl/sycl.hpp>
 4 | #include <vector>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace __mkl {
 9 | 
// Cache of SYCL contexts, one per platform, filled on first use by
// get_pointer_device(). Access is not synchronized.
inline std::vector<sycl::context> global_contexts_;
11 | 
12 | template <typename T>
13 | std::pair<sycl::device, sycl::context> get_pointer_device(T* ptr) {
14 |   if (global_contexts_.empty()) {
15 |     for (auto&& platform : sycl::platform::get_platforms()) {
16 |       sycl::context context(platform.get_devices());
17 | 
18 |       global_contexts_.push_back(context);
19 |     }
20 |   }
21 | 
22 |   for (auto&& context : global_contexts_) {
23 |     try {
24 |       sycl::device device = sycl::get_pointer_device(ptr, context);
25 |       return {device, context};
26 |     } catch (...) {
27 |     }
28 |   }
29 | 
30 |   throw std::runtime_error(
31 |       "get_pointer_device: could not locate device corresponding to pointer");
32 | }
33 | 
34 | template <typename T>
35 | sycl::queue get_pointer_queue(T* ptr) {
36 |   try {
37 |     auto&& [device, context] = get_pointer_device(ptr);
38 |     return sycl::queue(context, device);
39 |   } catch (...) {
40 |     return sycl::queue(sycl::cpu_selector_v);
41 |   }
42 | }
43 | 
44 | } // namespace __mkl
45 | 
46 | } // namespace spblas
47 | 


--------------------------------------------------------------------------------
/include/spblas/backend/algorithms.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/concepts.hpp>
 4 | #include <spblas/backend/cpos.hpp>
 5 | #include <spblas/views/views.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | namespace __backend {
10 | 
11 | template <matrix M, typename F>
12 |   requires(__backend::row_iterable<M>)
13 | void for_each(M&& m, F&& f) {
14 |   for (auto&& [i, row] : __backend::rows(m)) {
15 |     for (auto&& [j, v] : row) {
16 |       f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v)));
17 |     }
18 |   }
19 | }
20 | 
21 | template <matrix M, typename F>
22 |   requires(__backend::column_iterable<M>)
23 | void for_each(M&& m, F&& f) {
24 |   for (auto&& [j, column] : __backend::columns(m)) {
25 |     for (auto&& [i, v] : column) {
26 |       f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v)));
27 |     }
28 |   }
29 | }
30 | 
31 | template <vector V, typename F>
32 |   requires(__backend::lookupable<V> && __ranges::random_access_range<V>)
33 | void for_each(V&& v, F&& f) {
34 |   using index_type = __ranges::range_size_t<V>;
35 |   for (index_type i = 0; i < __backend::shape(v); i++) {
36 |     auto&& value = __backend::lookup(v, i);
37 |     f(std::make_tuple(i, std::reference_wrapper(value)));
38 |   }
39 | }
40 | 
41 | } // namespace __backend
42 | 
43 | } // namespace spblas
44 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/get_matrix_handle.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/ranges.hpp>
 8 | #include <spblas/detail/view_inspectors.hpp>
 9 | #include <spblas/views/matrix_opt.hpp>
10 | 
11 | #include <spblas/vendor/onemkl_sycl/detail/create_matrix_handle.hpp>
12 | 
13 | namespace spblas {
14 | 
15 | namespace __mkl {
16 | 
17 | template <matrix M>
18 | oneapi::mkl::sparse::matrix_handle_t
19 | get_matrix_handle(sycl::queue& q, M&& m,
20 |                   oneapi::mkl::sparse::matrix_handle_t handle = nullptr) {
21 |   if constexpr (__detail::is_matrix_opt_v<decltype(m)>) {
22 |     log_trace("using A as matrix_opt");
23 | 
24 |     if (m.matrix_handle_ == nullptr) {
25 |       m.matrix_handle_ = create_matrix_handle(q, m.base());
26 |     }
27 | 
28 |     return m.matrix_handle_;
29 |   } else if constexpr (__detail::has_base<M>) {
30 |     return get_matrix_handle(q, m.base(), handle);
31 |   } else if (handle != nullptr) {
32 |     log_trace("using A from operation_info_t");
33 | 
34 |     return handle;
35 |   } else {
36 |     log_trace("using A as csr_base");
37 | 
38 |     return create_matrix_handle(q, m);
39 |   }
40 | }
41 | 
42 | } // namespace __mkl
43 | 
44 | } // namespace spblas
45 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/detail/cusparse_tensors.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cusparse.h>
 4 | 
 5 | #include <spblas/detail/types.hpp>
 6 | #include <spblas/detail/view_inspectors.hpp>
 7 | #include <spblas/vendor/cusparse/exception.hpp>
 8 | #include <spblas/vendor/cusparse/types.hpp>
 9 | 
10 | namespace spblas {
11 | 
12 | namespace __cusparse {
13 | 
14 | template <matrix M>
15 |   requires __detail::is_csr_view_v<M>
16 | cusparseSpMatDescr_t create_cusparse_handle(M&& m) {
17 |   cusparseSpMatDescr_t mat_descr;
18 |   __cusparse::throw_if_error(cusparseCreateCsr(
19 |       &mat_descr, __backend::shape(m)[0], __backend::shape(m)[1],
20 |       m.values().size(), m.rowptr().data(), m.colind().data(),
21 |       m.values().data(), detail::cusparse_index_type_v<tensor_offset_t<M>>,
22 |       detail::cusparse_index_type_v<tensor_index_t<M>>,
23 |       CUSPARSE_INDEX_BASE_ZERO, detail::cuda_data_type_v<tensor_scalar_t<M>>));
24 | 
25 |   return mat_descr;
26 | }
27 | 
28 | template <vector V>
29 |   requires __ranges::contiguous_range<V>
30 | cusparseDnVecDescr_t create_cusparse_handle(V&& v) {
31 |   cusparseDnVecDescr_t vec_descr;
32 |   __cusparse::throw_if_error(
33 |       cusparseCreateDnVec(&vec_descr, __backend::shape(v), __ranges::data(v),
34 |                           detail::cuda_data_type_v<tensor_scalar_t<V>>));
35 | 
36 |   return vec_descr;
37 | }
38 | 
39 | } // namespace __cusparse
40 | 
41 | } // namespace spblas
42 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/detail/rocsparse_tensors.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <rocsparse/rocsparse.h>
 4 | 
 5 | #include <spblas/detail/types.hpp>
 6 | #include <spblas/detail/view_inspectors.hpp>
 7 | #include <spblas/vendor/rocsparse/exception.hpp>
 8 | #include <spblas/vendor/rocsparse/types.hpp>
 9 | 
10 | namespace spblas {
11 | namespace __rocsparse {
12 | 
13 | template <matrix M>
14 |   requires __detail::is_csr_view_v<M>
15 | rocsparse_spmat_descr create_rocsparse_handle(M&& m) {
16 |   rocsparse_spmat_descr mat_descr;
17 |   throw_if_error(rocsparse_create_csr_descr(
18 |       &mat_descr, __backend::shape(m)[0], __backend::shape(m)[1],
19 |       m.values().size(), m.rowptr().data(), m.colind().data(),
20 |       m.values().data(), detail::rocsparse_index_type_v<tensor_offset_t<M>>,
21 |       detail::rocsparse_index_type_v<tensor_index_t<M>>,
22 |       rocsparse_index_base_zero,
23 |       detail::rocsparse_data_type_v<tensor_scalar_t<M>>));
24 | 
25 |   return mat_descr;
26 | }
27 | 
28 | template <vector V>
29 |   requires __ranges::contiguous_range<V>
30 | rocsparse_dnvec_descr create_rocsparse_handle(V&& v) {
31 |   rocsparse_dnvec_descr vec_descr;
32 |   throw_if_error(rocsparse_create_dnvec_descr(
33 |       &vec_descr, __backend::shape(v), __ranges::data(v),
34 |       detail::rocsparse_data_type_v<tensor_scalar_t<V>>));
35 | 
36 |   return vec_descr;
37 | }
38 | 
39 | } // namespace __rocsparse
40 | } // namespace spblas
41 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/detail/spmv_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cusparse.h>
 4 | #include <memory>
 5 | 
 6 | #include "abstract_operation_state.hpp"
 7 | 
 8 | namespace spblas {
 9 | namespace __cusparse {
10 | 
11 | class spmv_state_t : public abstract_operation_state_t {
12 | public:
13 |   spmv_state_t() = default;
14 |   ~spmv_state_t() {
15 |     if (a_descr_) {
16 |       cusparseDestroySpMat(a_descr_);
17 |     }
18 |     if (b_descr_) {
19 |       cusparseDestroyDnVec(b_descr_);
20 |     }
21 |     if (c_descr_) {
22 |       cusparseDestroyDnVec(c_descr_);
23 |     }
24 |   }
25 | 
26 |   // Accessors for the descriptors
27 |   cusparseSpMatDescr_t a_descriptor() const {
28 |     return a_descr_;
29 |   }
30 |   cusparseDnVecDescr_t b_descriptor() const {
31 |     return b_descr_;
32 |   }
33 |   cusparseDnVecDescr_t c_descriptor() const {
34 |     return c_descr_;
35 |   }
36 | 
37 |   // Setters for the descriptors
38 |   void set_a_descriptor(cusparseSpMatDescr_t descr) {
39 |     a_descr_ = descr;
40 |   }
41 |   void set_b_descriptor(cusparseDnVecDescr_t descr) {
42 |     b_descr_ = descr;
43 |   }
44 |   void set_c_descriptor(cusparseDnVecDescr_t descr) {
45 |     c_descr_ = descr;
46 |   }
47 | 
48 | private:
49 |   cusparseSpMatDescr_t a_descr_ = nullptr;
50 |   cusparseDnVecDescr_t b_descr_ = nullptr;
51 |   cusparseDnVecDescr_t c_descr_ = nullptr;
52 | };
53 | 
54 | } // namespace __cusparse
55 | } // namespace spblas
56 | 


--------------------------------------------------------------------------------
/include/spblas/detail/types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstddef>
 4 | #include <type_traits>
 5 | 
 6 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
 7 | #include <spblas/vendor/onemkl_sycl/types.hpp>
 8 | #endif
 9 | 
10 | #ifdef SPBLAS_ENABLE_ARMPL
11 | #include <spblas/vendor/armpl/types.hpp>
12 | #endif
13 | 
14 | #ifdef SPBLAS_ENABLE_AOCLSPARSE
15 | #include <spblas/vendor/aoclsparse/types.hpp>
16 | #endif
17 | 
18 | #ifdef SPBLAS_ENABLE_ROCSPARSE
19 | #include <spblas/vendor/rocsparse/types.hpp>
20 | #endif
21 | 
22 | #ifdef SPBLAS_ENABLE_CUSPARSE
23 | #include <spblas/vendor/cusparse/types.hpp>
24 | #endif
25 | 
26 | namespace spblas {
27 | 
// Default index and offset types, declared only when SPBLAS_VENDOR_BACKEND is
// not defined. NOTE(review): presumably the vendor `types.hpp` headers included
// above provide `index_t`/`offset_t` otherwise; confirm against those headers.
#ifndef SPBLAS_VENDOR_BACKEND
using index_t = std::size_t;
using offset_t = index_t;
#endif
32 | 
33 | template <typename T>
34 | struct tensor_traits {
35 |   using scalar_type = typename std::remove_cvref_t<T>::scalar_type;
36 |   using scalar_reference = typename std::remove_cvref_t<T>::scalar_reference;
37 |   using index_type = typename std::remove_cvref_t<T>::index_type;
38 |   using offset_type = typename std::remove_cvref_t<T>::offset_type;
39 | };
40 | 
// Shorthand for tensor_traits<T>::scalar_type.
template <typename T>
using tensor_scalar_t = typename tensor_traits<T>::scalar_type;

// Shorthand for tensor_traits<T>::scalar_reference.
template <typename T>
using tensor_scalar_reference_t = typename tensor_traits<T>::scalar_reference;

// Shorthand for tensor_traits<T>::index_type.
template <typename T>
using tensor_index_t = typename tensor_traits<T>::index_type;

// Shorthand for tensor_traits<T>::offset_type.
template <typename T>
using tensor_offset_t = typename tensor_traits<T>::offset_type;
52 | 
53 | } // namespace spblas
54 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2023, SparseBLAS
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/operation_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/vendor/armpl/detail/armpl.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | namespace __armpl {
 8 | 
 9 | struct operation_state_t {
10 |   armpl_spmat_t a_handle = nullptr;
11 |   armpl_spmat_t b_handle = nullptr;
12 |   armpl_spmat_t c_handle = nullptr;
13 |   armpl_spmat_t d_handle = nullptr;
14 | 
15 |   operation_state_t() = default;
16 | 
17 |   operation_state_t(armpl_spmat_t a_handle, armpl_spmat_t b_handle,
18 |                     armpl_spmat_t c_handle, armpl_spmat_t d_handle)
19 |       : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle),
20 |         d_handle(d_handle) {}
21 | 
22 |   operation_state_t(operation_state_t&& other) {
23 |     *this = std::move(other);
24 |   }
25 | 
26 |   operation_state_t& operator=(operation_state_t&& other) {
27 |     a_handle = other.a_handle;
28 |     b_handle = other.b_handle;
29 |     c_handle = other.c_handle;
30 |     d_handle = other.d_handle;
31 | 
32 |     other.a_handle = other.b_handle = other.c_handle = other.d_handle = nullptr;
33 | 
34 |     return *this;
35 |   }
36 | 
37 |   operation_state_t(const operation_state_t& other) = delete;
38 | 
39 |   ~operation_state_t() {
40 |     release_matrix_handle(a_handle);
41 |     release_matrix_handle(b_handle);
42 |     release_matrix_handle(c_handle);
43 |     release_matrix_handle(d_handle);
44 |   }
45 | 
46 | private:
47 |   void release_matrix_handle(armpl_spmat_t& handle) {
48 |     if (handle != nullptr) {
49 |       armpl_spmat_destroy(handle);
50 |     }
51 |   }
52 | };
53 | 
54 | } // namespace __armpl
55 | 
56 | } // namespace spblas
57 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/operation_state_t.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "aoclsparse.h"
13 | 
14 | namespace spblas {
15 | 
16 | namespace __aoclsparse {
17 | 
18 | struct operation_state_t {
19 |   aoclsparse_matrix a_handle = nullptr;
20 |   aoclsparse_matrix b_handle = nullptr;
21 |   aoclsparse_matrix c_handle = nullptr;
22 | 
23 |   operation_state_t() = default;
24 | 
25 |   operation_state_t(aoclsparse_matrix a_handle, aoclsparse_matrix b_handle,
26 |                     aoclsparse_matrix c_handle)
27 |       : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle) {}
28 | 
29 |   operation_state_t(operation_state_t&& other) {
30 |     *this = std::move(other);
31 |   }
32 | 
33 |   operation_state_t& operator=(operation_state_t&& other) {
34 |     a_handle = other.a_handle;
35 |     b_handle = other.b_handle;
36 |     c_handle = other.c_handle;
37 | 
38 |     other.a_handle = other.b_handle = other.c_handle = nullptr;
39 | 
40 |     return *this;
41 |   }
42 | 
43 |   operation_state_t(const operation_state_t& other) = delete;
44 | 
45 |   ~operation_state_t() {
46 |     release_matrix_handle(a_handle);
47 |     release_matrix_handle(b_handle);
48 |     release_matrix_handle(c_handle);
49 |   }
50 | 
51 | private:
52 |   void release_matrix_handle(aoclsparse_matrix handle) {
53 |     if (handle != nullptr) {
54 |       aoclsparse_destroy(&handle);
55 |     }
56 |   }
57 | };
58 | 
59 | } // namespace __aoclsparse
60 | 
61 | } // namespace spblas
62 | 


--------------------------------------------------------------------------------
/examples/simple_spmv.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <spblas/spblas.hpp>
 3 | 
 4 | #include <fmt/core.h>
 5 | #include <fmt/ranges.h>
 6 | 
 7 | int main(int argc, char** argv) {
 8 |   using namespace spblas;
 9 | 
10 |   using T = float;
11 | 
12 |   spblas::index_t m = 100;
13 |   spblas::index_t n = 100;
14 |   spblas::index_t nnz_in = 10;
15 | 
16 |   fmt::print("\n\t###########################################################"
17 |              "######################");
18 |   fmt::print("\n\t### Running SpMV Example:");
19 |   fmt::print("\n\t###");
20 |   fmt::print("\n\t###   y = alpha * A * x");
21 |   fmt::print("\n\t###");
22 |   fmt::print("\n\t### with ");
23 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n,
24 |              nnz_in);
25 |   fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1);
26 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1);
27 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
28 |              sizeof(spblas::index_t));
29 |   fmt::print("\n\t###########################################################"
30 |              "######################");
31 |   fmt::print("\n");
32 | 
33 |   auto&& [values, rowptr, colind, shape, nnz] =
34 |       generate_csr<T, spblas::index_t>(m, n, nnz_in);
35 | 
36 |   csr_view<T, spblas::index_t> a(values, rowptr, colind, shape, nnz);
37 | 
38 |   // Scale every value of `a` by 5 in place.
39 |   // scale(5.f, a);
40 | 
41 |   std::vector<T> x(n, 1);
42 |   std::vector<T> y(m, 0);
43 | 
44 |   T alpha = 1.2f;
45 |   auto a_scaled = scaled(alpha, a);
46 | 
47 |   // y = alpha * A * x
48 |   multiply(a_scaled, x, y);
49 | 
50 |   fmt::print("\tExample is completed!\n");
51 | 
52 |   return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/examples/spmm_csc.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <spblas/spblas.hpp>
 3 | 
 4 | #include <fmt/core.h>
 5 | #include <fmt/ranges.h>
 6 | 
 7 | int main(int argc, char** argv) {
 8 |   using namespace spblas;
 9 |   namespace md = spblas::__mdspan;
10 | 
11 |   using T = float;
12 | 
13 |   spblas::index_t m = 100;
14 |   spblas::index_t n = 10;
15 |   spblas::index_t k = 100;
16 |   spblas::index_t nnz_in = 10;
17 | 
18 |   fmt::print("\n\t###########################################################"
19 |              "######################");
20 |   fmt::print("\n\t### Running SpMM Example:");
21 |   fmt::print("\n\t###");
22 |   fmt::print("\n\t###   Y = alpha * A * X");
23 |   fmt::print("\n\t###");
24 |   fmt::print("\n\t### with ");
25 |   fmt::print("\n\t### A, in CSC format, of size ({}, {}) with nnz = {}", m, k,
26 |              nnz_in);
27 |   fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n);
28 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n);
29 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
30 |              sizeof(spblas::index_t));
31 |   fmt::print("\n\t###########################################################"
32 |              "######################");
33 |   fmt::print("\n");
34 | 
35 |   auto&& [values, colptr, rowind, shape, nnz] = generate_csc<T>(m, k, nnz_in);
36 | 
37 |   csc_view<T> a(values, colptr, rowind, shape, nnz);
38 | 
39 |   std::vector<T> x_values(k * n, 1);
40 |   std::vector<T> y_values(m * n, 0);
41 | 
42 |   md::mdspan x(x_values.data(), k, n);
43 |   md::mdspan y(y_values.data(), m, n);
44 | 
45 |   // y = A * (alpha * x)
46 |   multiply(a, scaled(2.f, x), y);
47 | 
48 |   fmt::print("{}\n", spblas::__backend::values(y));
49 | 
50 |   fmt::print("\tExample is completed!\n");
51 | 
52 |   return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/include/spblas/backend/hash_accumulator.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <functional>
 4 | #include <span>
 5 | #include <tuple>
 6 | #include <unordered_map>
 7 | #include <unordered_set>
 8 | #include <vector>
 9 | 
10 | #include <spblas/detail/ranges.hpp>
11 | 
12 | namespace spblas {
13 | 
14 | namespace __backend {
15 | 
16 | template <typename T, std::integral I>
17 | class hash_accumulator {
18 | public:
19 |   hash_accumulator(I count) {}
20 | 
21 |   T& operator[](I pos) {
22 |     return hash_[pos];
23 |   }
24 | 
25 |   bool contains(I pos) {
26 |     return hash_.contains(pos);
27 |   }
28 | 
29 |   void clear() {
30 |     hash_.clear();
31 |   }
32 | 
33 |   I size() const {
34 |     return hash_.size();
35 |   }
36 | 
37 |   bool empty() {
38 |     return hash_.empty();
39 |   }
40 | 
41 |   void sort() {}
42 | 
43 |   auto get() {
44 |     std::vector<std::pair<I, T>> values(hash_.begin(), hash_.end());
45 | 
46 |     std::sort(values.begin(), values.end(), [](auto&& a, auto&& b) {
47 |       return std::get<0>(a) < std::get<0>(b);
48 |     });
49 | 
50 |     return values;
51 |   }
52 | 
53 | private:
54 |   std::unordered_map<I, T> hash_;
55 | };
56 | 
57 | template <std::integral T>
58 | class hash_set {
59 | public:
60 |   hash_set(T count) {}
61 | 
62 |   void insert(T key) {
63 |     set_.insert(key);
64 |   }
65 | 
66 |   bool contains(T key) {
67 |     return set_.contains(key);
68 |   }
69 | 
70 |   void clear() {
71 |     set_.clear();
72 |   }
73 | 
74 |   T size() const {
75 |     return set_.size();
76 |   }
77 | 
78 |   bool empty() {
79 |     return set_.empty();
80 |   }
81 | 
82 |   auto get() const {
83 |     return __ranges::views::all(set_);
84 |   }
85 | 
86 | private:
87 |   std::unordered_set<T> set_;
88 | };
89 | 
90 | } // namespace __backend
91 | 
92 | } // namespace spblas
93 | 


--------------------------------------------------------------------------------
/test/gtest/add_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <gtest/gtest.h>
 2 | 
 3 | #include "util.hpp"
 4 | #include <spblas/backend/spa_accumulator.hpp>
 5 | #include <spblas/spblas.hpp>
 6 | 
 7 | #include <fmt/core.h>
 8 | #include <fmt/ranges.h>
 9 | 
10 | TEST(CsrView, Add_CSR_CSR_CSR) {
11 |   using T = float;
12 |   using I = spblas::index_t;
13 | 
14 |   for (auto&& [m, n, nnz] : util::dims) {
15 |     auto [a_values, a_rowptr, a_colind, a_shape, a_nnz] =
16 |         spblas::generate_csr<T, I>(m, n, nnz);
17 | 
18 |     auto [b_values, b_rowptr, b_colind, b_shape, b_nnz] =
19 |         spblas::generate_csr<T, I>(m, n, nnz);
20 | 
21 |     spblas::csr_view<T, I> a(a_values, a_rowptr, a_colind, a_shape, a_nnz);
22 |     spblas::csr_view<T, I> b(b_values, b_rowptr, b_colind, b_shape, b_nnz);
23 | 
24 |     std::vector<I> c_rowptr(m + 1);
25 | 
26 |     spblas::csr_view<T, I> c(nullptr, c_rowptr.data(), nullptr, {m, n}, 0);
27 | 
28 |     auto info = spblas::add_inspect(a, b, c);
29 | 
30 |     std::vector<T> c_values(info.result_nnz());
31 |     std::vector<I> c_colind(info.result_nnz());
32 | 
33 |     c.update(c_values, c_rowptr, c_colind);
34 | 
35 |     spblas::add_compute(info, a, b, c);
36 | 
37 |     spblas::__backend::spa_accumulator<T, I> c_row_ref(
38 |         spblas::__backend::shape(c)[1]);
39 | 
40 |     for (I i = 0; i < spblas::__backend::shape(c)[0]; i++) {
41 |       c_row_ref.clear();
42 | 
43 |       for (auto&& [j, v] : spblas::__backend::lookup_row(a, i)) {
44 |         c_row_ref[j] += v;
45 |       }
46 | 
47 |       for (auto&& [j, v] : spblas::__backend::lookup_row(b, i)) {
48 |         c_row_ref[j] += v;
49 |       }
50 | 
51 |       auto&& c_row = spblas::__backend::lookup_row(c, i);
52 | 
53 |       for (auto&& [j, v] : c_row) {
54 |         EXPECT_EQ_(c_row_ref[j], v);
55 |       }
56 |     }
57 |   }
58 | }
59 | 


--------------------------------------------------------------------------------
/examples/simple_spmm.cpp:
--------------------------------------------------------------------------------
 1 | #include <spblas/spblas.hpp>
 2 | 
 3 | #include <fmt/core.h>
 4 | #include <fmt/ranges.h>
 5 | 
 6 | int main(int argc, char** argv) {
 7 |   using namespace spblas;
 8 |   namespace md = spblas::__mdspan;
 9 | 
10 |   using T = float;
11 | 
12 |   spblas::index_t m = 100;
13 |   spblas::index_t n = 10;
14 |   spblas::index_t k = 100;
15 |   spblas::index_t nnz_in = 10;
16 | 
17 |   fmt::print("\n\t###########################################################"
18 |              "######################");
19 |   fmt::print("\n\t### Running SpMM Example:");
20 |   fmt::print("\n\t###");
21 |   fmt::print("\n\t###   Y = alpha * A * X");
22 |   fmt::print("\n\t###");
23 |   fmt::print("\n\t### with ");
24 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k,
25 |              nnz_in);
26 |   fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n);
27 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n);
28 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
29 |              sizeof(spblas::index_t));
30 |   fmt::print("\n\t###########################################################"
31 |              "######################");
32 |   fmt::print("\n");
33 | 
34 |   auto&& [values, rowptr, colind, shape, nnz] = generate_csr<T>(m, k, nnz_in);
35 | 
36 |   csr_view<T> a(values, rowptr, colind, shape, nnz);
37 | 
38 |   std::vector<T> x_values(k * n, 1);
39 |   std::vector<T> y_values(m * n, 0);
40 | 
41 |   md::mdspan x(x_values.data(), k, n);
42 |   md::mdspan y(y_values.data(), m, n);
43 | 
44 |   auto a_view = scaled(2.f, a);
45 | 
46 |   // y = A * (alpha * x)
47 |   multiply(a_view, scaled(2.f, x), y);
48 | 
49 |   fmt::print("{}\n", spblas::__backend::values(y));
50 | 
51 |   fmt::print("\tExample is completed!\n");
52 | 
53 |   return 0;
54 | }
55 | 


--------------------------------------------------------------------------------
/test/gtest/util.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <gtest/gtest.h>
 4 | #include <tuple>
 5 | #include <vector>
 6 | 
 7 | #define EXPECT_EQ_(t, u)                                                       \
 8 |   if constexpr (std::floating_point<std::remove_cvref_t<decltype((t))>> ||     \
 9 |                 std::floating_point<std::remove_cvref_t<decltype((u))>>) {     \
10 |     auto epsilon =                                                             \
11 |         64 *                                                                   \
12 |         std::numeric_limits<std::remove_cvref_t<decltype((t))>>::epsilon();    \
13 |     auto abs_th =                                                              \
14 |         std::numeric_limits<std::remove_cvref_t<decltype((t))>>::min();        \
15 |     auto diff = std::abs((t) - (u));                                           \
16 |     auto norm = std::min(                                                      \
17 |         std::abs((t)) + std::abs((u)),                                         \
18 |         std::numeric_limits<std::remove_cvref_t<decltype((t))>>::max());       \
19 |     auto abs_error = std::max(abs_th, epsilon * norm);                         \
20 |     EXPECT_NEAR((t), (u), abs_error);                                          \
21 |   } else {                                                                     \
22 |     EXPECT_EQ((t), (u));                                                       \
23 |   }
24 | 
namespace util {

// Problem sizes shared by the tests. Each entry is destructured by the tests
// as (m, n, nnz): two matrix dimensions followed by a non-zero count.
inline std::vector<std::tuple<int, int, int>> dims{
    std::tuple{1000, 100, 100},
    std::tuple{100, 1000, 10000},
    std::tuple{40, 40, 1000},
};

// Same layout as `dims`, but every entry has equal first and second
// dimensions.
inline std::vector<std::tuple<int, int, int>> square_dims{
    std::tuple{1000, 1000, 100},
    std::tuple{100, 100, 100},
    std::tuple{40, 40, 1000},
};

} // namespace util
36 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/detail/export_matrix_handle.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/operation_info_t.hpp>
 4 | 
 5 | #include <armpl_sparse.h>
 6 | #include <spblas/detail/view_inspectors.hpp>
 7 | 
 8 | namespace spblas {
 9 | 
10 | namespace __armpl {
11 | 
12 | template <matrix M>
13 |   requires __detail::is_csr_view_v<M>
14 | void export_matrix_handle(operation_info_t& info, M&& matrix,
15 |                           armpl_spmat_t matrix_handle) {
16 |   auto nnz = info.result_nnz();
17 |   armpl_int_t m, n;
18 |   armpl_int_t *rowptr, *colind;
19 |   tensor_scalar_t<M>* values;
20 |   __armpl::export_spmat_csr<tensor_scalar_t<M>>(matrix_handle, 0, &m, &n,
21 |                                                 &rowptr, &colind, &values);
22 | 
23 |   std::copy(values, values + nnz, matrix.values().begin());
24 |   std::copy(colind, colind + nnz, matrix.colind().begin());
25 |   std::copy(rowptr, rowptr + m + 1, matrix.rowptr().begin());
26 | 
27 |   free(values);
28 |   free(rowptr);
29 |   free(colind);
30 | }
31 | 
32 | template <matrix M>
33 |   requires __detail::is_csc_view_v<M>
34 | void export_matrix_handle(operation_info_t& info, M&& matrix,
35 |                           armpl_spmat_t matrix_handle) {
36 |   auto nnz = info.result_nnz();
37 |   armpl_int_t m, n;
38 |   armpl_int_t *colptr, *rowind;
39 |   tensor_scalar_t<M>* values;
40 |   __armpl::export_spmat_csc<tensor_scalar_t<M>>(matrix_handle, 0, &m, &n,
41 |                                                 &rowind, &colptr, &values);
42 | 
43 |   std::copy(values, values + nnz, matrix.values().begin());
44 |   std::copy(rowind, rowind + nnz, matrix.rowind().begin());
45 |   std::copy(colptr, colptr + n + 1, matrix.colptr().begin());
46 | 
47 |   free(values);
48 |   free(colptr);
49 |   free(rowind);
50 | }
51 | 
52 | } // namespace __armpl
53 | 
54 | } // namespace spblas
55 | 


--------------------------------------------------------------------------------
/examples/matrix_opt_example.cpp:
--------------------------------------------------------------------------------
 1 | #include <spblas/spblas.hpp>
 2 | 
 3 | #include <fmt/core.h>
 4 | #include <fmt/ranges.h>
 5 | 
 6 | int main(int argc, char** argv) {
 7 |   using namespace spblas;
 8 |   namespace md = spblas::__mdspan;
 9 | 
10 |   using T = float;
11 | 
12 |   spblas::index_t m = 100;
13 |   spblas::index_t n = 10;
14 |   spblas::index_t k = 100;
15 |   spblas::index_t nnz_in = 10;
16 | 
17 |   fmt::print("\n\t###########################################################"
18 |              "######################");
19 |   fmt::print("\n\t### Running SpMM Example:");
20 |   fmt::print("\n\t###");
21 |   fmt::print("\n\t###   Y = alpha * A * X");
22 |   fmt::print("\n\t###");
23 |   fmt::print("\n\t### with ");
24 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k,
25 |              nnz_in);
26 |   fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n);
27 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n);
28 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
29 |              sizeof(spblas::index_t));
30 |   fmt::print("\n\t###########################################################"
31 |              "######################");
32 |   fmt::print("\n");
33 | 
34 |   auto&& [values, rowptr, colind, shape, nnz] = generate_csr<T>(m, k, nnz_in);
35 | 
36 |   csr_view<T> a(values, rowptr, colind, shape, nnz);
37 | 
38 |   matrix_opt a_opt(a);
39 | 
40 |   std::vector<T> x_values(k * n, 1);
41 |   std::vector<T> y_values(m * n, 0);
42 | 
43 |   md::mdspan x(x_values.data(), k, n);
44 |   md::mdspan y(y_values.data(), m, n);
45 | 
46 |   auto a_view = scaled(2.f, a);
47 | 
48 |   // y = A * (alpha * x)
49 |   multiply(a_opt, scaled(2.f, x), y);
50 | 
51 |   fmt::print("{}\n", spblas::__backend::values(y));
52 | 
53 |   fmt::print("\tExample is completed!\n");
54 | 
55 |   return 0;
56 | }
57 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/cuda_allocator.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "exception.hpp"
 4 | #include <cuda_runtime.h>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace cusparse {
 9 | 
10 | template <typename T, std::size_t Alignment = 0>
11 | class cuda_allocator {
12 | public:
13 |   using value_type = T;
14 |   using pointer = T*;
15 |   using const_pointer = const T*;
16 |   using reference = T&;
17 |   using const_reference = const T&;
18 |   using size_type = std::size_t;
19 |   using difference_type = std::ptrdiff_t;
20 | 
21 |   cuda_allocator() noexcept {}
22 |   cuda_allocator(cudaStream_t stream) noexcept : stream_(stream) {}
23 | 
24 |   template <typename U>
25 |   cuda_allocator(const cuda_allocator<U, Alignment>& other) noexcept
26 |       : stream_(other.stream()) {}
27 | 
28 |   cuda_allocator(const cuda_allocator&) = default;
29 |   cuda_allocator& operator=(const cuda_allocator&) = default;
30 |   ~cuda_allocator() = default;
31 | 
32 |   using is_always_equal = std::false_type;
33 | 
34 |   pointer allocate(std::size_t size) {
35 |     void* ptr;
36 |     this->throw_if_failure(cudaMallocAsync(&ptr, size * sizeof(T), stream()));
37 | 
38 |     return reinterpret_cast<T*>(ptr);
39 |   }
40 | 
41 |   void deallocate(pointer ptr, std::size_t n = 0) {
42 |     if (ptr != nullptr) {
43 |       this->throw_if_failure(cudaFreeAsync(ptr, stream()));
44 |     }
45 |   }
46 | 
47 |   bool operator==(const cuda_allocator&) const = default;
48 |   bool operator!=(const cuda_allocator&) const = default;
49 | 
50 |   template <typename U>
51 |   struct rebind {
52 |     using other = cuda_allocator<U, Alignment>;
53 |   };
54 | 
55 |   cudaStream_t stream() const noexcept {
56 |     return this->stream_;
57 |   }
58 | 
59 | private:
60 |   void throw_if_failure(cudaError_t error) {
61 |     if (error != cudaSuccess) {
62 |       throw std::bad_alloc{};
63 |     }
64 |   }
65 | 
66 |   cudaStream_t stream_ = nullptr;
67 | };
68 | 
69 | } // namespace cusparse
70 | 
71 | } // namespace spblas
72 | 


--------------------------------------------------------------------------------
/include/spblas/backend/csr_builder.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/views/csr_view.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | namespace __backend {
 8 | 
 9 | template <typename T, std::integral I = index_t, std::integral O = I>
10 | class csr_builder {
11 | public:
12 |   csr_builder(csr_view<T, I, O> view) : view_(view) {
13 |     view_.rowptr()[0] = 0;
14 |   }
15 | 
16 |   template <__ranges::forward_range Row>
17 |   void insert_row(I row_index, Row&& row) {
18 |     if (j_ptr_ + __ranges::size(row) > __ranges::size(view_.values()) ||
19 |         j_ptr_ + __ranges::size(row) > __ranges::size(view_.colind())) {
20 |       throw std::runtime_error("csr_builder: not enough space in CSR.");
21 |     }
22 | 
23 |     if (row_index + 1 >= __ranges::size(view_.rowptr())) {
24 |       throw std::runtime_error("csr_builder: not enough rows in CSR.");
25 |     }
26 | 
27 |     while (i_ < row_index) {
28 |       view_.rowptr()[i_ + 1] = j_ptr_;
29 |       i_++;
30 |     }
31 | 
32 |     for (auto&& [j, v] : row) {
33 |       view_.values()[j_ptr_] = v;
34 |       view_.colind()[j_ptr_] = j;
35 |       j_ptr_++;
36 |     }
37 |     view_.rowptr()[i_ + 1] = j_ptr_;
38 |     i_++;
39 |   }
40 | 
41 |   void finish() {
42 |     while (i_ < view_.shape()[0]) {
43 |       view_.rowptr()[i_ + 1] = j_ptr_;
44 |       i_++;
45 |     }
46 |   }
47 | 
48 | private:
49 |   csr_view<T, I, O> view_;
50 |   O j_ptr_ = 0;
51 |   I i_ = 0;
52 | };
53 | 
54 | template <typename T, std::integral I = index_t, std::integral O = I>
55 | class csc_builder {
56 | public:
57 |   csc_builder(csc_view<T, I, O> view) : builder_(transposed(view)) {}
58 | 
59 |   template <__ranges::forward_range Column>
60 |   void insert_column(I column_index, Column&& column) {
61 |     builder_.insert_row(column_index, std::forward<Column>(column));
62 |   }
63 | 
64 |   void finish() {
65 |     builder_.finish();
66 |   }
67 | 
68 | private:
69 |   csr_builder<T, I> builder_;
70 | };
71 | 
72 | } // namespace __backend
73 | 
74 | } // namespace spblas
75 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/transpose_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/concepts.hpp>
 4 | #include <spblas/detail/operation_info_t.hpp>
 5 | #include <spblas/detail/view_inspectors.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | template <matrix A, matrix B>
10 | operation_info_t transpose_inspect(A&& a, B&& b) {
11 |   return {};
12 | }
13 | 
14 | template <matrix A, matrix B>
15 |   requires(__detail::is_csr_view_v<A> && __detail::is_csr_view_v<B>)
16 | void transpose(A&& a, B&& b) {
17 |   if (__backend::shape(a)[0] != __backend::shape(b)[1] ||
18 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
19 |     throw std::invalid_argument(
20 |         "transpose: matrix dimensions are incompatible.");
21 |   }
22 |   if (b.values().size() < __backend::size(a) ||
23 |       b.colind().size() < __backend::size(a)) {
24 |     throw std::runtime_error("transpose: Transpose ran out of memory.");
25 |   }
26 |   using O = tensor_offset_t<B>;
27 | 
28 |   const auto b_base = __detail::get_ultimate_base(b);
29 |   const auto b_rowptr = b_base.rowptr();
30 |   const auto b_colind = b_base.colind();
31 |   const auto b_values = b_base.values();
32 | 
33 |   __ranges::fill(b_rowptr, 0);
34 | 
35 |   for (auto&& [i, row] : __backend::rows(a)) {
36 |     for (auto&& [j, _] : row) {
37 |       b_rowptr[j + 1]++;
38 |     }
39 |   }
40 | 
41 |   std::exclusive_scan(b_rowptr.begin(), b_rowptr.end(), b_rowptr.begin(), O{});
42 | 
43 |   for (auto&& [i, row] : __backend::rows(a)) {
44 |     for (auto&& [j, v] : row) {
45 |       const auto out_idx = b_rowptr[j + 1];
46 |       b_colind[out_idx] = i;
47 |       b_values[out_idx] = v;
48 |       b_rowptr[j + 1]++;
49 |     }
50 |   }
51 | 
52 |   b.update(b.values(), b.rowptr(), b.colind(), b.shape(), a.size());
53 | }
54 | 
55 | template <matrix A, matrix B>
56 |   requires(__detail::is_csr_view_v<A> && __detail::is_csr_view_v<B>)
57 | void transpose(operation_info_t& info, A&& a, B&& b) {
58 |   transpose(std::forward<A>(a), std::forward<B>(b));
59 | }
60 | 
61 | } // namespace spblas
62 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/hip_allocator.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "exception.hpp"
 4 | #include <hip/hip_runtime.h>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace rocsparse {
 9 | 
10 | // Allocator that obtains device memory with hipMallocAsync/hipFreeAsync on the
11 | // HIP stream it holds (a null stream handle when default-constructed).
12 | template <typename T, std::size_t Alignment = 0>
13 | class hip_allocator {
14 | public:
15 |   using value_type = T;
16 |   using pointer = T*;
17 |   using const_pointer = const T*;
18 |   using reference = T&;
19 |   using const_reference = const T&;
20 |   using size_type = std::size_t;
21 |   using difference_type = std::ptrdiff_t;
22 |   using is_always_equal = std::false_type;
23 | 
24 |   template <typename U>
25 |   struct rebind {
26 |     using other = hip_allocator<U, Alignment>;
27 |   };
28 | 
29 |   hip_allocator() noexcept = default;
30 |   hip_allocator(hipStream_t stream) noexcept : stream_(stream) {}
31 | 
32 |   // Rebinding conversion: adopts the stream of the source allocator.
33 |   template <typename U>
34 |   hip_allocator(const hip_allocator<U, Alignment>& other) noexcept
35 |       : stream_(other.stream()) {}
36 | 
37 |   hip_allocator(const hip_allocator&) = default;
38 |   hip_allocator& operator=(const hip_allocator&) = default;
39 |   ~hip_allocator() = default;
40 | 
41 |   // Allocates room for `size` objects of type T on stream(). Throws
42 |   // std::bad_alloc if the byte count overflows or HIP reports an error.
43 |   pointer allocate(std::size_t size) {
44 |     if (size > static_cast<std::size_t>(-1) / sizeof(T)) {
45 |       throw std::bad_alloc{};
46 |     }
47 |     void* ptr = nullptr;
48 |     if (hipMallocAsync(&ptr, size * sizeof(T), stream()) != hipSuccess) {
49 |       throw std::bad_alloc{};
50 |     }
51 |     return static_cast<T*>(ptr);
52 |   }
53 | 
54 |   // Queues the release of `ptr` on stream(); null pointers are ignored. Never
55 |   // throws (allocator deallocation must not), so HIP errors are dropped here.
56 |   void deallocate(pointer ptr, std::size_t n = 0) noexcept {
57 |     if (ptr != nullptr) {
58 |       static_cast<void>(hipFreeAsync(ptr, stream()));
59 |     }
60 |   }
61 | 
62 |   bool operator==(const hip_allocator&) const = default;
63 |   bool operator!=(const hip_allocator&) const = default;
64 | 
65 |   hipStream_t stream() const noexcept { return stream_; }
66 | 
67 | private:
68 |   hipStream_t stream_ = nullptr;
69 | };
70 | 
71 | } // namespace rocsparse
72 | 
73 | } // namespace spblas
74 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/detail/create_matrix_handle.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/vendor/aoclsparse/aocl_wrappers.hpp>
 4 | 
 5 | #include <spblas/detail/view_inspectors.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | namespace __aoclsparse {
10 | 
11 | // Builds a zero-based AOCL-Sparse handle from the CSR arrays of `m`; throws
12 | // std::runtime_error when aoclsparse_create_csr does not report success.
13 | template <matrix M>
14 |   requires __detail::is_csr_view_v<M>
15 | aoclsparse_matrix create_matrix_handle(M&& m) {
16 |   aoclsparse_matrix handle = nullptr;
17 |   const aoclsparse_status status = __aoclsparse::aoclsparse_create_csr(
18 |       &handle, aoclsparse_index_base_zero, m.shape()[0], m.shape()[1], m.size(),
19 |       m.rowptr().data(), m.colind().data(), m.values().data());
20 |   if (status == aoclsparse_status_success) {
21 |     return handle;
22 |   }
23 |   throw std::runtime_error("create_matrix_handle: AOCL-Sparse failed while "
24 |                            "creating matrix handle.");
25 | }
26 | 
27 | // Builds a zero-based AOCL-Sparse CSR handle from a CSC view by passing its
28 | // colptr/rowind arrays with the two dimensions swapped; throws on failure.
29 | template <matrix M>
30 |   requires __detail::is_csc_view_v<M>
31 | aoclsparse_matrix create_matrix_handle(M&& m) {
32 |   aoclsparse_matrix handle = nullptr;
33 |   const aoclsparse_status status = __aoclsparse::aoclsparse_create_csr(
34 |       &handle, aoclsparse_index_base_zero, m.shape()[1], m.shape()[0], m.size(),
35 |       m.colptr().data(), m.rowind().data(), m.values().data());
36 |   if (status == aoclsparse_status_success) {
37 |     return handle;
38 |   }
39 |   throw std::runtime_error("create_matrix_handle: AOCL-Sparse failed while "
40 |                            "creating matrix handle.");
41 | }
42 | 
43 | template <matrix M>
44 | aoclsparse_operation get_transpose(M&& m) {
45 |   static_assert(__detail::has_csr_base<M> || __detail::has_csc_base<M>);
46 |   if constexpr (__detail::has_base<M>) { // wrapper: ask the wrapped object
47 |     return get_transpose(m.base());
48 |   } else if constexpr (__detail::is_csr_view_v<M>) { // CSR: no operation flag
49 |     return aoclsparse_operation_none;
50 |   } else if constexpr (__detail::is_csc_view_v<M>) { // CSC: flagged as transpose
51 |     return aoclsparse_operation_transpose;
52 |   }
53 | }
54 | 
55 | } // namespace __aoclsparse
56 | 
57 | } // namespace spblas
58 | 


--------------------------------------------------------------------------------
/examples/simple_sptrsv.cpp:
--------------------------------------------------------------------------------
 1 | #include <spblas/spblas.hpp>
 2 | 
 3 | #include <fmt/core.h>
 4 | #include <fmt/ranges.h>
 5 | 
 6 | int main(int argc, char** argv) {
 7 |   using namespace spblas;
 8 | 
 9 |   using T = float;
10 | 
11 |   // Problem size: an m x m matrix with nnz_in requested nonzeros.
12 |   spblas::index_t m = 100;
13 |   spblas::index_t nnz_in = 20;
14 | 
15 |   fmt::print("\n\t###########################################################"
16 |              "######################");
17 |   fmt::print("\n\t### Running SpTRSV Example:");
18 |   fmt::print("\n\t###");
19 |   fmt::print("\n\t###   solve for x:  A * x = alpha * b");
20 |   fmt::print("\n\t###");
21 |   fmt::print("\n\t### with ");
22 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, m,
23 |              nnz_in);
24 |   fmt::print("\n\t### x, a dense vector, of size ({}, {})", m, 1);
25 |   fmt::print("\n\t### b, a dense vector, of size ({}, {})", m, 1);
26 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
27 |              sizeof(spblas::index_t));
28 |   fmt::print("\n\t###########################################################"
29 |              "######################");
30 |   fmt::print("\n");
31 | 
32 |   auto&& [values, rowptr, colind, shape, nnz] =
33 |       generate_csr<T, spblas::index_t>(m, m, nnz_in);
34 | 
35 |   // Scale all stored values down so that, together with the implicit unit
36 |   // diagonal, the matrix is diagonally dominant and therefore solvable.
37 |   const T shrink = 1e-3f;
38 |   for (auto& entry : values) {
39 |     entry = shrink * entry;
40 |   }
41 | 
42 |   // View over the generated CSR arrays.
43 |   csr_view<T, spblas::index_t> a(values, rowptr, colind, shape, nnz);
44 | 
45 |   // Right-hand side of ones, scaled by alpha; x receives the solution.
46 |   T alpha = 1.2f;
47 |   std::vector<T> b(m, 1);
48 |   auto b_scaled = scaled(alpha, b);
49 |   std::vector<T> x(m, 0);
50 | 
51 |   // solve for x:  lower(A) * x = alpha * b
52 |   triangular_solve(a, spblas::lower_triangle_t{},
53 |                    spblas::implicit_unit_diagonal_t{}, b_scaled, x);
54 | 
55 |   fmt::print("\tExample is completed!\n");
56 | 
57 |   return 0;
58 | }
59 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <complex>
 4 | #include <cstdint>
 5 | #include <type_traits>
 6 | 
 7 | #include <cuda.h>
 8 | #include <cusparse.h>
 9 | 
10 | namespace spblas {
11 | 
12 | using index_t = std::int32_t; // 32-bit signed index type
13 | using offset_t = index_t; // offsets use the same type as indices
14 | 
15 | namespace detail {
16 | 
17 | // Trait that is true for any floating-point type, std::int8_t and
18 | // std::int32_t, and false for everything else.
19 | template <typename T>
20 | static constexpr bool is_valid_cusparse_scalar_type_v =
21 |     std::is_floating_point_v<T> || std::is_same_v<T, std::int8_t> ||
22 |     std::is_same_v<T, std::int32_t>;
23 | 
24 | // Trait that is true for std::int32_t and std::int64_t only.
25 | template <typename T>
26 | static constexpr bool is_valid_cusparse_index_type_v =
27 |     std::is_same_v<T, std::int32_t> || std::is_same_v<T, std::int64_t>;
28 | 
29 | // Maps a C++ scalar type to its cudaDataType_t enumerator. Only the
30 | // specializations below are defined; other types fail to compile.
31 | template <typename T>
32 | struct cuda_data_type;
33 | 
34 | template <>
35 | struct cuda_data_type<float>
36 |     : std::integral_constant<cudaDataType_t, CUDA_R_32F> {};
37 | 
38 | template <>
39 | struct cuda_data_type<double>
40 |     : std::integral_constant<cudaDataType_t, CUDA_R_64F> {};
41 | 
42 | template <>
43 | struct cuda_data_type<std::complex<float>>
44 |     : std::integral_constant<cudaDataType_t, CUDA_C_32F> {};
45 | 
46 | template <>
47 | struct cuda_data_type<std::complex<double>>
48 |     : std::integral_constant<cudaDataType_t, CUDA_C_64F> {};
49 | 
50 | template <>
51 | struct cuda_data_type<std::int8_t>
52 |     : std::integral_constant<cudaDataType_t, CUDA_R_8I> {};
53 | 
54 | template <>
55 | struct cuda_data_type<std::int32_t>
56 |     : std::integral_constant<cudaDataType_t, CUDA_R_32I> {};
57 | 
58 | // Shorthand for cuda_data_type<T>::value.
59 | template <typename T>
60 | static constexpr cudaDataType_t cuda_data_type_v = cuda_data_type<T>::value;
61 | 
62 | // Maps a C++ index type to its cusparseIndexType_t enumerator.
63 | template <typename T>
64 | struct cuda_index_type;
65 | 
66 | template <>
67 | struct cuda_index_type<std::int32_t>
68 |     : std::integral_constant<cusparseIndexType_t, CUSPARSE_INDEX_32I> {};
69 | 
70 | template <>
71 | struct cuda_index_type<std::int64_t>
72 |     : std::integral_constant<cusparseIndexType_t, CUSPARSE_INDEX_64I> {};
73 | 
74 | // Shorthand for cuda_index_type<T>::value.
75 | template <typename T>
76 | static constexpr cusparseIndexType_t cusparse_index_type_v =
77 |     cuda_index_type<T>::value;
78 | 
79 | } // namespace detail
80 | 
81 | } // namespace spblas
82 | 


--------------------------------------------------------------------------------
/include/spblas/views/csc_view.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <span>
 4 | #include <spblas/detail/detail.hpp>
 5 | #include <spblas/views/view_base.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | // Non-owning view of a sparse matrix in CSC form. It stores spans over the
10 | // values, column pointers and row indices together with the shape and the
11 | // number of stored elements; copying the view never copies the arrays.
12 | template <typename T, std::integral I = index_t, std::integral O = I>
13 | class csc_view : public view_base {
14 | public:
15 |   using scalar_type = T;
16 |   using scalar_reference = T&;
17 |   using index_type = I;
18 |   using offset_type = O;
19 | 
20 |   // Pointer constructor: `values`/`rowind` are taken to hold `nnz` elements
21 |   // and `colptr` to hold shape[1] + 1 offsets. A null `colptr` yields an empty
22 |   // colptr span; its length is chosen up front so that colptr_ is never
23 |   // formed over a null pointer with a non-zero extent.
24 |   csc_view(T* values, O* colptr, I* rowind, index<I> shape, O nnz)
25 |       : values_(values, nnz),
26 |         colptr_(colptr, colptr != nullptr ? shape[1] + 1 : 0),
27 |         rowind_(rowind, nnz), shape_(shape), nnz_(nnz) {}
28 | 
29 |   // Range constructor: each span covers the whole contiguous range passed in;
30 |   // `shape` and `nnz` are stored as given.
31 |   template <__ranges::contiguous_range V, __ranges::contiguous_range R,
32 |             __ranges::contiguous_range C>
33 |   csc_view(V&& values, R&& colptr, C&& rowind, index<I> shape, O nnz)
34 |       : values_(__ranges::data(values), __ranges::size(values)),
35 |         colptr_(__ranges::data(colptr), __ranges::size(colptr)),
36 |         rowind_(__ranges::data(rowind), __ranges::size(rowind)), shape_(shape),
37 |         nnz_(nnz) {}
38 | 
39 |   // Rebinds the three arrays; shape and nnz keep their current values.
40 |   void update(std::span<T> values, std::span<O> colptr, std::span<I> rowind) {
41 |     values_ = values;
42 |     colptr_ = colptr;
43 |     rowind_ = rowind;
44 |   }
45 | 
46 |   // Rebinds the three arrays and replaces shape and nnz as well.
47 |   void update(std::span<T> values, std::span<O> colptr, std::span<I> rowind,
48 |               index<I> shape, O nnz) {
49 |     update(values, colptr, rowind);
50 |     shape_ = shape;
51 |     nnz_ = nnz;
52 |   }
53 | 
54 |   // Accessors for the underlying arrays (spans are returned by value).
55 |   std::span<T> values() const noexcept { return values_; }
56 |   std::span<O> colptr() const noexcept { return colptr_; }
57 |   std::span<I> rowind() const noexcept { return rowind_; }
58 | 
59 |   // Matrix dimensions as stored.
60 |   index<I> shape() const noexcept { return shape_; }
61 | 
62 |   // Stored-element count (nnz) as given; not derived from any span length.
63 |   O size() const noexcept { return nnz_; }
64 | 
65 | private:
66 |   // Non-owning array views followed by the metadata.
67 |   std::span<T> values_;
68 |   std::span<O> colptr_;
69 |   std::span<I> rowind_;
70 |   index<I> shape_;
71 |   O nnz_;
72 | };
73 | 
74 | } // namespace spblas
75 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/spmv_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/ranges.hpp>
 8 | #include <spblas/detail/view_inspectors.hpp>
 9 | 
10 | #include <spblas/vendor/onemkl_sycl/detail/detail.hpp>
11 | 
12 | //
13 | // Defines the following APIs for SpMV:
14 | //
15 | // y = alpha * op(A) * x
16 | //
17 | //  where A is a sparse matrix in CSR or CSC format and
18 | //  x/y are dense vectors
19 | //
20 | // //operation_info_t multiply_inspect(A, x, y)
21 | // //void multiply_inspect(operation_info_t, A, x, y)
22 | //
23 | // //void multiply_compute(operation_info_t, A, x, y)
24 | // void multiply(A, x, y)
25 | //
26 | 
27 | namespace spblas {
28 | 
29 | // y = alpha * op(A) * x through oneMKL sparse gemv on the queue obtained from
30 | // `policy`. alpha is the scaling factor gathered from `a` and `x` (1 if none);
31 | // the call waits for gemv to finish before returning.
32 | template <typename ExecutionPolicy, matrix A, vector X, vector Y>
33 |   requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
34 |            __detail::has_contiguous_range_base<X> &&
35 |            __ranges::contiguous_range<Y>)
36 | void multiply(ExecutionPolicy&& policy, A&& a, X&& x, Y&& y) {
37 |   log_trace("");
38 |   auto x_base = __detail::get_ultimate_base(x);
39 |   const tensor_scalar_t<A> alpha =
40 |       __detail::get_scaling_factor(a, x).value_or(1);
41 | 
42 |   auto a_data = __detail::get_ultimate_base(a).values().data();
43 |   auto&& q = __mkl::get_queue(policy, a_data);
44 |   auto a_handle = __mkl::get_matrix_handle(q, a);
45 |   auto a_transpose = __mkl::get_transpose(a);
46 |   auto done = oneapi::mkl::sparse::gemv(q, a_transpose, alpha, a_handle,
47 |                                         __ranges::data(x_base), 0.0,
48 |                                         __ranges::data(y));
49 |   done.wait();
50 |   // Release the handle unless has_matrix_opt(a) reports true for `a`.
51 |   if (!__detail::has_matrix_opt(a)) {
52 |     oneapi::mkl::sparse::release_matrix_handle(q, &a_handle).wait();
53 |   }
54 | }
55 | 
56 | template <matrix A, vector X, vector Y> // overload without an execution policy
57 |   requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
58 |            __detail::has_contiguous_range_base<X> &&
59 |            __ranges::contiguous_range<Y>)
60 | void multiply(A&& a, X&& x, Y&& y) {
61 |   multiply(mkl::par, std::forward<A>(a), std::forward<X>(x), // mkl::par is the policy
62 |            std::forward<Y>(y));
63 | }
64 | 
65 | } // namespace spblas
66 | 


--------------------------------------------------------------------------------
/examples/device/device_spmv.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <spblas/spblas.hpp>
 3 | 
 4 | #include <thrust/device_vector.h>
 5 | 
 6 | #include <fmt/core.h>
 7 | #include <fmt/ranges.h>
 8 | 
 9 | int main(int argc, char** argv) {
10 |   using value_t = float;
11 |   using index_t = spblas::index_t;
12 |   using offset_t = spblas::offset_t;
13 | 
14 |   // A is m x n with nnz_in requested nonzeros.
15 |   index_t m = 100;
16 |   index_t n = 100;
17 |   index_t nnz_in = 10;
18 | 
19 |   fmt::print("\n\t###########################################################"
20 |              "######################");
21 |   fmt::print("\n\t### Running SpMV Example:");
22 |   fmt::print("\n\t###");
23 |   fmt::print("\n\t###   y = alpha * A * x");
24 |   fmt::print("\n\t###");
25 |   fmt::print("\n\t### with ");
26 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n,
27 |              nnz_in);
28 |   fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1);
29 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1);
30 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
31 |              sizeof(index_t));
32 |   fmt::print("\n\t###########################################################"
33 |              "######################");
34 |   fmt::print("\n");
35 | 
36 |   // Generate the matrix, then copy its arrays into thrust::device_vectors.
37 |   auto&& [values, rowptr, colind, shape, nnz] =
38 |       spblas::generate_csr<value_t, index_t, offset_t>(m, n, nnz_in);
39 |   thrust::device_vector<value_t> d_values(values);
40 |   thrust::device_vector<offset_t> d_rowptr(rowptr);
41 |   thrust::device_vector<index_t> d_colind(colind);
42 | 
43 |   // CSR view over the raw pointers of the device copies.
44 |   value_t* values_ptr = d_values.data().get();
45 |   offset_t* rowptr_ptr = d_rowptr.data().get();
46 |   index_t* colind_ptr = d_colind.data().get();
47 |   spblas::csr_view<value_t, index_t, offset_t> a(values_ptr, rowptr_ptr,
48 |                                                  colind_ptr, shape, nnz);
49 | 
50 |   // Host vectors x (ones) and y (zeros) and their device copies.
51 |   std::vector<value_t> x(n, 1);
52 |   std::vector<value_t> y(m, 0);
53 |   thrust::device_vector<value_t> d_x(x);
54 |   thrust::device_vector<value_t> d_y(y);
55 | 
56 |   // y = A * x
57 |   std::span<value_t> x_span(d_x.data().get(), n);
58 |   std::span<value_t> y_span(d_y.data().get(), m);
59 |   spblas::multiply(a, x_span, y_span);
60 | 
61 |   // Copy the result from d_y back into the host vector y.
62 |   thrust::copy(d_y.begin(), d_y.end(), y.begin());
63 | 
64 |   fmt::print("\tExample is completed!\n");
65 |   return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/include/spblas/views/csr_view.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <span>
 4 | #include <spblas/detail/detail.hpp>
 5 | #include <spblas/views/view_base.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | template <typename T, typename I, typename O>
10 | class csr_builder; // forward declaration; csr_view names it as a friend
11 | 
12 | // Non-owning view of a sparse matrix in CSR form. It stores spans over the
13 | // values, row pointers and column indices together with the shape and the
14 | // number of stored elements; copying the view never copies the arrays.
15 | template <typename T, std::integral I = index_t, std::integral O = I>
16 | class csr_view : public view_base {
17 | public:
18 |   using scalar_type = T;
19 |   using scalar_reference = T&;
20 |   using index_type = I;
21 |   using offset_type = O;
22 | 
23 |   // Pointer constructor: `values`/`colind` are taken to hold `nnz` elements
24 |   // and `rowptr` to hold shape[0] + 1 offsets. A null `rowptr` yields an empty
25 |   // rowptr span; its length is chosen up front so that rowptr_ is never
26 |   // formed over a null pointer with a non-zero extent.
27 |   csr_view(T* values, O* rowptr, I* colind, index<I> shape, O nnz)
28 |       : values_(values, nnz),
29 |         rowptr_(rowptr, rowptr != nullptr ? shape[0] + 1 : 0),
30 |         colind_(colind, nnz), shape_(shape), nnz_(nnz) {}
31 | 
32 |   // Range constructor: each span covers the whole contiguous range passed in;
33 |   // `shape` and `nnz` are stored as given.
34 |   template <__ranges::contiguous_range V, __ranges::contiguous_range R,
35 |             __ranges::contiguous_range C>
36 |   csr_view(V&& values, R&& rowptr, C&& colind, index<I> shape, O nnz)
37 |       : values_(__ranges::data(values), __ranges::size(values)),
38 |         rowptr_(__ranges::data(rowptr), __ranges::size(rowptr)),
39 |         colind_(__ranges::data(colind), __ranges::size(colind)), shape_(shape),
40 |         nnz_(nnz) {}
41 | 
42 |   // Rebinds the three arrays; shape and nnz keep their current values.
43 |   void update(std::span<T> values, std::span<O> rowptr, std::span<I> colind) {
44 |     values_ = values;
45 |     rowptr_ = rowptr;
46 |     colind_ = colind;
47 |   }
48 | 
49 |   // Rebinds the three arrays and replaces shape and nnz as well.
50 |   void update(std::span<T> values, std::span<O> rowptr, std::span<I> colind,
51 |               index<I> shape, O nnz) {
52 |     update(values, rowptr, colind);
53 |     shape_ = shape;
54 |     nnz_ = nnz;
55 |   }
56 | 
57 |   // Accessors for the underlying arrays (spans are returned by value).
58 |   std::span<T> values() const noexcept { return values_; }
59 |   std::span<O> rowptr() const noexcept { return rowptr_; }
60 |   std::span<I> colind() const noexcept { return colind_; }
61 | 
62 |   // Matrix dimensions as stored.
63 |   index<I> shape() const noexcept { return shape_; }
64 | 
65 |   // Stored-element count (nnz) as given; not derived from any span length.
66 |   O size() const noexcept { return nnz_; }
67 | 
68 |   friend class csr_builder<T, I, O>;
69 | 
70 | private:
71 |   // Non-owning array views followed by the metadata.
72 |   std::span<T> values_;
73 |   std::span<O> rowptr_;
74 |   std::span<I> colind_;
75 |   index<I> shape_;
76 |   O nnz_;
77 | };
78 | 
79 | } // namespace spblas
80 | 


--------------------------------------------------------------------------------
/include/spblas/backend/spa_accumulator.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <functional>
  4 | #include <span>
  5 | #include <tuple>
  6 | #include <vector>
  7 | 
  8 | #include <spblas/detail/ranges.hpp>
  9 | 
 10 | namespace spblas {
 11 | 
 12 | namespace __backend {
 13 | 
 14 | // Sparse accumulator over positions [0, count): a dense value array, a
 15 | // per-position "set" flag and the list of positions touched so far.
 16 | template <typename T, std::integral I>
 17 | class spa_accumulator {
 18 | public:
 19 |   spa_accumulator(I count) : data_(count), set_(count, false) {}
 20 | 
 21 |   // Returns the slot for `pos`, recording `pos` as stored on first access.
 22 |   T& operator[](I pos) {
 23 |     if (!set_[pos]) {
 24 |       stored_.push_back(pos);
 25 |       set_[pos] = true;
 26 |     }
 27 |     return data_[pos];
 28 |   }
 29 | 
 30 |   bool contains(I pos) const { return set_[pos]; }
 31 | 
 32 |   // Resets every touched slot to T{} and forgets all stored positions.
 33 |   void clear() {
 34 |     for (auto&& pos : stored_) {
 35 |       set_[pos] = false;
 36 |       data_[pos] = T{};
 37 |     }
 38 |     stored_.clear();
 39 |   }
 40 | 
 41 |   // Number of stored positions, converted explicitly to the index type.
 42 |   I size() const { return static_cast<I>(stored_.size()); }
 43 | 
 44 |   bool empty() const { return stored_.empty(); }
 45 | 
 46 |   // Orders the stored positions ascending; the values stay where they are.
 47 |   void sort() { std::sort(stored_.begin(), stored_.end()); }
 48 | 
 49 |   // View of (position, reference to value) tuples in stored order. The view
 50 |   // refers to this accumulator's storage and does not own anything.
 51 |   auto get() {
 52 |     std::span data(data_);
 53 |     std::span stored(stored_);
 54 | 
 55 |     return stored | __ranges::views::transform([=](auto idx) {
 56 |              return std::make_tuple(idx, std::reference_wrapper(data[idx]));
 57 |            });
 58 |   }
 59 | 
 60 | private:
 61 |   std::vector<T> data_;
 62 |   std::vector<bool> set_;
 63 |   std::vector<I> stored_;
 64 | };
 65 | 
 66 | // Set of integer keys in [0, count): a flag per key plus the list of keys
 67 | // that are present, kept in insertion order.
 68 | template <std::integral T>
 69 | class spa_set {
 70 | public:
 71 |   spa_set(T count) : set_(count, false) {}
 72 | 
 73 |   // Adds `key` if it is not present yet; repeated inserts are no-ops.
 74 |   void insert(T key) {
 75 |     if (!set_[key]) {
 76 |       stored_.push_back(key);
 77 |       set_[key] = true;
 78 |     }
 79 |   }
 80 | 
 81 |   bool contains(T key) const { return set_[key]; }
 82 | 
 83 |   // Removes all keys, touching only the flags of keys that were inserted.
 84 |   void clear() {
 85 |     for (auto&& pos : stored_) {
 86 |       set_[pos] = false;
 87 |     }
 88 |     stored_.clear();
 89 |   }
 90 | 
 91 |   // Number of keys present, converted explicitly to the key type.
 92 |   T size() const { return static_cast<T>(stored_.size()); }
 93 | 
 94 |   bool empty() const { return stored_.empty(); }
 95 | 
 96 |   // Read-only span over the keys in insertion order.
 97 |   auto get() const {
 98 |     return std::span(stored_);
 99 |   }
100 | 
101 | private:
102 |   std::vector<bool> set_;
103 |   std::vector<T> stored_;
104 | };
105 | 
106 | } // namespace __backend
107 | 
108 | } // namespace spblas
109 | 


--------------------------------------------------------------------------------
/examples/simple_spgemm.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include <spblas/spblas.hpp>
 4 | 
 5 | #include <fmt/core.h>
 6 | #include <fmt/printf.h>
 7 | #include <fmt/ranges.h>
 8 | 
 9 | int main(int argc, char** argv) {
10 |   using namespace spblas;
11 |   using T = float;
12 | 
13 |   // C (m x n) = A (m x k) * B (k x n); `nnz` nonzeros are requested for A, B.
14 |   spblas::index_t m = 100;
15 |   spblas::index_t n = 10;
16 |   spblas::index_t k = 100;
17 |   spblas::index_t nnz = 100;
18 | 
19 |   fmt::print("\n\t###########################################################"
20 |              "######################");
21 |   fmt::print("\n\t### Running SpGEMM Example:");
22 |   fmt::print("\n\t###");
23 |   fmt::print("\n\t###   C = A * B");
24 |   fmt::print("\n\t###");
25 |   fmt::print("\n\t### with ");
26 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k,
27 |              nnz);
28 |   fmt::print("\n\t### B, in CSR format, of size ({}, {}) with nnz = {}", k, n,
29 |              nnz);
30 |   fmt::print("\n\t### C, in CSR format, of size ({}, {}) with nnz to be"
31 |              " determined",
32 |              m, n);
33 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
34 |              sizeof(spblas::index_t));
35 |   fmt::print("\n\t###########################################################"
36 |              "######################");
37 |   fmt::print("\n");
38 | 
39 |   auto&& [a_values, a_rowptr, a_colind, a_shape, a_nnz] =
40 |       generate_csr<T>(m, k, nnz);
41 |   auto&& [b_values, b_rowptr, b_colind, b_shape, b_nnz] =
42 |       generate_csr<T>(k, n, nnz);
43 | 
44 |   // Build the views with the nonzero counts returned by the generator rather
45 |   // than the requested `nnz`, so size() always matches the generated arrays.
46 |   csr_view<T> a(a_values, a_rowptr, a_colind, a_shape, a_nnz);
47 |   csr_view<T> b(b_values, b_rowptr, b_colind, b_shape, b_nnz);
48 | 
49 |   // C starts with only its row pointer array; values/colind are added later.
50 |   std::vector<spblas::index_t> c_rowptr(m + 1);
51 |   csr_view<T> c(nullptr, c_rowptr.data(), nullptr, {m, n}, 0);
52 | 
53 |   // First phase: obtain the number of nonzeros of the result.
54 |   auto info = multiply_compute(scaled(1.f, a), b, c);
55 |   fmt::print("\t\t C_nnz = {}", info.result_nnz());
56 | 
57 |   // Second phase: allocate the result arrays, attach them to C and fill them.
58 |   std::vector<T> c_values(info.result_nnz());
59 |   std::vector<spblas::index_t> c_colind(info.result_nnz());
60 |   c.update(c_values, c_rowptr, c_colind);
61 |   multiply_fill(info, scaled(1.f, a), b, c);
62 | 
63 |   for (auto&& [i, row] : spblas::__backend::rows(c)) {
64 |     fmt::print("{}: {}\n", i, row);
65 |   }
66 | 
67 |   fmt::print("\tExample is completed!\n");
68 |   return 0;
69 | }
70 | 


--------------------------------------------------------------------------------
/include/spblas/detail/index.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <any>
 4 | #include <cassert>
 5 | #include <concepts>
 6 | #include <limits>
 7 | #include <tuple>
 8 | 
 9 | #include <spblas/detail/tuple_concept.hpp>
10 | #include <spblas/detail/types.hpp>
11 | 
12 | namespace spblas {
13 | 
14 | // Pair of indices (first, second) that can also be used like a 2-tuple.
15 | template <std::integral T = spblas::index_t>
16 | class index {
17 | public:
18 |   using index_type = T;
19 |   using first_type = T;
20 |   using second_type = T;
21 | 
22 |   index() = default;
23 |   ~index() = default;
24 |   index(const index&) = default;
25 |   index& operator=(const index&) = default;
26 |   index(index&&) = default;
27 |   index& operator=(index&&) = default;
28 | 
29 |   constexpr index(index_type first, index_type second)
30 |       : first(first), second(second) {}
31 | 
32 |   // Converts from any other type satisfying __detail::tuple_like<Tuple, T, T>.
33 |   template <typename Tuple>
34 |     requires(!std::is_same_v<Tuple, index> && __detail::tuple_like<Tuple, T, T>)
35 |   constexpr index(Tuple tuple) : first(get<0>(tuple)), second(get<1>(tuple)) {}
36 | 
37 |   // Builds from a braced list, which is asserted to have exactly two entries.
38 |   template <std::integral U>
39 |   constexpr index(std::initializer_list<U> tuple) {
40 |     assert(tuple.size() == 2);
41 |     const auto* entries = tuple.begin();
42 |     first = entries[0];
43 |     second = entries[1];
44 |   }
45 | 
46 |   // dim == 0 selects `first`; every other value selects `second`.
47 |   constexpr index_type operator[](index_type dim) const noexcept {
48 |     return dim == 0 ? first : second;
49 |   }
50 | 
51 |   constexpr bool operator==(const index&) const noexcept = default;
52 | 
53 |   index_type first;
54 |   index_type second;
55 | };
56 | 
57 | // Tuple-style access to an index: get<0> yields first, get<1> yields second.
58 | template <std::size_t Index, std::integral I>
59 | inline constexpr I get(spblas::index<I> index)
60 |   requires(Index <= 1)
61 | {
62 |   if constexpr (Index == 0) {
63 |     return index.first;
64 |   } else {
65 |     return index.second;
66 |   }
67 | }
68 | 
69 | } // namespace spblas
70 | 
71 | namespace std {
72 | 
73 | // Tuple protocol for spblas::index<I>: it is described as a tuple of two
74 | // elements of type I (tuple_element and tuple_size specializations).
75 | template <std::size_t Index, std::integral I>
76 | struct tuple_element<Index, spblas::index<I>>
77 |     : tuple_element<Index, std::tuple<I, I>> {};
78 | 
79 | template <std::integral I>
80 | struct tuple_size<spblas::index<I>> : integral_constant<std::size_t, 2> {};
81 | 
82 | // NOTE(review): this declares a new function template in namespace std, which
83 | // the standard permits only for specializations; it is kept so that existing
84 | // std::get<N>(index) calls keep compiling.
85 | template <std::size_t Index, std::integral I>
86 | inline constexpr I get(spblas::index<I> index)
87 |   requires(Index <= 1)
88 | {
89 |   return Index == 0 ? index.first : index.second;
90 | }
91 | 
92 | } // namespace std
93 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/spmv_impl.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "aoclsparse.h"
13 | #include <cstdint>
14 | #include <stdexcept>
15 | #include "aocl_wrappers.hpp"
16 | #include <fmt/core.h>
17 | #include <spblas/detail/log.hpp>
18 | #include <spblas/detail/operation_info_t.hpp>
19 | #include <spblas/detail/ranges.hpp>
20 | #include <spblas/detail/view_inspectors.hpp>
21 | 
22 | //
23 | // Defines the following APIs for SpMV:
24 | //
25 | // y = alpha * op(A) * x
26 | //
27 | //  where A is a sparse matrix in CSR or CSC format and
28 | //  x/y are dense vectors
29 | 
30 | namespace spblas {
31 | 
32 | // Computes y = alpha * op(A) * x with AOCL-Sparse. The matrix handle and
33 | // descriptor live only for this call. Throws std::runtime_error if the
34 | // descriptor cannot be created or the SpMV itself reports a failure.
35 | template <matrix A, vector X, vector Y>
36 |   requires(__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
37 |           __detail::has_contiguous_range_base<X> &&
38 |           __ranges::contiguous_range<Y>
39 | void multiply(A&& a, X&& x, Y&& y) {
40 |   log_trace("");
41 |   using T = tensor_scalar_t<A>;
42 |   auto a_base = __detail::get_ultimate_base(a);
43 |   auto x_base = __detail::get_ultimate_base(x);
44 |   T alpha = __detail::get_scaling_factor(a, x).value_or(1);
45 |   T beta = static_cast<T>(0.0);
46 | 
47 |   aoclsparse_matrix csrA = __aoclsparse::create_matrix_handle(a_base);
48 |   aoclsparse_operation opA = __aoclsparse::get_transpose(a);
49 | 
50 |   aoclsparse_mat_descr descr = nullptr;
51 |   if (aoclsparse_create_mat_descr(&descr) != aoclsparse_status_success) {
52 |     aoclsparse_destroy(&csrA); // do not leak the handle on the error path
53 |     throw std::runtime_error("multiply: AOCL-Sparse failed while creating "
54 |                              "matrix descriptor.");
55 |   }
56 |   aoclsparse_set_mat_type(descr, aoclsparse_matrix_type_general);
57 | 
58 |   // Optimization hints for the upcoming SpMV; their status is not checked.
59 |   aoclsparse_set_mv_hint(csrA, opA, descr, 1);
60 |   aoclsparse_optimize(csrA);
61 | 
62 |   const aoclsparse_status status = __aoclsparse::aoclsparse_mv(
63 |       opA, &alpha, csrA, descr, __ranges::data(x_base), &beta,
64 |       __ranges::data(y));
65 |   // Release the library objects before reporting the outcome.
66 |   aoclsparse_destroy(&csrA);
67 |   aoclsparse_destroy_mat_descr(descr);
68 |   if (status != aoclsparse_status_success) {
69 |     throw std::runtime_error(fmt::format(
70 |         "multiply: AOCL-Sparse SpMV failed with status {}.",
71 |         static_cast<int>(status)));
72 |   }
73 | }
74 | 
75 | } // namespace spblas
76 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/triangular_solve_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/backend.hpp>
 4 | #include <spblas/concepts.hpp>
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/triangular_types.hpp>
 8 | 
 9 | namespace spblas {
10 | 
11 | // Sparse triangular solve: computes x such that tri(A) * x = b, reading only
12 | // the strictly upper or strictly lower entries of `a` (selected by `t`) plus,
13 | // for explicit_diagonal_t, the diagonal entry of each row. With any other
14 | // DiagonalStorage the diagonal is treated as 1. A row without a stored
15 | // diagonal entry divides by zero instead of reusing an earlier row's value.
16 | template <matrix A, class Triangle, class DiagonalStorage, vector B, vector X>
17 |   requires(__backend::row_iterable<A> && __backend::lookupable<B> &&
18 |            __backend::lookupable<X>)
19 | void triangular_solve(A&& a, Triangle t, DiagonalStorage d, B&& b, X&& x) {
20 |   static_assert(std::is_same_v<Triangle, upper_triangle_t> ||
21 |                 std::is_same_v<Triangle, lower_triangle_t>);
22 |   assert(__backend::shape(a)[0] == __backend::shape(a)[1]);
23 |   assert(__backend::shape(a)[1] == __backend::shape(x) &&
24 |          __backend::shape(a)[0] == __backend::shape(b));
25 | 
26 |   using T = tensor_scalar_t<A>;
27 |   using V = decltype(std::declval<tensor_scalar_t<A>>() *
28 |                      std::declval<tensor_scalar_t<X>>());
29 | 
30 |   constexpr bool is_upper = std::is_same_v<Triangle, upper_triangle_t>;
31 | 
32 |   // Solves row i. The x entries it reads belong to rows that the loops below
33 |   // visit before row i.
34 |   auto solve_row = [&](auto i, auto&& a_row) {
35 |     V dot_product = 0;
36 |     T diagonal_value = 0; // reset per row so a stale diagonal is never used
37 |     for (auto&& [k, a_v] : a_row) {
38 |       if (is_upper ? (k > i) : (k < i)) {
39 |         dot_product += a_v * __backend::lookup(x, k);
40 |       } else if (i == k) {
41 |         diagonal_value = a_v;
42 |       }
43 |     }
44 |     if constexpr (std::is_same_v<DiagonalStorage, explicit_diagonal_t>) {
45 |       __backend::lookup(x, i) =
46 |           (__backend::lookup(b, i) - dot_product) / diagonal_value;
47 |     } else {
48 |       __backend::lookup(x, i) = __backend::lookup(b, i) - dot_product;
49 |     }
50 |   };
51 | 
52 |   if constexpr (is_upper) {
53 |     // Back substitution: rows are visited in reverse order.
54 |     for (auto&& [i, a_row] : __ranges::views::reverse(__backend::rows(a))) {
55 |       solve_row(i, a_row);
56 |     }
57 |   } else {
58 |     // Forward substitution: rows are visited in order.
59 |     for (auto&& [i, a_row] : __backend::rows(a)) {
60 |       solve_row(i, a_row);
61 |     }
62 |   }
63 | }
64 | 
65 | } // namespace spblas
66 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/operation_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | namespace __mkl {
 8 | 
 9 | struct operation_state_t {
10 |   // Move-only owner of the oneMKL handles below; nullptr means "not held".
11 |   oneapi::mkl::sparse::matrix_handle_t a_handle = nullptr;
12 |   oneapi::mkl::sparse::matrix_handle_t b_handle = nullptr;
13 |   oneapi::mkl::sparse::matrix_handle_t c_handle = nullptr;
14 |   oneapi::mkl::sparse::matrix_handle_t d_handle = nullptr;
15 | 
16 |   oneapi::mkl::sparse::matmat_descr_t descr = nullptr;
17 | 
18 |   void* c_rowptr = nullptr; // carried along, but never freed by this struct
19 | 
20 |   sycl::queue q; // queue the matrix handles are released on
21 | 
22 |   operation_state_t() = default;
23 | 
24 |   operation_state_t(oneapi::mkl::sparse::matrix_handle_t a_handle,
25 |                     oneapi::mkl::sparse::matrix_handle_t b_handle,
26 |                     oneapi::mkl::sparse::matrix_handle_t c_handle,
27 |                     oneapi::mkl::sparse::matrix_handle_t d_handle,
28 |                     oneapi::mkl::sparse::matmat_descr_t descr, void* c_rowptr,
29 |                     sycl::queue q)
30 |       : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle),
31 |         d_handle(d_handle), descr(descr), c_rowptr(c_rowptr), q(q) {}
32 |   operation_state_t(operation_state_t&& other) {
33 |     *this = std::move(other);
34 |   }
35 | 
36 |   // Move assignment swaps every member with `other` instead of overwriting:
37 |   // whatever *this held before ends up in `other`, together with the queue
38 |   // it has to be released on, and is freed by other's destructor rather
39 |   // than leaked. Self-assignment is a no-op. When called from the move
40 |   // constructor *this is still empty, so `other` is left with null handles.
41 |   operation_state_t& operator=(operation_state_t&& other) {
42 |     std::swap(a_handle, other.a_handle);
43 |     std::swap(b_handle, other.b_handle);
44 |     std::swap(c_handle, other.c_handle);
45 |     std::swap(d_handle, other.d_handle);
46 |     std::swap(descr, other.descr);
47 |     std::swap(c_rowptr, other.c_rowptr);
48 |     std::swap(q, other.q);
49 | 
50 |     return *this;
51 |   }
52 | 
53 |   operation_state_t(const operation_state_t& other) = delete;
54 | 
55 |   ~operation_state_t() {
56 |     release_matrix_handle(a_handle);
57 |     release_matrix_handle(b_handle);
58 |     release_matrix_handle(c_handle);
59 |     release_matrix_handle(d_handle);
60 |     release_matmat_descr(descr);
61 |   }
62 | 
63 | private:
64 |   void release_matrix_handle(oneapi::mkl::sparse::matrix_handle_t& handle) {
65 |     if (handle != nullptr) {
66 |       oneapi::mkl::sparse::release_matrix_handle(q, &handle);
67 |     }
68 |   }
69 | 
70 |   void release_matmat_descr(oneapi::mkl::sparse::matmat_descr_t& descr) {
71 |     if (descr != nullptr) {
72 |       oneapi::mkl::sparse::release_matmat_descr(&descr);
73 |     }
74 |   }
75 | };
76 | 
77 | } // namespace __mkl
78 | 
79 | } // namespace spblas
80 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/types.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <complex>
 4 | #include <cstdint>
 5 | #include <type_traits>
 6 | 
 7 | #include <rocsparse/rocsparse.h>
 8 | 
 9 | namespace spblas {
10 | 
11 | using index_t = std::int32_t;
12 | using offset_t = index_t;
13 | 
14 | namespace detail {
15 | 
16 | template <typename T> // floating point, or a 32-bit signed/unsigned integer
17 | static constexpr bool is_valid_rocsparse_scalar_type_v =
18 |     std::is_floating_point_v<T> || std::is_same_v<T, std::uint32_t> ||
19 |     std::is_same_v<T, std::int32_t>;
20 | 
21 | template <typename T> // index types: 16-bit unsigned, 32- or 64-bit signed
22 | static constexpr bool is_valid_rocsparse_index_type_v =
23 |     std::is_same_v<T, std::int64_t> || std::is_same_v<T, std::int32_t> ||
24 |     std::is_same_v<T, std::uint16_t>;
25 | 
26 | template <typename T>
27 | struct rocsparse_data_type; // left undefined: an unmapped T has no ::value
28 | 
29 | template <>
30 | struct rocsparse_data_type<std::int32_t> {
31 |   static constexpr rocsparse_datatype value = rocsparse_datatype_i32_r;
32 | };
33 | 
34 | template <>
35 | struct rocsparse_data_type<std::uint32_t> {
36 |   static constexpr rocsparse_datatype value = rocsparse_datatype_u32_r;
37 | };
38 | 
39 | template <>
40 | struct rocsparse_data_type<float> {
41 |   static constexpr rocsparse_datatype value = rocsparse_datatype_f32_r;
42 | };
43 | 
44 | template <>
45 | struct rocsparse_data_type<double> {
46 |   static constexpr rocsparse_datatype value = rocsparse_datatype_f64_r;
47 | };
48 | 
49 | template <>
50 | struct rocsparse_data_type<std::complex<float>> {
51 |   static constexpr rocsparse_datatype value = rocsparse_datatype_f32_c;
52 | };
53 | 
54 | template <>
55 | struct rocsparse_data_type<std::complex<double>> {
56 |   static constexpr rocsparse_datatype value = rocsparse_datatype_f64_c;
57 | };
58 | // Shorthand for rocsparse_data_type<T>::value.
59 | template <typename T>
60 | static constexpr rocsparse_datatype rocsparse_data_type_v =
61 |     rocsparse_data_type<T>::value;
62 | 
63 | template <typename T>
64 | struct rocsparse_index_type; // left undefined: an unmapped T has no ::value
65 | 
66 | template <>
67 | struct rocsparse_index_type<std::uint16_t> {
68 |   static constexpr rocsparse_indextype value = rocsparse_indextype_u16;
69 | };
70 | 
71 | template <>
72 | struct rocsparse_index_type<std::int32_t> {
73 |   static constexpr rocsparse_indextype value = rocsparse_indextype_i32;
74 | };
75 | 
76 | template <>
77 | struct rocsparse_index_type<std::int64_t> {
78 |   static constexpr rocsparse_indextype value = rocsparse_indextype_i64;
79 | };
80 | // Shorthand for rocsparse_index_type<T>::value.
81 | template <typename T>
82 | static constexpr rocsparse_indextype rocsparse_index_type_v =
83 |     rocsparse_index_type<T>::value;
84 | 
85 | } // namespace detail
86 | 
87 | } // namespace spblas
88 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/detail/create_matrix_handle.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | #include <spblas/detail/view_inspectors.hpp>
 6 | 
 7 | namespace spblas {
 8 | 
 9 | namespace __mkl {
10 | 
11 | template <matrix M>
12 |   requires __detail::is_csr_view_v<M>
13 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q,
14 |                                                           M&& m) {
15 |   // Wrap the view's CSR arrays (zero-based) in a freshly initialized handle
16 |   // and wait on what set_csr_data returns before handing the handle out.
17 |   oneapi::mkl::sparse::matrix_handle_t csr_handle = nullptr;
18 |   oneapi::mkl::sparse::init_matrix_handle(&csr_handle);
19 |   auto data_set = oneapi::mkl::sparse::set_csr_data(
20 |       q, csr_handle, m.shape()[0], m.shape()[1], oneapi::mkl::index_base::zero,
21 |       m.rowptr().data(), m.colind().data(), m.values().data());
22 |   data_set.wait();
23 |   return csr_handle;
24 | }
25 | 
26 | template <matrix M>
27 |   requires __detail::is_csc_view_v<M>
28 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q,
29 |                                                           M&& m) {
30 |   // The CSC arrays are handed to set_csr_data with rows and columns swapped:
31 |   // colptr takes the row-pointer slot and rowind the column-index slot.
32 |   oneapi::mkl::sparse::matrix_handle_t csc_handle = nullptr;
33 |   oneapi::mkl::sparse::init_matrix_handle(&csc_handle);
34 |   auto data_set = oneapi::mkl::sparse::set_csr_data(
35 |       q, csc_handle, m.shape()[1], m.shape()[0], oneapi::mkl::index_base::zero,
36 |       m.colptr().data(), m.rowind().data(), m.values().data());
37 |   data_set.wait();
38 |   return csc_handle;
39 | }
40 | 
41 | template <matrix M>
42 |   requires __detail::has_base<M>
43 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q,
44 |                                                           M&& m) {
45 |   return create_matrix_handle(q, m.base()); // build from the wrapped view
46 | }
47 | 
48 | //
49 | // Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose
50 | // and returns the transpose value associated with it being represented
51 | // in the CSR format (since oneMKL SYCL currently does not have a CSC
52 | // format):
53 | //
54 | //     CSR = CSR + nontrans
55 | //     CSR_transpose = CSR + trans
56 | //     CSC = CSR + trans
57 | //     CSC_transpose = CSR + nontrans
58 | //
59 | template <matrix M>
60 | oneapi::mkl::transpose get_transpose(M&& m) {
61 |   static_assert(__detail::has_csr_base<M> || __detail::has_csc_base<M>);
62 |   if constexpr (__detail::has_base<M>) {
63 |     return get_transpose(m.base()); // wrappers defer to the wrapped view
64 |   } else if constexpr (__detail::is_csr_view_v<M>) {
65 |     return oneapi::mkl::transpose::nontrans;
66 |   } else if constexpr (__detail::is_csc_view_v<M>) {
67 |     return oneapi::mkl::transpose::trans; // CSC arrays are used as CSR of A^T
68 |   }
69 | }
70 | 
71 | } // namespace __mkl
72 | 
73 | } // namespace spblas
74 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/detail/spmv_state_t.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <memory>
 4 | #include <rocsparse/rocsparse.h>
 5 | 
 6 | #include "../hip_allocator.hpp"
 7 | #include "abstract_operation_state.hpp"
 8 | 
 9 | namespace spblas {
10 | namespace __rocsparse {
11 | 
12 | class spmv_state_t : public abstract_operation_state_t {
13 | public:
14 |   spmv_state_t() : spmv_state_t(rocsparse::hip_allocator<char>{}) {}
15 | 
16 |   spmv_state_t(rocsparse::hip_allocator<char> alloc)
17 |       : alloc_(alloc), buffer_size_(0), workspace_(nullptr), a_descr_(nullptr),
18 |         b_descr_(nullptr), c_descr_(nullptr) {}
19 | 
20 |   // Copying would make two objects free the same workspace and descriptors.
21 |   spmv_state_t(const spmv_state_t&) = delete;
22 |   spmv_state_t& operator=(const spmv_state_t&) = delete;
23 | 
24 |   ~spmv_state_t() {
25 |     if (workspace_) {
26 |       alloc_.deallocate(workspace_, buffer_size_);
27 |     }
28 |     set_a_descriptor(nullptr);
29 |     set_b_descriptor(nullptr);
30 |     set_c_descriptor(nullptr);
31 |   }
32 | 
33 |   // Workspace management
34 |   void* workspace() const {
35 |     return workspace_;
36 |   }
37 |   size_t buffer_size() const {
38 |     return buffer_size_;
39 |   }
40 | 
41 |   // Grows the workspace when `size` exceeds buffer_size_; never shrinks it.
42 |   void allocate_workspace(size_t size) {
43 |     if (size > buffer_size_) {
44 |       if (workspace_) {
45 |         alloc_.deallocate(workspace_, buffer_size_);
46 |         // Forget the freed block in case allocate() below throws.
47 |         workspace_ = nullptr;
48 |         buffer_size_ = 0;
49 |       }
50 |       workspace_ = alloc_.allocate(size);
51 |       buffer_size_ = size;
52 |     }
53 |   }
54 | 
55 |   // Descriptor accessors
56 |   rocsparse_spmat_descr a_descriptor() const {
57 |     return a_descr_;
58 |   }
59 |   rocsparse_dnvec_descr b_descriptor() const {
60 |     return b_descr_;
61 |   }
62 |   rocsparse_dnvec_descr c_descriptor() const {
63 |     return c_descr_;
64 |   }
65 | 
66 |   // Descriptor setters: destroy the previously stored descriptor, if any.
67 |   void set_a_descriptor(rocsparse_spmat_descr descr) {
68 |     if (a_descr_ && a_descr_ != descr) {
69 |       rocsparse_destroy_spmat_descr(a_descr_);
70 |     }
71 |     a_descr_ = descr;
72 |   }
73 | 
74 |   void set_b_descriptor(rocsparse_dnvec_descr descr) {
75 |     if (b_descr_ && b_descr_ != descr) {
76 |       rocsparse_destroy_dnvec_descr(b_descr_);
77 |     }
78 |     b_descr_ = descr;
79 |   }
80 | 
81 |   void set_c_descriptor(rocsparse_dnvec_descr descr) {
82 |     if (c_descr_ && c_descr_ != descr) {
83 |       rocsparse_destroy_dnvec_descr(c_descr_);
84 |     }
85 |     c_descr_ = descr;
86 |   }
87 | 
88 | private:
89 |   rocsparse::hip_allocator<char> alloc_;
90 |   size_t buffer_size_;
91 |   char* workspace_;
92 |   rocsparse_spmat_descr a_descr_;
93 |   rocsparse_dnvec_descr b_descr_;
94 |   rocsparse_dnvec_descr c_descr_;
95 | };
96 | 
97 | } // namespace __rocsparse
98 | } // namespace spblas
99 | 


--------------------------------------------------------------------------------
/include/spblas/views/matrix_opt_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/backend/concepts.hpp>
 4 | #include <spblas/backend/cpos.hpp>
 5 | #include <spblas/concepts.hpp>
 6 | 
 7 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
 8 | #include <oneapi/mkl.hpp>
 9 | #include <sycl/sycl.hpp>
10 | #endif
11 | 
12 | namespace spblas {
13 | 
14 | template <matrix M>
15 |   requires(view<M> &&
16 |            (__detail::is_csr_view_v<M> || __detail::is_csc_view_v<M>) )
17 | class matrix_opt<M> : public view_base {
18 | public:
19 |   using scalar_type = tensor_scalar_t<M>;
20 |   using scalar_reference = tensor_scalar_reference_t<M>;
21 |   using index_type = tensor_index_t<M>;
22 |   using offset_type = tensor_offset_t<M>;
23 | 
24 |   matrix_opt(M matrix) : matrix_(matrix) {}
25 | 
26 |   ~matrix_opt() {
27 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
28 |     if (matrix_handle_) {
29 |       // q here needs to be on same context as queue in operations,
30 |       // ideally from execution policy
31 |       sycl::queue q(sycl::cpu_selector_v);
32 |       oneapi::mkl::sparse::release_matrix_handle(q, &matrix_handle_, {}).wait();
33 |       matrix_handle_ = nullptr;
34 |     }
35 | #endif
36 |   }
37 | 
38 |   auto shape() const noexcept {
39 |     return __backend::shape(base());
40 |   }
41 | 
42 |   index_type size() const noexcept {
43 |     return __backend::size(base());
44 |   }
45 | 
46 |   auto base() {
47 |     return matrix_;
48 |   }
49 | 
50 |   auto base() const {
51 |     return matrix_;
52 |   }
53 | 
54 | private:
55 |   // CPO hooks take the view by const reference: a by-value parameter copies
56 |   // matrix_handle_, and destroying that copy releases the shared handle.
57 |   friend auto tag_invoke(__backend::size_fn_, const matrix_opt& matrix) {
58 |     return matrix.size();
59 |   }
60 | 
61 |   friend auto tag_invoke(__backend::shape_fn_, const matrix_opt& matrix) {
62 |     return matrix.shape();
63 |   }
64 | 
65 |   friend scalar_reference tag_invoke(__backend::lookup_fn_,
66 |                                      const matrix_opt& matrix, index_type i,
67 |                                      index_type j)
68 |     requires(__backend::lookupable<M>)
69 |   {
70 |     return __backend::lookup(matrix.base(), i, j);
71 |   }
72 | 
73 |   friend auto tag_invoke(__backend::rows_fn_, const matrix_opt& matrix)
74 |     requires(__backend::row_iterable<M>)
75 |   {
76 |     return __backend::rows(matrix.base());
77 |   }
78 | 
79 |   friend auto tag_invoke(__backend::lookup_row_fn_, const matrix_opt& matrix,
80 |                          index_type row_index)
81 |     requires(__backend::row_lookupable<M>)
82 |   {
83 |     return __backend::lookup_row(matrix.base(), row_index);
84 |   }
85 | 
86 | public:
87 |   M matrix_;
88 | 
89 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
90 |   // nullptr until a handle is stored here; released by the destructor.
91 |   oneapi::mkl::sparse::matrix_handle_t matrix_handle_ = nullptr;
92 | #endif
93 | };
94 | 
95 | template <typename T>
96 | matrix_opt(T t) -> matrix_opt<T>; // deduce M from the constructor argument
97 | 
98 | } // namespace spblas
99 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/exception.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cuda_runtime.h>
 4 | #include <cusparse.h>
 5 | #include <stdexcept>
 6 | #include <string>
 7 | 
 8 | namespace spblas {
 9 | 
10 | namespace __cusparse {
11 | 
12 | // Throw an exception if the cudaError_t is not cudaSuccess. Defined inline
13 | // because this header may be included from several translation units.
14 | inline void throw_if_error(cudaError_t error_code, std::string prefix = "") {
15 |   if (error_code != cudaSuccess) {
16 |     std::string name = cudaGetErrorName(error_code);
17 |     std::string message = cudaGetErrorString(error_code);
18 |     throw std::runtime_error(prefix + "CUDA encountered an error " + name +
19 |                              ": \"" + message + "\"");
20 |   }
21 | }
22 | 
23 | // Throw an exception if the cusparseStatus_t is not CUSPARSE_STATUS_SUCCESS.
24 | void throw_if_error(cusparseStatus_t error_code) {
25 |   if (error_code == CUSPARSE_STATUS_SUCCESS) {
26 |     return;
27 |   } else if (error_code == CUSPARSE_STATUS_NOT_INITIALIZED) {
28 |     throw std::runtime_error(
29 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_NOT_INITIALIZED\"");
30 |   } else if (error_code == CUSPARSE_STATUS_ALLOC_FAILED) {
31 |     throw std::runtime_error(
32 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_ALLOC_FAILED\"");
33 |   } else if (error_code == CUSPARSE_STATUS_INVALID_VALUE) {
34 |     throw std::runtime_error(
35 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_INVALID_VALUE\"");
36 |   } else if (error_code == CUSPARSE_STATUS_ARCH_MISMATCH) {
37 |     throw std::runtime_error(
38 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_ARCH_MISMATCH\"");
39 |   } else if (error_code == CUSPARSE_STATUS_EXECUTION_FAILED) {
40 |     throw std::runtime_error(
41 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_EXECUTION_FAILED\"");
42 |   } else if (error_code == CUSPARSE_STATUS_INTERNAL_ERROR) {
43 |     throw std::runtime_error(
44 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_INTERNAL_ERROR\"");
45 |   } else if (error_code == CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED) {
46 |     throw std::runtime_error("cuSPARSE encountered an error: "
47 |                              "\"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED\"");
48 |   } else if (error_code == CUSPARSE_STATUS_NOT_SUPPORTED) {
49 |     throw std::runtime_error(
50 |         "cuSPARSE encountered an error: \"CUSPARSE_STATUS_NOT_SUPPORTED\"");
51 |   } else if (error_code == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) {
52 |     throw std::runtime_error("cuSPARSE encountered an error: "
53 |                              "\"CUSPARSE_STATUS_INSUFFICIENT_RESOURCES\"");
54 |   } else {
55 |     throw std::runtime_error(
56 |         "cuSPARSE encountered an error: \"unknown error\"");
57 |   }
58 | }
59 | 
60 | } // namespace __cusparse
61 | 
62 | } // namespace spblas
63 | 


--------------------------------------------------------------------------------
/test/gtest/transpose_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <gtest/gtest.h>
 2 | 
 3 | #include "util.hpp"
 4 | #include <spblas/backend/spa_accumulator.hpp>
 5 | #include <spblas/spblas.hpp>
 6 | 
 7 | #include <fmt/core.h>
 8 | #include <fmt/ranges.h>
 9 | 
10 | TEST(CsrView, Transpose) {
11 |   using T = float;
12 |   using I = spblas::index_t;
13 |   using O = spblas::offset_t;
14 | 
15 |   for (auto&& [m, k, nnz] : util::dims) {
16 |     // Generate CSR Matrix A.
17 |     auto [a_values, a_rowptr, a_colind, a_shape, a_nnz] =
18 |         spblas::generate_csr<T, I>(m, k, nnz);
19 | 
20 |     spblas::csr_view<T, I, O> a(a_values, a_rowptr, a_colind, a_shape, a_nnz);
21 | 
22 |     // Transpose; B = A_T
23 | 
24 |     spblas::index b_shape(a.shape()[1], a.shape()[0]);
25 | 
26 |     std::vector<O> b_rowptr(b_shape[0] + 1);
27 |     std::vector<I> b_colind(a.size());
28 |     std::vector<T> b_values(a.size());
29 | 
30 |     spblas::csr_view<T, I, O> b(b_values, b_rowptr, b_colind, b_shape,
31 |                                 a.size());
32 | 
33 |     auto info = spblas::transpose_inspect(a, b);
34 |     spblas::transpose(info, a, b);
35 | 
36 |     // Create transposed COO for reference.
37 |     std::vector<T> ref_values;
38 |     std::vector<I> ref_rowind;
39 |     std::vector<I> ref_colind;
40 | 
41 |     for (auto&& [i, row] : spblas::__backend::rows(a)) {
42 |       for (auto&& [j, v] : row) {
43 |         ref_values.push_back(v);
44 |         ref_rowind.push_back(j);
45 |         ref_colind.push_back(i);
46 |       }
47 |     }
48 | 
49 |     // Create COO from transposed matrix for test (same types as reference).
50 |     std::vector<T> test_values;
51 |     std::vector<I> test_rowind;
52 |     std::vector<I> test_colind;
53 | 
54 |     for (auto&& [i, row] : spblas::__backend::rows(b)) {
55 |       for (auto&& [j, v] : row) {
56 |         test_values.push_back(v);
57 |         test_rowind.push_back(i);
58 |         test_colind.push_back(j);
59 |       }
60 |     }
61 | 
62 |     // Ensure both COO matrices are sorted.
63 |     spblas::__ranges::sort(
64 |         spblas::__ranges::views::zip(ref_rowind, ref_colind, ref_values));
65 |     spblas::__ranges::sort(
66 |         spblas::__ranges::views::zip(test_rowind, test_colind, test_values));
67 | 
68 |     EXPECT_EQ(ref_values.size(), test_values.size());
69 |     EXPECT_EQ(ref_rowind.size(), test_rowind.size());
70 |     EXPECT_EQ(ref_colind.size(), test_colind.size());
71 | 
72 |     for (auto&& [a, b] :
73 |          spblas::__ranges::views::zip(ref_values, test_values)) {
74 |       EXPECT_EQ_(a, b);
75 |     }
76 | 
77 |     for (auto&& [a, b] :
78 |          spblas::__ranges::views::zip(ref_rowind, test_rowind)) {
79 |       EXPECT_EQ(a, b);
80 |     }
81 | 
82 |     for (auto&& [a, b] :
83 |          spblas::__ranges::views::zip(ref_colind, test_colind)) {
84 |       EXPECT_EQ(a, b);
85 |     }
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/include/spblas/detail/tag_invoke.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <type_traits>
 4 | #include <utility>
 5 | 
 6 | namespace spblas {
 7 | 
 8 | namespace _tag_invoke {
 9 | void tag_invoke(); // declaration only; no definition appears in this header
10 | // Callable that forwards (cpo, args...) to an unqualified tag_invoke call.
11 | struct _fn {
12 |   template <typename CPO, typename... Args>
13 |   constexpr auto operator()(CPO cpo, Args&&... args) const
14 |       noexcept(noexcept(tag_invoke((CPO&&) cpo, (Args&&) args...)))
15 |           -> decltype(tag_invoke((CPO&&) cpo, (Args&&) args...)) {
16 |     return tag_invoke((CPO&&) cpo, (Args&&) args...);
17 |   }
18 | };
19 | // Type of the expression tag_invoke(CPO, Args...).
20 | template <typename CPO, typename... Args>
21 | using tag_invoke_result_t =
22 |     decltype(tag_invoke(std::declval<CPO>(), std::declval<Args>()...));
23 | // Return types of different size, so sizeof can tell the overloads apart.
24 | using yes_type = char;
25 | using no_type = char (&)[2];
26 | // Viable only if tag_invoke(CPO, Args...) is well-formed; noexcept iff it is.
27 | template <typename CPO, typename... Args>
28 | auto try_tag_invoke(int) //
29 |     noexcept(noexcept(tag_invoke(std::declval<CPO>(), std::declval<Args>()...)))
30 |         -> decltype(static_cast<void>(tag_invoke(std::declval<CPO>(),
31 |                                                  std::declval<Args>()...)),
32 |                     yes_type{});
33 | // Catch-all overload, selected when the one above drops out.
34 | template <typename CPO, typename... Args>
35 | no_type try_tag_invoke(...) noexcept(false);
36 | // Names T<Args...> through a nested `type` member.
37 | template <template <typename...> class T, typename... Args>
38 | struct defer {
39 |   using type = T<Args...>;
40 | };
41 | // Has no nested `type`.
42 | struct empty {};
43 | } // namespace _tag_invoke
44 | 
45 | namespace _tag_invoke_cpo {
46 | inline constexpr _tag_invoke::_fn tag_invoke{}; // the spblas::tag_invoke object
47 | }
48 | using namespace _tag_invoke_cpo;
49 | // Type of a CPO object, without cv-qualifiers or reference.
50 | template <auto& CPO>
51 | using tag_t = std::remove_cvref_t<decltype(CPO)>;
52 | // Make the result alias available directly in namespace spblas.
53 | using _tag_invoke::tag_invoke_result_t;
54 | // True when tag_invoke(CPO, Args...) is a valid call (yes_type overload won).
55 | template <typename CPO, typename... Args>
56 | inline constexpr bool is_tag_invocable_v =
57 |     (sizeof(_tag_invoke::try_tag_invoke<CPO, Args...>(0)) ==
58 |      sizeof(_tag_invoke::yes_type));
59 | // Has a nested `type` (the call's result) only when the call is valid.
60 | template <typename CPO, typename... Args>
61 | struct tag_invoke_result
62 |     : std::conditional_t<is_tag_invocable_v<CPO, Args...>,
63 |                          _tag_invoke::defer<tag_invoke_result_t, CPO, Args...>,
64 |                          _tag_invoke::empty> {};
65 | // Type-trait form of is_tag_invocable_v.
66 | template <typename CPO, typename... Args>
67 | using is_tag_invocable = std::bool_constant<is_tag_invocable_v<CPO, Args...>>;
68 | // True when the call is valid and declared noexcept.
69 | template <typename CPO, typename... Args>
70 | inline constexpr bool is_nothrow_tag_invocable_v =
71 |     noexcept(_tag_invoke::try_tag_invoke<CPO, Args...>(0));
72 | // Type-trait form of is_nothrow_tag_invocable_v.
73 | template <typename CPO, typename... Args>
74 | using is_nothrow_tag_invocable =
75 |     std::bool_constant<is_nothrow_tag_invocable_v<CPO, Args...>>;
76 | // Concept form of the same sizeof check used by is_tag_invocable_v.
77 | template <typename CPO, typename... Args>
78 | concept tag_invocable = (sizeof(_tag_invoke::try_tag_invoke<CPO, Args...>(0)) ==
79 |                          sizeof(_tag_invoke::yes_type));
80 | 
81 | } // namespace spblas
82 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/spmm_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/ranges.hpp>
 8 | #include <spblas/detail/view_inspectors.hpp>
 9 | #include <spblas/views/matrix_opt.hpp>
10 | 
11 | #include <spblas/vendor/onemkl_sycl/detail/detail.hpp>
12 | 
13 | //
14 | // Defines the following APIs for SpMM:
15 | //
16 | //  Y = alpha * op(A) * X
17 | //
18 | //  where A is a sparse matrix in CSR format and
19 | //  X/Y are dense matrices in row_major format
20 | //
21 | // //operation_info_t multiply_inspect(A, x, y)
22 | // //void multiply_inspect(operation_info_t, A, x, y)
23 | //
24 | // //void multiply_compute(operation_info_t, A, x, y)
25 | // void multiply(A, x, y)
26 | //
27 | 
28 | namespace spblas {
29 | 
30 | template <typename ExecutionPolicy, matrix A, matrix X, matrix Y>
31 |   requires(
32 |       (__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
33 |       __detail::has_mdspan_matrix_base<X> && __detail::is_matrix_mdspan_v<Y> &&
34 |       std::is_same_v<typename __detail::ultimate_base_type_t<X>::layout_type,
35 |                      __mdspan::layout_right> &&
36 |       std::is_same_v<typename std::remove_cvref_t<Y>::layout_type,
37 |                      __mdspan::layout_right>)
38 | void multiply(ExecutionPolicy&& policy, A&& a, X&& x, Y&& y) {
39 |   log_trace("");
40 |   // Innermost view of x, plus the scaling factor of a/x (1 when there is none).
41 |   auto x_mat = __detail::get_ultimate_base(x);
42 |   tensor_scalar_t<A> scale = __detail::get_scaling_factor(a, x).value_or(1);
43 | 
44 |   // The queue is obtained from the policy and the pointer to A's values.
45 |   auto a_values = __detail::get_ultimate_base(a).values().data();
46 |   auto&& queue = __mkl::get_queue(policy, a_values);
47 | 
48 |   auto sparse_a = __mkl::get_matrix_handle(queue, a);
49 |   auto op_a = __mkl::get_transpose(a);
50 |   // Row-major gemm; 0.0 is the scalar passed just ahead of y's data.
51 |   oneapi::mkl::sparse::gemm(queue, oneapi::mkl::layout::row_major, op_a,
52 |                             oneapi::mkl::transpose::nontrans, scale, sparse_a,
53 |                             x_mat.data_handle(), x_mat.extent(1),
54 |                             x_mat.extent(1), 0.0, y.data_handle(), y.extent(1))
55 |       .wait();
56 |   // Only release the handle here when `a` has no matrix_opt wrapper.
57 |   if (!__detail::has_matrix_opt(a)) {
58 |     oneapi::mkl::sparse::release_matrix_handle(queue, &sparse_a).wait();
59 |   }
60 | }
61 | 
62 | template <matrix A, matrix X, matrix Y>
63 |   requires(
64 |       (__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
65 |       __detail::has_mdspan_matrix_base<X> && __detail::is_matrix_mdspan_v<Y> &&
66 |       std::is_same_v<typename __detail::ultimate_base_type_t<X>::layout_type,
67 |                      __mdspan::layout_right> &&
68 |       std::is_same_v<typename std::remove_cvref_t<Y>::layout_type,
69 |                      __mdspan::layout_right>)
70 | void multiply(A&& a, X&& x, Y&& y) { // same operation, run with mkl::par
71 |   multiply(mkl::par, std::forward<A>(a), std::forward<X>(x),
72 |            std::forward<Y>(y));
73 | }
74 | 
75 | } // namespace spblas
76 | 


--------------------------------------------------------------------------------
/include/spblas/views/inspectors.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/mdspan.hpp>
 4 | #include <spblas/views/csc_view.hpp>
 5 | #include <spblas/views/csr_view.hpp>
 6 | #include <spblas/views/matrix_opt.hpp>
 7 | #include <spblas/views/scaled_view.hpp>
 8 | 
 9 | namespace spblas {
10 | 
11 | namespace __detail {
12 | 
13 | // Inspector for csr_view
14 | 
15 | // Primary template: a type is not a csr_view unless the specialization
16 | // below matches it.
17 | template <typename T>
18 | struct is_instantiation_of_csr_view : std::false_type {};
19 | 
20 | // Matches csr_view<T, I, O> for integral I and O.
21 | template <typename T, std::integral I, std::integral O>
22 | struct is_instantiation_of_csr_view<csr_view<T, I, O>> : std::true_type {};
23 | 
24 | // True when T, with cv-qualifiers and references removed, is a csr_view.
25 | template <typename T>
26 | static constexpr bool is_csr_view_v =
27 |     is_instantiation_of_csr_view<std::remove_cvref_t<T>>::value;
28 | 
29 | // Inspector for csc_view
30 | 
31 | // Primary template: a type is not a csc_view unless the specialization
32 | // below matches it.
33 | template <typename T>
34 | struct is_instantiation_of_csc_view : std::false_type {};
35 | 
36 | // Matches csc_view<T, I, O> for integral I and O.
37 | template <typename T, std::integral I, std::integral O>
38 | struct is_instantiation_of_csc_view<csc_view<T, I, O>> : std::true_type {};
39 | 
40 | // True when T, with cv-qualifiers and references removed, is a csc_view.
41 | template <typename T>
42 | static constexpr bool is_csc_view_v =
43 |     is_instantiation_of_csc_view<std::remove_cvref_t<T>>::value;
44 | 
45 | // Inspector for mdspan
46 | 
47 | // Primary template: not a rank-2 mdspan.
48 | template <typename T>
49 | struct is_matrix_instantiation_of_mdspan : std::false_type {};
50 | 
51 | // Matches only mdspans whose extents have rank 2.
52 | template <typename T, typename Extents, typename LayoutPolicy,
53 |           typename AccessorPolicy>
54 |   requires(Extents::rank() == 2)
55 | struct is_matrix_instantiation_of_mdspan<
56 |     __mdspan::mdspan<T, Extents, LayoutPolicy, AccessorPolicy>>
57 |     : std::true_type {};
58 | 
59 | // True when T, with cv-qualifiers and references removed, is such an mdspan.
60 | template <typename T>
61 | static constexpr bool is_matrix_mdspan_v =
62 |     is_matrix_instantiation_of_mdspan<std::remove_cvref_t<T>>::value;
63 | 
64 | // Inspector for scaled_view
65 | 
66 | template <typename T>
67 | struct is_instantiation_of_scaled_view : std::false_type {};
68 | 
69 | template <typename S, typename T>
70 | struct is_instantiation_of_scaled_view<scaled_view<S, T>> : std::true_type {};
71 | 
72 | // True when T, with cv-qualifiers and references removed, is a scaled_view.
73 | template <typename T>
74 | static constexpr bool is_scaled_view_v =
75 |     is_instantiation_of_scaled_view<std::remove_cvref_t<T>>::value;
76 | 
77 | // True for a scaled_view whose base() satisfies the matrix concept.
78 | template <typename T>
79 | static constexpr bool is_scaled_view_matrix_v =
80 |     is_scaled_view_v<T> && matrix<decltype(std::declval<T>().base())>;
81 | 
82 | // Inspector for matrix_opt
83 | 
84 | template <typename T>
85 | struct is_instantiation_of_matrix_opt : std::false_type {};
86 | 
87 | // Matches any matrix_opt<T>.
88 | template <typename T>
89 | struct is_instantiation_of_matrix_opt<matrix_opt<T>> : std::true_type {};
90 | 
91 | // True when T, with cv-qualifiers and references removed, is a matrix_opt.
92 | template <typename T>
93 | static constexpr bool is_matrix_opt_v =
94 |     is_instantiation_of_matrix_opt<std::remove_cvref_t<T>>::value;
95 | 
96 | } // namespace __detail
97 | 
98 | } // namespace spblas
99 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/spmm_impl.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include "aoclsparse.h"
13 | #include <cstdint>
14 | 
15 | #include "aocl_wrappers.hpp"
16 | #include <fmt/core.h>
17 | #include <spblas/detail/log.hpp>
18 | #include <spblas/detail/operation_info_t.hpp>
19 | #include <spblas/detail/ranges.hpp>
20 | #include <spblas/detail/view_inspectors.hpp>
21 | 
22 | //
23 | // Defines the following APIs for SpMM:
24 | //
25 | //  Y = alpha * op(A) * X
26 | //
27 | //  where A is a sparse matrix in CSR format and
28 | //  X/Y are dense matrices in row_major format
29 | //
30 | 
31 | namespace spblas {
32 | 
33 | template <matrix A, matrix X, matrix Y>
34 |   requires(
35 |       (__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
36 |       __detail::has_mdspan_matrix_base<X> && __detail::is_matrix_mdspan_v<Y> &&
37 |       std::is_same_v<typename __detail::ultimate_base_type_t<X>::layout_type,
38 |                      __mdspan::layout_right> &&
39 |       std::is_same_v<typename std::remove_cvref_t<Y>::layout_type,
40 |                      __mdspan::layout_right>)
41 | void multiply(A&& a, X&& x, Y&& y) {
42 |   log_trace("");
43 |   using T = tensor_scalar_t<A>;
44 | 
45 |   auto a_base = __detail::get_ultimate_base(a);
46 |   auto x_base = __detail::get_ultimate_base(x);
47 |   auto y_base = __detail::get_ultimate_base(y);
48 | 
49 |   // Handle is built from the innermost view; op(A) is derived from `a`.
50 |   aoclsparse_matrix csrA = __aoclsparse::create_matrix_handle(a_base);
51 |   aoclsparse_operation opA = __aoclsparse::get_transpose(a);
52 | 
53 |   // get_scaling_factor() yields an optional; use 1 when it is empty.
54 |   auto alpha_optional = __detail::get_scaling_factor(a, x);
55 |   T alpha = alpha_optional.value_or(1);
56 | 
57 |   aoclsparse_mat_descr descr = nullptr;
58 |   aoclsparse_status status = aoclsparse_create_mat_descr(&descr);
59 |   if (status != aoclsparse_status_success) {
60 |     // Best effort: the failure is only reported and the call carries on.
61 |     fmt::print("\t Descr creation failed: {}\n", static_cast<int>(status));
62 |   }
63 |   aoclsparse_set_mat_type(descr, aoclsparse_matrix_type_general);
64 |   aoclsparse_order layout = aoclsparse_order_row;
65 | 
66 |   // Both dense operands pass extent(1) as their leading dimension.
67 |   const index_t nrhs = x_base.extent(1);
68 |   const index_t ldx = x_base.extent(1);
69 |   const index_t ldy = y_base.extent(1);
70 | 
71 |   // beta = 0, matching Y = alpha * op(A) * X from the header comment.
72 |   T beta = static_cast<T>(0.0);
73 |   status = __aoclsparse::aoclsparse_csrmm(opA, alpha, csrA, descr, layout,
74 |                                           x_base.data_handle(), nrhs, ldx, beta,
75 |                                           y.data_handle(), ldy);
76 |   if (status != aoclsparse_status_success) {
77 |     fmt::print("\t SpMM failed: {}\n", static_cast<int>(status));
78 |   }
79 | 
80 |   aoclsparse_destroy(&csrA);
81 |   aoclsparse_destroy_mat_descr(descr);
82 | }
83 | } // namespace spblas
84 | 


--------------------------------------------------------------------------------
/include/spblas/detail/operation_info_t.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/detail/index.hpp>
  4 | #include <spblas/detail/types.hpp>
  5 | 
  6 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
  7 | #include <spblas/vendor/onemkl_sycl/operation_state_t.hpp>
  8 | #endif
  9 | 
 10 | #ifdef SPBLAS_ENABLE_ARMPL
 11 | #include <spblas/vendor/armpl/operation_state_t.hpp>
 12 | #endif
 13 | 
 14 | #ifdef SPBLAS_ENABLE_AOCLSPARSE
 15 | #include <spblas/vendor/aoclsparse/operation_state_t.hpp>
 16 | #endif
 17 | 
 18 | #ifdef SPBLAS_ENABLE_CUSPARSE
 19 | #include <spblas/vendor/cusparse/operation_state_t.hpp>
 20 | #endif
 21 | 
 22 | #ifdef SPBLAS_ENABLE_ROCSPARSE
 23 | #include <spblas/vendor/rocsparse/operation_state_t.hpp>
 24 | #endif
 25 | 
 26 | namespace spblas {
 27 | 
 28 | class operation_info_t {
 29 | public:
 30 |   auto result_shape() {
 31 |     return result_shape_;
 32 |   }
 33 | 
 34 |   auto result_nnz() {
 35 |     return result_nnz_;
 36 |   }
 37 | 
 38 |   operation_info_t() = default;
 39 | 
 40 |   operation_info_t(index<> result_shape, offset_t result_nnz)
 41 |       : result_shape_(result_shape), result_nnz_(result_nnz) {}
 42 | 
 43 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
 44 |   operation_info_t(index<> result_shape, offset_t result_nnz,
 45 |                    __mkl::operation_state_t&& state)
 46 |       : result_shape_(result_shape), result_nnz_(result_nnz),
 47 |         state_(std::move(state)) {}
 48 | #endif
 49 | 
 50 | #ifdef SPBLAS_ENABLE_ARMPL
 51 |   operation_info_t(index<> result_shape, offset_t result_nnz,
 52 |                    __armpl::operation_state_t&& state)
 53 |       : result_shape_(result_shape), result_nnz_(result_nnz),
 54 |         state_(std::move(state)) {}
 55 | #endif
 56 | 
 57 | #ifdef SPBLAS_ENABLE_AOCLSPARSE
 58 |   operation_info_t(index<> result_shape, offset_t result_nnz,
 59 |                    __aoclsparse::operation_state_t&& state)
 60 |       : result_shape_(result_shape), result_nnz_(result_nnz),
 61 |         state_(std::move(state)) {}
 62 | #endif
 63 | 
 64 | #ifdef SPBLAS_ENABLE_CUSPARSE
 65 |   operation_info_t(index<> result_shape, offset_t result_nnz,
 66 |                    __cusparse::operation_state_t&& state)
 67 |       : result_shape_(result_shape), result_nnz_(result_nnz),
 68 |         state_(std::move(state)) {}
 69 | #endif
 70 | 
 71 |   void update_impl_(index<> result_shape, offset_t result_nnz) {
 72 |     result_shape_ = result_shape;
 73 |     result_nnz_ = result_nnz;
 74 |   }
 75 | 
 76 | private:
 77 |   index<> result_shape_;
 78 |   offset_t result_nnz_;
 79 | 
 80 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL
 81 | public:
 82 |   __mkl::operation_state_t state_;
 83 | #endif
 84 | 
 85 | #ifdef SPBLAS_ENABLE_ARMPL
 86 | public:
 87 |   __armpl::operation_state_t state_;
 88 | #endif
 89 | 
 90 | #ifdef SPBLAS_ENABLE_AOCLSPARSE
 91 | public:
 92 |   __aoclsparse::operation_state_t state_;
 93 | #endif
 94 | 
 95 | #ifdef SPBLAS_ENABLE_CUSPARSE
 96 | public:
 97 |   __cusparse::operation_state_t state_;
 98 | #endif
 99 | 
100 | #ifdef SPBLAS_ENABLE_ROCSPARSE
101 | public:
102 |   __rocsparse::operation_state_t state_;
103 | #endif
104 | };
105 | 
106 | } // namespace spblas
107 | 


--------------------------------------------------------------------------------
/include/spblas/backend/cpos.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <spblas/detail/tag_invoke.hpp>
 4 | 
 5 | namespace spblas {
 6 | 
 7 | namespace __backend {
 8 | 
 9 | struct size_fn_ {
10 |   template <typename T>
11 |     requires(spblas::is_tag_invocable_v<size_fn_, T>)
12 |   constexpr auto operator()(T&& t) const {
13 |     return spblas::tag_invoke(size_fn_{}, std::forward<T>(t));
14 |   }
15 | };
16 | 
17 | inline constexpr auto size = size_fn_{};
18 | 
19 | struct shape_fn_ {
20 |   template <typename T>
21 |     requires(spblas::is_tag_invocable_v<shape_fn_, T>)
22 |   constexpr auto operator()(T&& t) const {
23 |     return spblas::tag_invoke(shape_fn_{}, std::forward<T>(t));
24 |   }
25 | };
26 | 
27 | inline constexpr auto shape = shape_fn_{};
28 | 
29 | struct values_fn_ {
30 |   template <typename T>
31 |     requires(spblas::is_tag_invocable_v<values_fn_, T>)
32 |   constexpr auto operator()(T&& t) const {
33 |     return spblas::tag_invoke(values_fn_{}, std::forward<T>(t));
34 |   }
35 | };
36 | 
37 | inline constexpr auto values = values_fn_{};
38 | 
39 | struct rows_fn_ {
40 |   template <typename T>
41 |     requires(spblas::is_tag_invocable_v<rows_fn_, T>)
42 |   constexpr auto operator()(T&& t) const {
43 |     return spblas::tag_invoke(rows_fn_{}, std::forward<T>(t));
44 |   }
45 | };
46 | 
47 | inline constexpr auto rows = rows_fn_{};
48 | 
49 | struct columns_fn_ {
50 |   template <typename T>
51 |     requires(spblas::is_tag_invocable_v<columns_fn_, T>)
52 |   constexpr auto operator()(T&& t) const {
53 |     return spblas::tag_invoke(columns_fn_{}, std::forward<T>(t));
54 |   }
55 | };
56 | 
57 | inline constexpr auto columns = columns_fn_{};
58 | 
59 | struct lookup_fn_ {
60 |   template <typename T, typename... Args>
61 |     requires(spblas::is_tag_invocable_v<lookup_fn_, T, Args...>)
62 |   constexpr tag_invoke_result_t<lookup_fn_, T, Args...>
63 |   operator()(T&& t, Args&&... args) const {
64 |     return spblas::tag_invoke(lookup_fn_{}, std::forward<T>(t),
65 |                               std::forward<Args>(args)...);
66 |   }
67 | };
68 | 
69 | inline constexpr auto lookup = lookup_fn_{};
70 | 
71 | struct lookup_row_fn_ {
72 |   template <typename T, typename... Args>
73 |     requires(spblas::is_tag_invocable_v<lookup_row_fn_, T, Args...>)
74 |   constexpr tag_invoke_result_t<lookup_row_fn_, T, Args...>
75 |   operator()(T&& t, Args&&... args) const {
76 |     return spblas::tag_invoke(lookup_row_fn_{}, std::forward<T>(t),
77 |                               std::forward<Args>(args)...);
78 |   }
79 | };
80 | 
81 | inline constexpr auto lookup_row = lookup_row_fn_{};
82 | 
83 | struct lookup_column_fn_ {
84 |   template <typename T, typename... Args>
85 |     requires(spblas::is_tag_invocable_v<lookup_column_fn_, T, Args...>)
86 |   constexpr tag_invoke_result_t<lookup_column_fn_, T, Args...>
87 |   operator()(T&& t, Args&&... args) const {
88 |     return spblas::tag_invoke(lookup_column_fn_{}, std::forward<T>(t),
89 |                               std::forward<Args>(args)...);
90 |   }
91 | };
92 | 
93 | inline constexpr auto lookup_column = lookup_column_fn_{};
94 | 
95 | } // namespace __backend
96 | 
97 | } // namespace spblas
98 | 


--------------------------------------------------------------------------------
/examples/rocsparse/rocsparse_simple_spmv.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <spblas/spblas.hpp>
 3 | 
 4 | #include <hip/hip_runtime.h>
 5 | 
 6 | #include "util.hpp"
 7 | 
 8 | #include <fmt/core.h>
 9 | #include <fmt/ranges.h>
10 | 
11 | int main(int argc, char** argv) {
12 |   using value_t = float;
13 |   using index_t = spblas::index_t;
14 |   using offset_t = spblas::offset_t;
15 | 
16 |   index_t m = 100;
17 |   index_t n = 100;
18 |   index_t nnz_in = 10;
19 | 
20 |   fmt::print("\n\t###########################################################"
21 |              "######################");
22 |   fmt::print("\n\t### Running SpMV Example:");
23 |   fmt::print("\n\t###");
24 |   fmt::print("\n\t###   y = alpha * A * x");
25 |   fmt::print("\n\t###");
26 |   fmt::print("\n\t### with ");
27 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n,
28 |              nnz_in);
29 |   fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1);
30 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1);
31 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
32 |              sizeof(spblas::index_t));
33 |   fmt::print("\n\t###########################################################"
34 |              "######################");
35 |   fmt::print("\n");
36 | 
37 |   auto&& [values, rowptr, colind, shape, nnz] =
38 |       spblas::generate_csr<value_t, index_t, offset_t>(m, n, nnz_in);
39 | 
40 |   value_t* d_values;
41 |   offset_t* d_rowptr;
42 |   index_t* d_colind;
43 | 
44 |   HIP_CHECK(hipMalloc(&d_values, values.size() * sizeof(value_t)));
45 |   HIP_CHECK(hipMalloc(&d_rowptr, rowptr.size() * sizeof(offset_t)));
46 |   HIP_CHECK(hipMalloc(&d_colind, colind.size() * sizeof(index_t)));
47 | 
48 |   HIP_CHECK(hipMemcpy(d_values, values.data(), values.size() * sizeof(value_t),
49 |                       hipMemcpyDefault));
50 |   HIP_CHECK(hipMemcpy(d_rowptr, rowptr.data(), rowptr.size() * sizeof(offset_t),
51 |                       hipMemcpyDefault));
52 |   HIP_CHECK(hipMemcpy(d_colind, colind.data(), colind.size() * sizeof(index_t),
53 |                       hipMemcpyDefault));
54 | 
55 |   spblas::csr_view<value_t, index_t, offset_t> a(d_values, d_rowptr, d_colind,
56 |                                                  shape, nnz);
57 | 
58 |   // Scale every value of `a` by 5 in place.
59 |   // scale(5.f, a);
60 | 
61 |   std::vector<value_t> x(n, 1);
62 |   std::vector<value_t> y(m, 0);
63 | 
64 |   value_t* d_x;
65 |   value_t* d_y;
66 | 
67 |   HIP_CHECK(hipMalloc(&d_x, x.size() * sizeof(value_t)));
68 |   HIP_CHECK(hipMalloc(&d_y, y.size() * sizeof(value_t)));
69 | 
70 |   HIP_CHECK(
71 |       hipMemcpy(d_x, x.data(), x.size() * sizeof(value_t), hipMemcpyDefault));
72 |   HIP_CHECK(
73 |       hipMemcpy(d_y, y.data(), y.size() * sizeof(value_t), hipMemcpyDefault));
74 | 
75 |   std::span<value_t> x_span(d_x, n);
76 |   std::span<value_t> y_span(d_y, m);
77 | 
78 |   // y = A * x
79 |   spblas::operation_info_t info;
80 |   spblas::multiply(info, a, x_span, y_span);
81 | 
82 |   HIP_CHECK(
83 |       hipMemcpy(y.data(), d_y, y.size() * sizeof(value_t), hipMemcpyDefault));
84 | 
85 |   fmt::print("\tExample is completed!\n");
86 | 
87 |   return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/examples/cusparse/cusparse_simple_spmv.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <spblas/spblas.hpp>
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | #include "util.hpp"
 7 | 
 8 | #include <fmt/core.h>
 9 | #include <fmt/ranges.h>
10 | 
11 | int main(int argc, char** argv) {
12 |   using value_t = float;
13 |   using index_t = spblas::index_t;
14 |   using offset_t = spblas::offset_t;
15 | 
16 |   index_t m = 100;
17 |   index_t n = 100;
18 |   index_t nnz_in = 10;
19 | 
20 |   fmt::print("\n\t###########################################################"
21 |              "######################");
22 |   fmt::print("\n\t### Running SpMV Example:");
23 |   fmt::print("\n\t###");
24 |   fmt::print("\n\t###   y = alpha * A * x");
25 |   fmt::print("\n\t###");
26 |   fmt::print("\n\t### with ");
27 |   fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n,
28 |              nnz_in);
29 |   fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1);
30 |   fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1);
31 |   fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
32 |              sizeof(spblas::index_t));
33 |   fmt::print("\n\t###########################################################"
34 |              "######################");
35 |   fmt::print("\n");
36 | 
37 |   auto&& [values, rowptr, colind, shape, nnz] =
38 |       spblas::generate_csr<value_t, index_t, offset_t>(m, n, nnz_in);
39 | 
40 |   value_t* d_values;
41 |   offset_t* d_rowptr;
42 |   index_t* d_colind;
43 | 
44 |   CUDA_CHECK(cudaMalloc(&d_values, values.size() * sizeof(value_t)));
45 |   CUDA_CHECK(cudaMalloc(&d_rowptr, rowptr.size() * sizeof(offset_t)));
46 |   CUDA_CHECK(cudaMalloc(&d_colind, colind.size() * sizeof(index_t)));
47 | 
48 |   CUDA_CHECK(cudaMemcpy(d_values, values.data(),
49 |                         values.size() * sizeof(value_t), cudaMemcpyDefault));
50 |   CUDA_CHECK(cudaMemcpy(d_rowptr, rowptr.data(),
51 |                         rowptr.size() * sizeof(offset_t), cudaMemcpyDefault));
52 |   CUDA_CHECK(cudaMemcpy(d_colind, colind.data(),
53 |                         colind.size() * sizeof(index_t), cudaMemcpyDefault));
54 | 
55 |   spblas::csr_view<value_t, index_t, offset_t> a(d_values, d_rowptr, d_colind,
56 |                                                  shape, nnz);
57 | 
58 |   // Scale every value of `a` by 5 in place.
59 |   // scale(5.f, a);
60 | 
61 |   std::vector<value_t> x(n, 1);
62 |   std::vector<value_t> y(m, 0);
63 | 
64 |   value_t* d_x;
65 |   value_t* d_y;
66 | 
67 |   CUDA_CHECK(cudaMalloc(&d_x, x.size() * sizeof(value_t)));
68 |   CUDA_CHECK(cudaMalloc(&d_y, y.size() * sizeof(value_t)));
69 | 
70 |   CUDA_CHECK(
71 |       cudaMemcpy(d_x, x.data(), x.size() * sizeof(value_t), cudaMemcpyDefault));
72 |   CUDA_CHECK(
73 |       cudaMemcpy(d_y, y.data(), y.size() * sizeof(value_t), cudaMemcpyDefault));
74 | 
75 |   std::span<value_t> x_span(d_x, n);
76 |   std::span<value_t> y_span(d_y, m);
77 | 
78 |   // y = A * x
79 |   spblas::operation_info_t info;
80 |   spblas::multiply(info, a, x_span, y_span);
81 | 
82 |   CUDA_CHECK(
83 |       cudaMemcpy(y.data(), d_y, y.size() * sizeof(value_t), cudaMemcpyDefault));
84 | 
85 |   fmt::print("\tExample is completed!\n");
86 | 
87 |   return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/onemkl_sycl/triangular_solve_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <oneapi/mkl.hpp>
 4 | 
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/ranges.hpp>
 8 | #include <spblas/detail/view_inspectors.hpp>
 9 | 
10 | #include <spblas/detail/triangular_types.hpp>
11 | 
12 | namespace spblas {
13 | 
14 | //  Mappings from Triangular Solve input args to oneMKL vendor input args
15 | //
16 | //  using   A = L + D + U as a strict decomposition of triangular parts
17 | //
18 | //  spblas_ref input            ->   oneMKL SpTRSV input
19 | //  uplo(op(A))                 ->   op(uplo(A))
20 | //
21 | //  upper + nontrans  (D+U)     ->   nontrans  + upper (D+U)
22 | //  lower + nontrans  (L+D)     ->   nontrans  + lower (L+D)
23 | //  upper + trans     (L+D)^T   ->   trans     + lower (L+D)^T
24 | //  lower + trans     (D+U)^T   ->   trans     + upper (D+U)^T
25 | //  upper + conjtrans (L+D)^H   ->   conjtrans + lower (L+D)^H
26 | //  lower + conjtrans (D+U)^H   ->   conjtrans + upper (D+U)^H
27 | //
28 | 
29 | template <matrix A, class Triangle, class DiagonalStorage, vector B, vector X>
30 |   requires __detail::has_csr_base<A> &&
31 |            __detail::has_contiguous_range_base<B> &&
32 |            __ranges::contiguous_range<X>
33 | void triangular_solve(A&& a, Triangle uplo, DiagonalStorage diag, B&& b,
34 |                       X&& x) {
35 |   log_trace("");
36 |   static_assert(std::is_same_v<Triangle, upper_triangle_t> ||
37 |                 std::is_same_v<Triangle, lower_triangle_t>);
38 |   static_assert(std::is_same_v<DiagonalStorage, explicit_diagonal_t> ||
39 |                 std::is_same_v<DiagonalStorage, implicit_unit_diagonal_t>);
40 | 
41 |   auto a_base = __detail::get_ultimate_base(a);
42 |   auto b_base = __detail::get_ultimate_base(b);
43 | 
44 |   using T = tensor_scalar_t<A>;
45 |   using I = tensor_index_t<A>;
46 |   using O = tensor_offset_t<A>;
47 | 
48 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
49 |   T alpha = alpha_optional.value_or(1);
50 | 
51 |   sycl::queue q(sycl::cpu_selector_v);
52 | 
53 |   oneapi::mkl::sparse::matrix_handle_t a_handle = nullptr;
54 |   oneapi::mkl::sparse::init_matrix_handle(&a_handle);
55 | 
56 |   oneapi::mkl::sparse::set_csr_data(
57 |       q, a_handle, __backend::shape(a_base)[0], __backend::shape(a_base)[1],
58 |       oneapi::mkl::index_base::zero, a_base.rowptr().data(),
59 |       a_base.colind().data(), a_base.values().data())
60 |       .wait();
61 | 
62 |   auto op = oneapi::mkl::transpose::nontrans;
63 | 
64 |   auto uplo_val =
65 |       std::is_same_v<Triangle, upper_triangle_t>
66 |           ? oneapi::mkl::uplo::upper
67 |           : oneapi::mkl::uplo::lower; // someday apply mapping with op
68 | 
69 |   auto diag_val = std::is_same_v<DiagonalStorage, explicit_diagonal_t>
70 |                       ? oneapi::mkl::diag::nonunit
71 |                       : oneapi::mkl::diag::unit;
72 | 
73 |   oneapi::mkl::sparse::trsv(q, uplo_val, op, diag_val, alpha, a_handle,
74 |                             __ranges::data(b_base), __ranges::data(x))
75 |       .wait();
76 | 
77 |   oneapi::mkl::sparse::release_matrix_handle(q, &a_handle).wait();
78 | 
79 | } // triangular_solve
80 | 
81 | } // namespace spblas
82 | 


--------------------------------------------------------------------------------
/notes/matrices.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | 
// Design sketch (pseudo-code): the constraint is described by the comment
// below instead of being spelled out, so this declaration is not compilable
// as written.
template <typename C>
concept matrix =
    /*
      is an instance of csr_view OR
      is an instance of csc_view OR
      is an instance of dense_view OR
      is an mdspan OR
      is an instance of matrix.
    */
    ;
 13 | 
// Sketch of a three-operand multiply. The body performs no computation; it
// only shows the value (T), index (I) and offset (O) types being taken from
// the first operand A.
template <matrix A, matrix B, matrix C>
void multiply(A&& a, B&& b, C&& c) {
  using T = matrix_value_t<A>;
  using I = matrix_index_t<A>;
  using O = matrix_offset_t<A>;
}
 20 | 
// Sketch of a four-operand multiply. The body performs no computation; it
// only shows the value (T), index (I) and offset (O) types being taken from
// the first operand A.
template <matrix A, matrix B, matrix C, matrix D>
void multiply(A&& a, B&& b, C&& c, D&& d) {
  using T = matrix_value_t<A>;
  using I = matrix_index_t<A>;
  using O = matrix_offset_t<A>;
}
 27 | 
/*
  There are three types of things one could pass into a Sparse BLAS function:

  1) A view.  This is a non-owning, lightweight object that references data
     owned by the user in a particular format.  Since it only views data, but
     does not own it, it cannot reallocate its memory to insert more values if
     it runs out of space.  See csr_view below.

  2) An opaque matrix object.  This object owns its data and is able to
     reallocate memory to grow.  When it is constructed, it either creates a
     copy of user data or "steals" it via move construction.  The internal
     format is opaque and unknown to the user.  Extracting data will likely
     require a copy.  See matrix below.

  3) A known-format matrix object.  This object owns its data and is able to
     reallocate memory to grow.  When it is constructed, it either creates a
     copy of user data or "steals" it via move construction.  Since its format
     is known, users can easily "steal" its data via a move-like primitive.
*/
 47 | 
 48 | template <typename T, typename I = std::size_t,
 49 |           typename O = /* implementation-defined */>
 50 | class csr_view {
 51 | public:
 52 |   using value_type = T;
 53 |   using index_type = I;
 54 |   using offset_type = O;
 55 | 
 56 |   csr_view(I m, I n, O nnz, T* values, I* rowptr, I* colind)
 57 |       : m_(m), n_(n), nnz_(nnz), values_(values), rowptr_(rowptr),
 58 |         colind_(colind) {}
 59 | 
 60 |   T* values_data() {
 61 |     return values_;
 62 |   }
 63 |   I* rowptr_data() {
 64 |     return rowptr_;
 65 |   }
 66 |   I* colind_data() {
 67 |     return colind_;
 68 |   }
 69 | 
 70 | private:
 71 |   T* values_;
 72 |   I* rowptr_;
 73 |   I* colind_;
 74 |   I m_, n_, nnz_;
 75 | 
 76 |   /* Implementation-defined stuff */
 77 | };
 78 | 
 79 | template <typename T, typename I>
 80 | class matrix {
 81 | public:
 82 |   matrix(I m, I n);
 83 | 
 84 |   void insert(I i, I j, T value);
 85 | 
 86 |   template <Matrix M>
 87 |   matrix(const Matrix& m) {
 88 |     /* Make a copy of matrix `m` */
 89 |   }
 90 | 
 91 |   template <Matrix M>
 92 |     requires(std::is_rvalue_reference_v<M>)
 93 |   matrix(Matrix&& m) {
 94 |     /* Steal all of the data in `m` */
 95 |   }
 96 | 
 97 | private:
 98 |   /* implementation defined stuff */
 99 | };
100 | 
// Sketch of a known-format, owning CSR matrix: the values, column indices and
// row pointers live in std::vector members, next to the dimensions and the
// number of stored values. Only the shape constructor is declared.
template <typename T, typename I, typename O>
class csr_matrix {
public:
  csr_matrix(I m, I n);

private:
  std::vector<T> values_;
  std::vector<I> colind_;
  std::vector<O> rowptr_;
  I m_, n_;
  O nnz_;
};
113 | 


--------------------------------------------------------------------------------
/notes/matrix_data_structure_notes.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | 
// Design sketch (pseudo-code): the constraint is described by the comment
// below instead of being spelled out, so this declaration is not compilable
// as written.
template <typename C>
concept matrix =
    /*
      is an instance of csr_view OR
      is an instance of csc_view OR
      is an instance of dense_view OR
      is an mdspan OR
      is an instance of matrix.
    */
    ;
 13 | 
// Sketch of a three-operand multiply. The body performs no computation; it
// only shows the value (T), index (I) and offset (O) types being taken from
// the first operand A.
template <matrix A, matrix B, matrix C>
void multiply(A&& a, B&& b, C&& c) {
  using T = matrix_value_t<A>;
  using I = matrix_index_t<A>;
  using O = matrix_offset_t<A>;
}
 20 | 
// Sketch of a four-operand multiply. The body performs no computation; it
// only shows the value (T), index (I) and offset (O) types being taken from
// the first operand A.
template <matrix A, matrix B, matrix C, matrix D>
void multiply(A&& a, B&& b, C&& c, D&& d) {
  using T = matrix_value_t<A>;
  using I = matrix_index_t<A>;
  using O = matrix_offset_t<A>;
}
 27 | 
/*
  There are three types of things one could pass into a Sparse BLAS function:

  1) A view.  This is a non-owning, lightweight object that references data
     owned by the user in a particular format.  Since it only views data, but
     does not own it, it cannot reallocate its memory to insert more values if
     it runs out of space.  See csr_view below.

  2) An opaque matrix object.  This object owns its data and is able to
     reallocate memory to grow.  When it is constructed, it either creates a
     copy of user data or "steals" it via move construction.  The internal
     format is opaque and unknown to the user.  Extracting data will likely
     require a copy.  See matrix below.

  3) A known-format matrix object.  This object owns its data and is able to
     reallocate memory to grow.  When it is constructed, it either creates a
     copy of user data or "steals" it via move construction.  Since its format
     is known, users can easily "steal" its data via a move-like primitive.
*/
 47 | 
 48 | template <typename T, typename I = std::size_t,
 49 |           typename O = /* implementation-defined */>
 50 | class csr_view {
 51 | public:
 52 |   using scalar_type = T;
 53 |   using index_type = I;
 54 |   using offset_type = O;
 55 | 
 56 |   csr_view(I m, I n, O nnz, T* values, I* rowptr, I* colind)
 57 |       : m_(m), n_(n), nnz_(nnz), values_(values), rowptr_(rowptr),
 58 |         colind_(colind) {}
 59 | 
 60 |   T* values_data() {
 61 |     return values_;
 62 |   }
 63 |   I* rowptr_data() {
 64 |     return rowptr_;
 65 |   }
 66 |   I* colind_data() {
 67 |     return colind_;
 68 |   }
 69 | 
 70 | private:
 71 |   T* values_;
 72 |   I* rowptr_;
 73 |   I* colind_;
 74 |   I m_, n_, nnz_;
 75 | 
 76 |   /* Implementation-defined stuff */
 77 | };
 78 | 
 79 | template <typename T, typename I>
 80 | class matrix {
 81 | public:
 82 |   matrix(I m, I n);
 83 | 
 84 |   void insert(I i, I j, T value);
 85 | 
 86 |   template <Matrix M>
 87 |   matrix(const Matrix& m) {
 88 |     /* Make a copy of matrix `m` */
 89 |   }
 90 | 
 91 |   template <Matrix M>
 92 |     requires(std::is_rvalue_reference_v<M>)
 93 |   matrix(Matrix&& m) {
 94 |     /* Steal all of the data in `m` */
 95 |   }
 96 | 
 97 | private:
 98 |   /* implementation defined stuff */
 99 | };
100 | 
// Sketch of a known-format, owning CSR matrix: the values, column indices and
// row pointers live in std::vector members, next to the dimensions and the
// number of stored values. Only the shape constructor is declared.
template <typename T, typename I, typename O>
class csr_matrix {
public:
  csr_matrix(I m, I n);

private:
  std::vector<T> values_;
  std::vector<I> colind_;
  std::vector<O> rowptr_;
  I m_, n_;
  O nnz_;
};
113 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/detail/spmv_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <rocsparse/rocsparse.h>
 4 | 
 5 | #include <spblas/detail/operation_info_t.hpp>
 6 | #include <spblas/detail/ranges.hpp>
 7 | #include <spblas/detail/view_inspectors.hpp>
 8 | #include <spblas/vendor/rocsparse/detail/get_transpose.hpp>
 9 | #include <spblas/vendor/rocsparse/detail/rocsparse_tensors.hpp>
10 | #include <spblas/vendor/rocsparse/detail/spmv_state_t.hpp>
11 | #include <spblas/vendor/rocsparse/operation_state_t.hpp>
12 | #include <spblas/vendor/rocsparse/type_validation.hpp>
13 | 
14 | namespace spblas {
15 | 
16 | template <matrix A, vector B, vector C>
17 |   requires(__detail::has_csr_base<A> &&
18 |            __detail::has_contiguous_range_base<B> &&
19 |            __ranges::contiguous_range<C> &&
20 |            detail::has_valid_rocsparse_matrix_types_v<A> &&
21 |            detail::has_valid_rocsparse_vector_types_v<B> &&
22 |            detail::has_valid_rocsparse_vector_types_v<C>)
23 | void multiply(operation_info_t& info, A&& a, B&& b, C&& c) {
24 |   auto a_base = __detail::get_ultimate_base(a);
25 |   auto b_base = __detail::get_ultimate_base(b);
26 | 
27 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
28 |   tensor_scalar_t<A> alpha = alpha_optional.value_or(1);
29 |   tensor_scalar_t<A> beta = 0;
30 | 
31 |   // Get or create state
32 |   auto state = info.state_.get_state<__rocsparse::spmv_state_t>();
33 |   if (!state) {
34 |     info.state_ = __rocsparse::operation_state_t(
35 |         std::make_unique<__rocsparse::spmv_state_t>());
36 |     state = info.state_.get_state<__rocsparse::spmv_state_t>();
37 |   }
38 | 
39 |   // Create descriptors
40 |   auto a_descr = __rocsparse::create_rocsparse_handle(a_base);
41 |   auto b_descr = __rocsparse::create_rocsparse_handle(b_base);
42 |   auto c_descr = __rocsparse::create_rocsparse_handle(c);
43 | 
44 |   state->set_a_descriptor(a_descr);
45 |   state->set_b_descriptor(b_descr);
46 |   state->set_c_descriptor(c_descr);
47 | 
48 |   // Get operation type based on matrix format
49 |   auto a_transpose = __rocsparse::get_transpose(a);
50 | 
51 |   // Get buffer size
52 |   size_t buffer_size = 0;
53 |   __rocsparse::throw_if_error(rocsparse_spmv(
54 |       state->handle(), a_transpose, &alpha, state->a_descriptor(),
55 |       state->b_descriptor(), &beta, state->c_descriptor(),
56 |       detail::rocsparse_data_type_v<tensor_scalar_t<A>>,
57 |       rocsparse_spmv_alg_csr_stream, rocsparse_spmv_stage_buffer_size,
58 |       &buffer_size, nullptr));
59 | 
60 |   // Allocate buffer if needed
61 |   state->allocate_workspace(buffer_size);
62 | 
63 |   // Execute SpMV
64 |   __rocsparse::throw_if_error(rocsparse_spmv(
65 |       state->handle(), a_transpose, &alpha, state->a_descriptor(),
66 |       state->b_descriptor(), &beta, state->c_descriptor(),
67 |       detail::rocsparse_data_type_v<tensor_scalar_t<A>>,
68 |       rocsparse_spmv_alg_csr_stream, rocsparse_spmv_stage_compute, &buffer_size,
69 |       state->workspace()));
70 | }
71 | 
72 | template <matrix A, vector B, vector C>
73 |   requires(__detail::has_csr_base<A> &&
74 |            __detail::has_contiguous_range_base<B> &&
75 |            __ranges::contiguous_range<C> &&
76 |            detail::has_valid_rocsparse_matrix_types_v<A> &&
77 |            detail::has_valid_rocsparse_vector_types_v<B> &&
78 |            detail::has_valid_rocsparse_vector_types_v<C>)
79 | void multiply(A&& a, B&& b, C&& c) {
80 |   operation_info_t info;
81 |   multiply(info, std::forward<A>(a), std::forward<B>(b), std::forward<C>(c));
82 | }
83 | 
84 | } // namespace spblas
85 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/multiply_impl.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/backend/backend.hpp>
  4 | #include <spblas/concepts.hpp>
  5 | #include <spblas/detail/log.hpp>
  6 | 
  7 | #include <spblas/algorithms/transposed.hpp>
  8 | #include <spblas/backend/csr_builder.hpp>
  9 | #include <spblas/backend/spa_accumulator.hpp>
 10 | #include <spblas/detail/operation_info_t.hpp>
 11 | 
 12 | #include "detail/spgemm/spgemm.hpp"
 13 | 
 14 | #include <algorithm>
 15 | 
 16 | namespace spblas {
 17 | 
 18 | // C = AB
 19 | // SpMV
 20 | template <matrix A, vector B, vector C>
 21 |   requires(__backend::lookupable<B> && __backend::lookupable<C>)
 22 | void multiply(A&& a, B&& b, C&& c) {
 23 |   log_trace("");
 24 |   if (__backend::shape(a)[0] != __backend::shape(c) ||
 25 |       __backend::shape(a)[1] != __backend::shape(b)) {
 26 |     throw std::invalid_argument(
 27 |         "multiply: matrix and vector dimensions are incompatible.");
 28 |   }
 29 | 
 30 |   __backend::for_each(c, [](auto&& e) {
 31 |     auto&& [_, v] = e;
 32 |     v = 0;
 33 |   });
 34 | 
 35 |   __backend::for_each(a, [&](auto&& e) {
 36 |     auto&& [idx, a_v] = e;
 37 |     auto&& [i, k] = idx;
 38 |     __backend::lookup(c, i) += a_v * __backend::lookup(b, k);
 39 |   });
 40 | }
 41 | 
 42 | // C = AB
 43 | // SpMM
 44 | template <matrix A, matrix B, matrix C>
 45 |   requires(__backend::lookupable<B> && __backend::lookupable<C>)
 46 | void multiply(A&& a, B&& b, C&& c) {
 47 |   log_trace("");
 48 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
 49 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
 50 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
 51 |     throw std::invalid_argument(
 52 |         "multiply: matrix dimensions are incompatible.");
 53 |   }
 54 | 
 55 |   __backend::for_each(c, [](auto&& e) {
 56 |     auto&& [_, v] = e;
 57 |     v = 0;
 58 |   });
 59 | 
 60 |   __backend::for_each(a, [&](auto&& e) {
 61 |     auto&& [idx, a_v] = e;
 62 |     auto&& [i, k] = idx;
 63 |     for (std::size_t j = 0; j < __backend::shape(b)[1]; j++) {
 64 |       __backend::lookup(c, i, j) += a_v * __backend::lookup(b, k, j);
 65 |     }
 66 |   });
 67 | }
 68 | 
 69 | template <matrix A, matrix B, matrix C>
 70 | operation_info_t multiply_inspect(A&& a, B&& b, C&& c) {
 71 |   return operation_info_t{};
 72 | }
 73 | 
 74 | template <matrix A, matrix B, matrix C>
 75 | void multiply_inspect(operation_info_t& info, A&& a, B&& b, C&& c){};
 76 | 
 77 | template <matrix A, matrix B, matrix C>
 78 |   requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
 79 |            __detail::is_csr_view_v<C>)
 80 | void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
 81 |   auto new_info = multiply_compute(std::forward<A>(a), std::forward<B>(b),
 82 |                                    std::forward<C>(c));
 83 |   info.update_impl_(new_info.result_shape(), new_info.result_nnz());
 84 | }
 85 | 
 86 | template <matrix A, matrix B, matrix C>
 87 |   requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
 88 |            __detail::is_csc_view_v<C>)
 89 | void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
 90 |   auto new_info = multiply_compute(std::forward<A>(a), std::forward<B>(b),
 91 |                                    std::forward<C>(c));
 92 |   info.update_impl_(new_info.result_shape(), new_info.result_nnz());
 93 | }
 94 | 
 95 | // C = AB
 96 | template <matrix A, matrix B, matrix C>
 97 | void multiply_fill(operation_info_t info, A&& a, B&& b, C&& c) {
 98 |   log_trace("");
 99 |   multiply(a, b, c);
100 | }
101 | 
102 | } // namespace spblas
103 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/aoclsparse/triangular_solve_impl.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025      Advanced Micro Devices, Inc. All Rights reserved.
 3 |  * $COPYRIGHT$
 4 |  *
 5 |  * Additional copyrights may follow
 6 |  *
 7 |  * $HEADER$
 8 |  */
 9 | 
10 | #pragma once
11 | 
#include "aoclsparse.h"
#include <cstdint>
#include <stdexcept>
#include <string>
#include <type_traits>

#include "aocl_wrappers.hpp"
#include <fmt/core.h>
#include <spblas/detail/log.hpp>
#include <spblas/detail/operation_info_t.hpp>
#include <spblas/detail/ranges.hpp>
#include <spblas/detail/view_inspectors.hpp>
21 | 
//
// Defines the following API for the sparse triangular solve:
//
// solve  op(A) * x = alpha * b  for x
//
//  where A is a sparse triangular matrix in CSR format and
//  x/b are dense vectors
30 | #include <spblas/detail/triangular_types.hpp>
31 | 
32 | namespace spblas {
33 | 
34 | template <matrix A, class Triangle, class DiagonalStorage, vector B, vector X>
35 |   requires __detail::has_csr_base<A> &&
36 |            __detail::has_contiguous_range_base<B> &&
37 |            __ranges::contiguous_range<X>
38 | void triangular_solve(A&& a, Triangle uplo, DiagonalStorage diag, B&& b,
39 |                       X&& x) {
40 |   log_trace("");
41 |   static_assert(std::is_same_v<Triangle, upper_triangle_t> ||
42 |                 std::is_same_v<Triangle, lower_triangle_t>);
43 |   static_assert(std::is_same_v<DiagonalStorage, explicit_diagonal_t> ||
44 |                 std::is_same_v<DiagonalStorage, implicit_unit_diagonal_t>);
45 | 
46 |   auto a_base = __detail::get_ultimate_base(a);
47 |   auto b_base = __detail::get_ultimate_base(b);
48 | 
49 |   using T = tensor_scalar_t<A>;
50 |   using I = tensor_index_t<A>;
51 |   using O = tensor_offset_t<A>;
52 | 
53 |   auto alpha_optional = __detail::get_scaling_factor(a, x);
54 |   T alpha = alpha_optional.value_or(1);
55 | 
56 |   aoclsparse_matrix csrA = NULL;
57 |   aoclsparse_mat_descr descr = NULL;
58 |   aoclsparse_status status = aoclsparse_create_mat_descr(&descr);
59 |   if (status != aoclsparse_status_success) {
60 |     fmt::print("\t descr creation failed\n");
61 |   }
62 |   aoclsparse_set_mat_type(descr, aoclsparse_matrix_type_triangular);
63 |   aoclsparse_index_base indexing = aoclsparse_index_base_zero;
64 |   aoclsparse_operation opA = aoclsparse_operation_none;
65 | 
66 |   const index_t a_nrows = __backend::shape(a_base)[0];
67 |   const index_t a_ncols = __backend::shape(a_base)[1];
68 |   const aoclsparse_int nnz = a_base.rowptr().data()[a_nrows];
69 | 
70 |   status = __aoclsparse::aoclsparse_create_csr(
71 |       &csrA, indexing, a_nrows, a_ncols, nnz, a_base.rowptr().data(),
72 |       a_base.colind().data(), a_base.values().data());
73 |   if (status != aoclsparse_status_success) {
74 |     fmt::print("\t csr matrix creation failed: {}\n", (int) status);
75 |   }
76 |   if (std::is_same_v<Triangle, lower_triangle_t>) {
77 |     aoclsparse_set_mat_fill_mode(descr, aoclsparse_fill_mode_lower);
78 |   } else if (std::is_same_v<Triangle, upper_triangle_t>) {
79 |     aoclsparse_set_mat_fill_mode(descr, aoclsparse_fill_mode_upper);
80 |   }
81 | 
82 |   if (std::is_same_v<DiagonalStorage, explicit_diagonal_t>) {
83 |     aoclsparse_set_mat_diag_type(descr, aoclsparse_diag_type_non_unit);
84 |   } else if (std::is_same_v<DiagonalStorage, implicit_unit_diagonal_t>) {
85 |     aoclsparse_set_mat_diag_type(descr, aoclsparse_diag_type_unit);
86 |   }
87 | 
88 |   status = __aoclsparse::aoclsparse_trsv(
89 |       opA, alpha, csrA, descr, __ranges::data(b_base), __ranges::data(x));
90 |   if (status != aoclsparse_status_success) {
91 |     fmt::print("\t triangular solve failed: {} \n", (int) status);
92 |   }
93 | 
94 |   aoclsparse_destroy(&csrA);
95 |   aoclsparse_destroy_mat_descr(descr);
96 | }
97 | 
98 | } // namespace spblas
99 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/cusparse/spmv_impl.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cusparse.h>
 4 | 
 5 | #include <spblas/detail/log.hpp>
 6 | #include <spblas/detail/operation_info_t.hpp>
 7 | #include <spblas/detail/ranges.hpp>
 8 | #include <spblas/detail/view_inspectors.hpp>
 9 | #include <spblas/vendor/cusparse/detail/cusparse_tensors.hpp>
10 | #include <spblas/vendor/cusparse/detail/get_transpose.hpp>
11 | #include <spblas/vendor/cusparse/detail/spmv_state_t.hpp>
12 | #include <spblas/vendor/cusparse/operation_state_t.hpp>
13 | #include <spblas/vendor/cusparse/type_validation.hpp>
14 | 
15 | namespace spblas {
16 | 
17 | template <matrix A, vector X, vector Y>
18 |   requires(__detail::has_csr_base<A> &&
19 |            __detail::has_contiguous_range_base<X> &&
20 |            __ranges::contiguous_range<Y> &&
21 |            detail::has_valid_cusparse_matrix_types_v<A> &&
22 |            detail::has_valid_cusparse_vector_types_v<X> &&
23 |            detail::has_valid_cusparse_vector_types_v<Y>)
24 | void multiply(operation_info_t& info, A&& a, X&& x, Y&& y) {
25 |   log_trace("");
26 | 
27 |   auto x_base = __detail::get_ultimate_base(x);
28 |   auto a_base = __detail::get_ultimate_base(a);
29 | 
30 |   auto alpha_optional = __detail::get_scaling_factor(a, x);
31 |   tensor_scalar_t<A> alpha = alpha_optional.value_or(1);
32 |   tensor_scalar_t<A> beta = 0;
33 | 
34 |   // Get or create state
35 |   auto state = info.state_.get_state<__cusparse::spmv_state_t>();
36 |   if (!state) {
37 |     info.state_ = __cusparse::operation_state_t(
38 |         std::make_unique<__cusparse::spmv_state_t>());
39 |     state = info.state_.get_state<__cusparse::spmv_state_t>();
40 |   }
41 | 
42 |   // Create or get matrix descriptor
43 |   if (!state->a_descriptor()) {
44 |     cusparseSpMatDescr_t a_descr = __cusparse::create_cusparse_handle(a_base);
45 |     state->set_a_descriptor(a_descr);
46 |   }
47 | 
48 |   // Create vector descriptors
49 |   cusparseDnVecDescr_t b_descr = __cusparse::create_cusparse_handle(x_base);
50 |   cusparseDnVecDescr_t c_descr = __cusparse::create_cusparse_handle(y);
51 |   state->set_b_descriptor(b_descr);
52 |   state->set_c_descriptor(c_descr);
53 | 
54 |   // Get operation type based on matrix format
55 |   auto a_transpose = __cusparse::get_transpose(a);
56 | 
57 |   // Get buffer size
58 |   size_t buffer_size;
59 |   __cusparse::throw_if_error(cusparseSpMV_bufferSize(
60 |       state->handle(), a_transpose, &alpha, state->a_descriptor(),
61 |       state->b_descriptor(), &beta, state->c_descriptor(),
62 |       detail::cuda_data_type_v<tensor_scalar_t<Y>>, CUSPARSE_SPMV_ALG_DEFAULT,
63 |       &buffer_size));
64 | 
65 |   // Allocate buffer if needed
66 |   void* buffer = nullptr;
67 |   if (buffer_size > 0) {
68 |     cudaMalloc(&buffer, buffer_size);
69 |   }
70 | 
71 |   // Execute SpMV
72 |   __cusparse::throw_if_error(
73 |       cusparseSpMV(state->handle(), a_transpose, &alpha, state->a_descriptor(),
74 |                    state->b_descriptor(), &beta, state->c_descriptor(),
75 |                    detail::cuda_data_type_v<tensor_scalar_t<Y>>,
76 |                    CUSPARSE_SPMV_ALG_DEFAULT, buffer));
77 | 
78 |   // Free buffer if allocated
79 |   if (buffer) {
80 |     cudaFree(buffer);
81 |   }
82 | }
83 | 
84 | template <matrix A, vector X, vector Y>
85 |   requires(__detail::has_csr_base<A> &&
86 |            __detail::has_contiguous_range_base<X> &&
87 |            __ranges::contiguous_range<Y> &&
88 |            detail::has_valid_cusparse_matrix_types_v<A> &&
89 |            detail::has_valid_cusparse_vector_types_v<X> &&
90 |            detail::has_valid_cusparse_vector_types_v<Y>)
91 | void multiply(A&& a, X&& x, Y&& y) {
92 |   operation_info_t info;
93 |   multiply(info, std::forward<A>(a), std::forward<X>(x), std::forward<Y>(y));
94 | }
95 | 
96 | } // namespace spblas
97 | 


--------------------------------------------------------------------------------
/test/gtest/triangular_solve_test.cpp:
--------------------------------------------------------------------------------
  1 | #include <gtest/gtest.h>
  2 | 
  3 | #include "util.hpp"
  4 | #include <spblas/spblas.hpp>
  5 | 
  6 | template <typename T, typename I, class Triangle, class DiagonalStorage,
  7 |           spblas::__ranges::random_access_range B,
  8 |           spblas::__ranges::random_access_range X>
  9 | void reference_triangular_solve(spblas::csr_view<T, I> a, Triangle t,
 10 |                                 DiagonalStorage d, B&& b, X&& x) {
 11 |   auto&& values = a.values();
 12 |   auto&& colind = a.colind();
 13 |   auto&& rowptr = a.rowptr();
 14 |   auto shape = a.shape();
 15 | 
 16 |   if constexpr (std::is_same_v<Triangle, spblas::upper_triangle_t>) {
 17 |     // backward solve
 18 |     for (I row = shape[0]; row-- > 0;) {
 19 |       T tmp = b[row];
 20 |       T diag_val = 0.0;
 21 |       for (I j = rowptr[row]; j < rowptr[row + 1]; j++) {
 22 |         I col = colind[j];
 23 |         if (col > row) {
 24 |           T a_val = values[j];
 25 |           T x_val = x[col];
 26 |           tmp -= a_val * x_val; // b - U*x
 27 |         } else if (col == row) {
 28 |           diag_val = values[j];
 29 |         }
 30 |       }
 31 |       if constexpr (std::is_same_v<DiagonalStorage,
 32 |                                    spblas::explicit_diagonal_t>) {
 33 |         x[row] = tmp / diag_val; // ( b - U*x) / d
 34 |       } else {
 35 |         x[row] = tmp; // ( b- U*x) / 1
 36 |       }
 37 |     }
 38 |   } else if constexpr (std::is_same_v<Triangle, spblas::upper_triangle_t>) {
 39 |     // Forward Solve
 40 |     for (I row = 0; row < shape[0]; row++) {
 41 |       T tmp = b[row];
 42 |       T diag_val = 0.0;
 43 |       for (I j = rowptr[row]; j < rowptr[row + 1]; ++j) {
 44 |         I col = colind[j];
 45 |         if (col < row) {
 46 |           T a_val = values[j];
 47 |           T x_val = x[col];
 48 |           tmp -= a_val * x_val; // b - L*x
 49 |         } else if (col == row) {
 50 |           diag_val = values[j];
 51 |         }
 52 |       }
 53 |       if constexpr (std::is_same_v<DiagonalStorage,
 54 |                                    spblas::explicit_diagonal_t>) {
 55 |         x[row] = tmp / diag_val; // ( b - L*x) / d
 56 |       } else {
 57 |         x[row] = tmp; // ( b- L*x) / 1
 58 |       }
 59 |     }
 60 |   }
 61 | }
 62 | 
 63 | template <typename T, typename I, typename Triangle, typename DiagonalStorage>
 64 | void triangular_solve_test(Triangle t, DiagonalStorage d) {
 65 |   for (auto&& [m, n, nnz] : util::square_dims) {
 66 |     auto [values, rowptr, colind, shape, _] =
 67 |         spblas::generate_csr<T, I>(m, n, nnz);
 68 | 
 69 |     spblas::csr_view<T, I> a(values, rowptr, colind, shape, nnz);
 70 | 
 71 |     std::vector<T> x(n, 1);
 72 |     std::vector<T> b(m, 0);
 73 | 
 74 |     T scale_factor = 1e-3f;
 75 |     std::transform(values.begin(), values.end(), values.begin(),
 76 |                    [scale_factor](T val) { return scale_factor * val; });
 77 | 
 78 |     spblas::triangular_solve(a, Triangle{}, DiagonalStorage{}, b, x);
 79 | 
 80 |     std::vector<T> x_ref(m, 0);
 81 | 
 82 |     reference_triangular_solve(a, Triangle{}, DiagonalStorage{}, b, x_ref);
 83 | 
 84 |     for (std::size_t i = 0; i < x.size(); i++) {
 85 |       EXPECT_EQ_(x[i], x_ref[i]);
 86 |     }
 87 |   }
 88 | }
 89 | 
 90 | TEST(CsrView, TriangularSolveLowerImplicit) {
 91 |   using T = float;
 92 |   using I = spblas::index_t;
 93 | 
 94 |   triangular_solve_test<T, I>(spblas::lower_triangle_t{},
 95 |                               spblas::implicit_unit_diagonal_t{});
 96 | }
 97 | 
 98 | TEST(CsrView, TriangularSolveUpperImplicit) {
 99 |   using T = float;
100 |   using I = spblas::index_t;
101 | 
102 |   triangular_solve_test<T, I>(spblas::lower_triangle_t{},
103 |                               spblas::implicit_unit_diagonal_t{});
104 | }
105 | 


--------------------------------------------------------------------------------
/include/spblas/detail/log.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | //
 4 | // This defines a logging system that can be turned on and off by command line
 5 | //
 6 | //  -DLOG_LEVEL=SPBLAS_TRACE   (or any other level or undefined LOG_LEVEL to
 7 | //  turn it off)
 8 | //
 9 | // SPBLAS_DEBUG   (level 0) not meant to be kept in code, but good for debugging
10 | // SPBLAS_WARNING (level 1) for giving developers a more complete warning
11 | //                          message before throwing error or exiting
12 | // SPBLAS_TRACE   (level 2) provides trace comments of files and line numbers
13 | // SPBLAS_INFO    (level 3) for providing deep information about algorithm or
14 | //                          details
15 | //
16 | //
17 | //  Add to code any of the following
18 | //
19 | //  log_debug("formatted message"); // behaves like printf so can add formatting
//  log_warning("formatted message"); // warnings or early exit extra detail
21 | //  log_trace(""); // can also add formatted message, but often file/line is
22 | //                 // sufficient
23 | //  log_info("formatted message") // any extra info or data we find useful for
24 | //                                // analyzing algorithm or debugging
25 | //
26 | //  when LOG_LEVEL is not defined, it does nothing, but when defined, it prints
27 | //  as desired for all levels up to specified one
28 | //
29 | 
// Log levels, ordered from least (0) to most (3) verbose.
enum {
  SPBLAS_DEBUG = 0,
  SPBLAS_WARNING = 1,
  SPBLAS_TRACE = 2,
  SPBLAS_INFO = 3
};

// Common backend of the log_* macros. Without LOG_LEVEL it expands to an
// empty statement; with LOG_LEVEL it forwards to spblas_log_, passing the
// level both as a value and as its spelled-out name.
#ifndef LOG_LEVEL
#define _log_(level, file, line, func, format, ...)                            \
  do {                                                                         \
  } while (0)
#else
#define _log_(level, file, line, func, format, ...)                            \
  spblas_log_(level, #level, file, line, func, format, ##__VA_ARGS__)
#endif

// printf-style front ends, one per level; each records the call site.
#define log_debug(format, ...)                                                 \
  _log_(SPBLAS_DEBUG, __FILE__, __LINE__, __PRETTY_FUNCTION__, format,         \
        ##__VA_ARGS__)
#define log_warning(format, ...)                                               \
  _log_(SPBLAS_WARNING, __FILE__, __LINE__, __PRETTY_FUNCTION__, format,       \
        ##__VA_ARGS__)
#define log_trace(format, ...)                                                 \
  _log_(SPBLAS_TRACE, __FILE__, __LINE__, __PRETTY_FUNCTION__, format,         \
        ##__VA_ARGS__)
#define log_info(format, ...)                                                  \
  _log_(SPBLAS_INFO, __FILE__, __LINE__, __PRETTY_FUNCTION__, format,          \
        ##__VA_ARGS__)
53 | 
54 | #ifdef LOG_LEVEL
55 | 
56 | #include <stdarg.h> // va_start, va_list, va_end
57 | #include <stdio.h>  // printf, vprintf
58 | #include <unistd.h> // isatty
59 | 
60 | static void spblas_log_(int level, const char* pref, const char* file,
61 |                         const int line, const char* func, const char* fmt,
62 |                         ...) {
63 |   va_list args;
64 |   va_start(args, fmt);
65 | 
66 |   if (level <= LOG_LEVEL) { // add all smaller logtype enums
67 | 
68 |     if (isatty(1)) {
69 |       // color log if isatty(1) ie file descriptor is from terminal
70 |       printf("\x1b[48;5;14m"); // background is high intensity light blue
71 |       printf("\x1b[38;5;0m");  // foreground is black
72 |     }
73 | 
74 |     // print out preamble:  [logtype] file:<line#>: functionname() message
75 |     // printf("[%s] %s:%d: %s()", pref, file, line, func);
76 | 
77 |     // print out preamble:  [logtype] file:<line#>: message
78 |     printf("[%s] %s:%d: ", pref, file, line);
79 | 
80 |     vprintf(fmt, args); // print out message
81 | 
82 |     // end color for log printing
83 |     if (isatty(1)) {
84 |       printf("\x1b[0m");
85 |     }
86 |     printf("\n");
87 |   } // if level <= LOG_LEVEL
88 |   fflush(0);
89 |   va_end(args);
90 | }
91 | #endif // LOG_LEVEL
92 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/add_impl.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/backend/backend.hpp>
  4 | #include <spblas/backend/csr_builder.hpp>
  5 | #include <spblas/backend/spa_accumulator.hpp>
  6 | #include <spblas/concepts.hpp>
  7 | 
  8 | namespace spblas {
  9 | 
 10 | template <vector A, vector B, vector C>
 11 | void add(A&& a, B&& b, C&& c) {
 12 |   if (__backend::shape(a) != __backend::shape(b) ||
 13 |       __backend::shape(b) != __backend::shape(c)) {
 14 |     throw std::invalid_argument("add: vector dimensions are incompatible.");
 15 |   }
 16 | 
 17 |   __backend::for_each(c, [&](auto&& e) {
 18 |     auto&& [i, c_v] = e;
 19 |     c_v = __backend::lookup(a, i) + __backend::lookup(b, i);
 20 |   });
 21 | }
 22 | 
 23 | template <matrix A, matrix B, matrix C>
 24 |   requires(__backend::lookupable<A> && __backend::lookupable<B> &&
 25 |            __backend::lookupable<C>)
 26 | void add(A&& a, B&& b, C&& c) {
 27 |   if (__backend::shape(a) != __backend::shape(b) ||
 28 |       __backend::shape(b) != __backend::shape(c)) {
 29 |     throw std::invalid_argument("add: matrix dimensions are incompatible.");
 30 |   }
 31 | 
 32 |   for (std::size_t i = 0; i < __backend::shape(c)[0]; i++) {
 33 |     for (std::size_t j = 0; j < __backend::shape(c)[1]; j++) {
 34 |       __backend::lookup(c, i, j) =
 35 |           __backend::lookup(a, i, j) + __backend::lookup(b, i, j);
 36 |     }
 37 |   }
 38 | }
 39 | 
 40 | template <matrix A, matrix B, matrix C>
 41 |   requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
 42 |            __detail::is_csr_view_v<C>)
 43 | void add(A&& a, B&& b, C&& c) {
 44 |   if (__backend::shape(a) != __backend::shape(b) ||
 45 |       __backend::shape(b) != __backend::shape(c)) {
 46 |     throw std::invalid_argument("add: matrix dimensions are incompatible.");
 47 |   }
 48 | 
 49 |   using T = tensor_scalar_t<C>;
 50 |   using I = tensor_index_t<C>;
 51 | 
 52 |   __backend::spa_accumulator<T, I> c_row(__backend::shape(c)[1]);
 53 |   __backend::csr_builder c_builder(c);
 54 | 
 55 |   for (I i = 0; i < __backend::shape(c)[0]; i++) {
 56 |     c_row.clear();
 57 | 
 58 |     for (auto&& [j, v] : __backend::lookup_row(a, i)) {
 59 |       c_row[j] += v;
 60 |     }
 61 | 
 62 |     for (auto&& [j, v] : __backend::lookup_row(b, i)) {
 63 |       c_row[j] += v;
 64 |     }
 65 | 
 66 |     c_row.sort();
 67 | 
 68 |     try {
 69 |       c_builder.insert_row(i, c_row.get());
 70 |     } catch (...) {
 71 |       throw std::runtime_error("add: ran out of memory.  CSR output view "
 72 |                                "has insufficient memory.");
 73 |     }
 74 |   }
 75 |   c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
 76 |            c.rowptr()[c.shape()[0]]);
 77 | }
 78 | 
 79 | template <matrix A, matrix B, matrix C>
 80 |   requires(__backend::row_lookupable<A> && __backend::row_lookupable<B> &&
 81 |            __backend::row_lookupable<C>)
 82 | operation_info_t add_inspect(A&& a, B&& b, C&& c) {
 83 |   if (__backend::shape(a) != __backend::shape(b) ||
 84 |       __backend::shape(b) != __backend::shape(c)) {
 85 |     throw std::invalid_argument("add: matrix dimensions are incompatible.");
 86 |   }
 87 | 
 88 |   using I = tensor_index_t<C>;
 89 | 
 90 |   std::size_t nnz = 0;
 91 |   __backend::spa_set<I> c_row(__backend::shape(c)[1]);
 92 | 
 93 |   for (I i = 0; i < __backend::shape(c)[0]; i++) {
 94 |     c_row.clear();
 95 | 
 96 |     for (auto&& [j, _] : __backend::lookup_row(a, i)) {
 97 |       c_row.insert(j);
 98 |     }
 99 | 
100 |     for (auto&& [j, _] : __backend::lookup_row(b, i)) {
101 |       c_row.insert(j);
102 |     }
103 | 
104 |     nnz += c_row.size();
105 |   }
106 | 
107 |   return operation_info_t{__backend::shape(c), index_t(nnz)};
108 | }
109 | 
110 | template <matrix A, matrix B, matrix C>
111 | void add_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
112 |   add(a, b, c);
113 | }
114 | 
115 | } // namespace spblas
116 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/rocsparse/exception.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <hip/hip_runtime.h>
 4 | #include <rocsparse/rocsparse.h>
 5 | #include <stdexcept>
 6 | #include <string>
 7 | 
 8 | namespace spblas {
 9 | 
10 | namespace __rocsparse {
11 | 
12 | // Throw an exception if the hipError_t is not hipSuccess.
13 | void throw_if_error(hipError_t error_code, std::string prefix = "") {
14 |   if (error_code == hipSuccess) {
15 |     return;
16 |   }
17 |   std::string name = hipGetErrorName(error_code);
18 |   std::string message = hipGetErrorString(error_code);
19 |   throw std::runtime_error(prefix + "HIP encountered an error " + name +
20 |                            ": \"" + message + "\"");
21 | }
22 | 
23 | // Throw an exception if the rocsparse_status is not rocsparse_status_success.
24 | void throw_if_error(rocsparse_status error_code) {
25 |   if (error_code == rocsparse_status_success) {
26 |     return;
27 |   } else if (error_code == rocsparse_status_invalid_handle) {
28 |     throw std::runtime_error(
29 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_handle\"");
30 |   } else if (error_code == rocsparse_status_not_implemented) {
31 |     throw std::runtime_error(
32 |         "rocSPARSE encountered an error: \"rocsparse_status_not_implemented\"");
33 |   } else if (error_code == rocsparse_status_invalid_pointer) {
34 |     throw std::runtime_error(
35 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_pointer\"");
36 |   } else if (error_code == rocsparse_status_invalid_size) {
37 |     throw std::runtime_error(
38 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
39 |   } else if (error_code == rocsparse_status_memory_error) {
40 |     throw std::runtime_error(
41 |         "rocSPARSE encountered an error: \"rocsparse_status_memory_error\"");
42 |   } else if (error_code == rocsparse_status_internal_error) {
43 |     throw std::runtime_error(
44 |         "rocSPARSE encountered an error: \"rocsparse_status_internal_error\"");
45 |   } else if (error_code == rocsparse_status_invalid_value) {
46 |     throw std::runtime_error(
47 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_value\"");
48 |   } else if (error_code == rocsparse_status_arch_mismatch) {
49 |     throw std::runtime_error(
50 |         "rocSPARSE encountered an error: \"rocsparse_status_arch_mismatch\"");
51 |   } else if (error_code == rocsparse_status_zero_pivot) {
52 |     throw std::runtime_error(
53 |         "rocSPARSE encountered an error: \"rocsparse_status_zero_pivot\"");
54 |   } else if (error_code == rocsparse_status_not_initialized) {
55 |     throw std::runtime_error(
56 |         "rocSPARSE encountered an error: \"rocsparse_status_not_initialized\"");
57 |   } else if (error_code == rocsparse_status_type_mismatch) {
58 |     throw std::runtime_error(
59 |         "rocSPARSE encountered an error: \"rocsparse_status_type_mismatch\"");
60 |   } else if (error_code == rocsparse_status_type_mismatch) {
61 |     throw std::runtime_error(
62 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
63 |   } else if (error_code == rocsparse_status_invalid_size) {
64 |     throw std::runtime_error(
65 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
66 |   } else if (error_code == rocsparse_status_invalid_size) {
67 |     throw std::runtime_error(
68 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
69 |   } else if (error_code == rocsparse_status_invalid_size) {
70 |     throw std::runtime_error(
71 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
72 |   } else if (error_code == rocsparse_status_invalid_size) {
73 |     throw std::runtime_error(
74 |         "rocSPARSE encountered an error: \"rocsparse_status_invalid_size\"");
75 |   } else {
76 |     throw std::runtime_error(
77 |         "rocSPARSE encountered an error: \"unknown error\"");
78 |   }
79 | }
80 | 
81 | } // namespace __rocsparse
82 | 
83 | } // namespace spblas
84 | 


--------------------------------------------------------------------------------
/include/spblas/detail/view_inspectors.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <optional>
  4 | #include <utility> // std::declval
  5 | 
  6 | #include <spblas/detail/concepts.hpp>
  7 | #include <spblas/views/inspectors.hpp>
  8 | 
  9 | namespace spblas {
 10 | 
 11 | namespace __detail {
 12 | 
 13 | // Does this tensor view have a base?
 14 | template <typename T>
 15 | concept has_base = view<T> && requires(T& t) {
 16 |   { t.base() } -> tensor;
 17 | };
 18 | 
 19 | // Inspect a tensor: does it have a scaling factor?  If so, compute it.
 20 | // Returns an empty optional if no scaling factor OR returns an optional
 21 | // with the product of all the scaling factors.
 22 | template <tensor T>
 23 | auto get_scaling_factor(T&& t) {
 24 |   if constexpr (has_base<T>) {
 25 |     auto base_scaling_factor = get_scaling_factor(t.base());
 26 | 
 27 |     if constexpr (is_scaled_view_v<T>) {
 28 |       auto scaling_factor = t.alpha();
 29 | 
 30 |       using scaling_factor_type =
 31 |           decltype(scaling_factor * base_scaling_factor.value());
 32 | 
 33 |       if (base_scaling_factor.has_value()) {
 34 |         return std::optional<scaling_factor_type>(scaling_factor *
 35 |                                                   base_scaling_factor.value());
 36 |       } else {
 37 |         return std::optional<scaling_factor_type>(scaling_factor);
 38 |       }
 39 |     } else {
 40 |       return base_scaling_factor;
 41 |     }
 42 |   } else {
 43 |     if constexpr (is_scaled_view_v<T>) {
 44 |       return std::optional(t.alpha());
 45 |     } else {
 46 |       return std::optional<tensor_scalar_t<T>>{};
 47 |     }
 48 |   }
 49 | }
 50 | 
 51 | // Get scaling factors of t and u, returning:
 52 | // 1) empty optional, if no scaling factor in either
 53 | // 2) scaling factor of t OR u, if only one has a scaling factor
 54 | // 3) product of scaling factor of t and u, if both have a scaling factor.
 55 | template <tensor T, tensor U>
 56 | auto get_scaling_factor(T&& t, U&& u) {
 57 |   auto t_scaling_factor = get_scaling_factor(t);
 58 |   auto u_scaling_factor = get_scaling_factor(u);
 59 | 
 60 |   using scalar_type = decltype(std::declval<typename std::remove_cvref_t<
 61 |                                    decltype(t_scaling_factor)>::value_type>() *
 62 |                                std::declval<typename std::remove_cvref_t<
 63 |                                    decltype(u_scaling_factor)>::value_type>());
 64 | 
 65 |   if (t_scaling_factor.has_value()) {
 66 |     if (u_scaling_factor.has_value()) {
 67 |       return std::optional<scalar_type>(t_scaling_factor.value() *
 68 |                                         u_scaling_factor.value());
 69 |     } else {
 70 |       return std::optional<scalar_type>(t_scaling_factor);
 71 |     }
 72 |   } else if (u_scaling_factor.has_value()) {
 73 |     return std::optional<scalar_type>(u_scaling_factor);
 74 |   } else {
 75 |     return std::optional<scalar_type>{};
 76 |   }
 77 | }
 78 | 
 79 | template <tensor T>
 80 | bool has_scaling_factor(T&& t) {
 81 |   return get_scaling_factor(t).has_value();
 82 | }
 83 | 
 84 | template <tensor T>
 85 | auto get_ultimate_base(T&& t) {
 86 |   if constexpr (has_base<T>) {
 87 |     return get_ultimate_base(t.base());
 88 |   } else {
 89 |     return t;
 90 |   }
 91 | }
 92 | 
 93 | template <tensor T>
 94 | bool has_matrix_opt(T&& t) {
 95 |   if constexpr (is_matrix_opt_v<T>) {
 96 |     return true;
 97 |   } else if constexpr (has_base<T>) {
 98 |     return has_matrix_opt(t.base());
 99 |   } else {
100 |     return false;
101 |   }
102 | }
103 | 
104 | template <typename T>
105 | using ultimate_base_type_t = decltype(get_ultimate_base(std::declval<T>()));
106 | 
107 | template <typename T>
108 | concept has_csr_base = is_csr_view_v<ultimate_base_type_t<T>>;
109 | 
110 | template <typename T>
111 | concept has_csc_base = is_csc_view_v<ultimate_base_type_t<T>>;
112 | 
113 | template <typename T>
114 | concept has_mdspan_matrix_base = is_matrix_mdspan_v<ultimate_base_type_t<T>>;
115 | 
116 | template <typename T>
117 | concept has_contiguous_range_base =
118 |     spblas::__ranges::contiguous_range<ultimate_base_type_t<T>>;
119 | 
120 | } // namespace __detail
121 | 
122 | } // namespace spblas
123 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/triangular_solve_impl.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/vendor/armpl/detail/armpl.hpp>
  4 | 
  5 | #include <spblas/detail/log.hpp>
  6 | #include <spblas/detail/operation_info_t.hpp>
  7 | #include <spblas/detail/ranges.hpp>
  8 | #include <spblas/detail/view_inspectors.hpp>
  9 | 
 10 | #include <spblas/detail/triangular_types.hpp>
 11 | 
 12 | namespace spblas {
 13 | 
 14 | template <matrix A, class Triangle, class DiagonalStorage, vector B, vector X>
 15 |   requires __detail::has_csr_base<A> &&
 16 |            __detail::has_contiguous_range_base<B> &&
 17 |            __ranges::contiguous_range<X>
 18 | void triangular_solve(A&& a, Triangle uplo, DiagonalStorage diag, B&& b,
 19 |                       X&& x) {
 20 |   log_trace("");
 21 |   static_assert(std::is_same_v<Triangle, upper_triangle_t> ||
 22 |                 std::is_same_v<Triangle, lower_triangle_t>);
 23 |   static_assert(std::is_same_v<DiagonalStorage, explicit_diagonal_t> ||
 24 |                 std::is_same_v<DiagonalStorage, implicit_unit_diagonal_t>);
 25 | 
 26 |   auto a_base = __detail::get_ultimate_base(a);
 27 |   auto b_base = __detail::get_ultimate_base(b);
 28 | 
 29 |   using T = tensor_scalar_t<A>;
 30 |   using I = tensor_index_t<A>;
 31 |   using O = tensor_offset_t<A>;
 32 | 
 33 |   auto m = __backend::shape(a_base)[0];
 34 |   auto n = __backend::shape(a_base)[1];
 35 | 
 36 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
 37 |   T alpha = alpha_optional.value_or(1);
 38 | 
 39 |   armpl_spmat_t a_handle = __armpl::create_matrix_handle(a_base);
 40 | 
 41 |   // Optimistically try the solve without a copy, in case the matrix is already
 42 |   // triangular
 43 |   auto stat = __armpl::sptrsv_exec<tensor_scalar_t<A>>(
 44 |       ARMPL_SPARSE_OPERATION_NOTRANS, a_handle, __ranges::data(x), alpha,
 45 |       __ranges::data(b_base));
 46 | 
 47 |   armpl_spmat_destroy(a_handle);
 48 | 
 49 |   if (stat != ARMPL_STATUS_SUCCESS) {
 50 | 
 51 |     //  Arm PL needs a copy of the matrix corresponding to the specified
 52 |     //  triangule with the diagonal set appropriately.
 53 | 
 54 |     auto is_upper = std::is_same_v<Triangle, upper_triangle_t>;
 55 |     auto is_unit = std::is_same_v<DiagonalStorage, implicit_unit_diagonal_t>;
 56 | 
 57 |     auto colind = a_base.colind().data();
 58 |     auto rowptr = a_base.rowptr().data();
 59 |     auto values = a_base.values().data();
 60 | 
 61 |     std::vector<T> tmp_values;
 62 |     std::vector<I> tmp_rowptr(m + 1);
 63 |     std::vector<O> tmp_colind;
 64 | 
 65 |     auto index_base = rowptr[0];
 66 | 
 67 |     auto is_included = [&](auto r, auto c) {
 68 |       if (is_unit) {
 69 |         if (is_upper) {
 70 |           return r < c;
 71 |         } else {
 72 |           return r > c;
 73 |         }
 74 |       } else {
 75 |         if (is_upper) {
 76 |           return r <= c;
 77 |         } else {
 78 |           return r >= c;
 79 |         }
 80 |       }
 81 |     };
 82 | 
 83 |     int k = 0;
 84 |     for (armpl_int_t r = 0; r < m; r++) {
 85 | 
 86 |       if (is_unit && is_upper) {
 87 |         tmp_colind.push_back(r);
 88 |         tmp_values.push_back(T(1));
 89 |         k++;
 90 |       }
 91 | 
 92 |       for (auto i = rowptr[r] - index_base; i < rowptr[r + 1] - index_base;
 93 |            i++) {
 94 |         auto c = colind[i];
 95 |         auto v = values[i];
 96 | 
 97 |         if (is_included(r, c)) {
 98 |           tmp_colind.push_back(c);
 99 |           tmp_values.push_back(v);
100 |           k++;
101 |         }
102 |       }
103 | 
104 |       if (is_unit && !is_upper) {
105 |         tmp_colind.push_back(r);
106 |         tmp_values.push_back(T(1));
107 |         k++;
108 |       }
109 | 
110 |       tmp_rowptr[r + 1] = k;
111 |     }
112 | 
113 |     __armpl::create_spmat_csr<tensor_scalar_t<A>>(
114 |         &a_handle, m, n, tmp_rowptr.data(), tmp_colind.data(),
115 |         tmp_values.data(), ARMPL_SPARSE_CREATE_NOCOPY);
116 | 
117 |     stat = __armpl::sptrsv_exec<tensor_scalar_t<A>>(
118 |         ARMPL_SPARSE_OPERATION_NOTRANS, a_handle, __ranges::data(x), alpha,
119 |         __ranges::data(b_base));
120 |     if (stat != ARMPL_STATUS_SUCCESS) {
121 |       armpl_spmat_print_err(a_handle);
122 |       assert(false);
123 |     }
124 | 
125 |     armpl_spmat_destroy(a_handle);
126 |   }
127 | 
128 | } // triangular_solve
129 | 
130 | } // namespace spblas
131 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: "CI"
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - main
  7 | 
  8 |   pull_request:
  9 | 
 10 | jobs:
 11 |   checks:
 12 |     runs-on: 'ubuntu-latest'
 13 |     steps:
 14 |     - uses: actions/checkout@v3
 15 |     - uses: actions/setup-python@v4
 16 |       with:
 17 |         python-version: '3.12'
 18 |         cache: 'pip'
 19 |     - run: pip install -r requirements.txt
 20 |     - name: Checks
 21 |       uses: pre-commit/action@v3.0.0
 22 | 
 23 |   gcc:
 24 |     runs-on: 'ubuntu-latest'
 25 |     strategy:
 26 |       matrix:
 27 |         cxx: [g++-14, g++-13, g++-12, g++-11]
 28 |     name: ${{ matrix.cxx }}
 29 |     env:
 30 |       CXX: ${{ matrix.cxx }}
 31 |     steps:
 32 |     - uses: actions/checkout@v4
 33 |     - name: CMake
 34 |       run: |
 35 |         sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
 36 |         sudo apt-get update -y
 37 |         sudo apt-get install g++-11 g++-12 g++-13 g++-14
 38 |         cmake -B build
 39 |     - name: Build
 40 |       run: make -C build -j `nproc`
 41 |     - name: Test
 42 |       run: ./build/test/gtest/spblas-tests
 43 | 
 44 |   intel-llvm:
 45 |     runs-on: 'ubuntu-latest'
 46 |     strategy:
 47 |       matrix:
 48 |         mkl: [OFF, ON]
 49 |     name: intel-llvm${{ matrix.mkl == 'ON' && '-mkl' || '' }}
 50 |     env:
 51 |       CXX: icpx
 52 |     steps:
 53 |     - uses: actions/checkout@v4
 54 |     - name: Set up Intel
 55 |       run: |
 56 |           wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
 57 |           | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
 58 |           echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
 59 |           sudo apt-get update -y
 60 |           sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mkl-devel
 61 |     - name: CMake
 62 |       run: |
 63 |         source /opt/intel/oneapi/setvars.sh
 64 |         cmake -B build -DENABLE_ONEMKL_SYCL=${{ matrix.mkl }}
 65 |     - name: Build
 66 |       run: |
 67 |         source /opt/intel/oneapi/setvars.sh
 68 |         make -C build -j `nproc`
 69 |     - name: Test
 70 |       run: |
 71 |         source /opt/intel/oneapi/setvars.sh
 72 |         ./build/test/gtest/spblas-tests
 73 | 
 74 |   macos:
 75 |     runs-on: 'macos-latest'
 76 |     strategy:
 77 |       matrix:
 78 |         armpl: [OFF, ON]
 79 |     name: macos${{ matrix.armpl == 'ON' && '-armpl' || '' }}
 80 |     steps:
 81 |     - uses: actions/checkout@v4
 82 |     - name: Set up ArmPL
 83 |       run: |
 84 |         brew install --cask arm-performance-libraries
 85 |         ARMPL_PATH=$(echo /opt/arm/armpl*)
 86 |         echo "ARMPL_DIR=$ARMPL_PATH" >> $GITHUB_ENV
 87 |     - name: CMake
 88 |       run: |
 89 |         cmake -B build -DENABLE_ARMPL=${{ matrix.armpl }}
 90 |     - name: Build
 91 |       run: |
 92 |         make -C build -j 3
 93 |     - name: Test
 94 |       run: |
 95 |         ./build/test/gtest/spblas-tests
 96 | 
 97 |   aocl:
 98 |     runs-on: 'cpu_amd'
 99 |     steps:
100 |     - uses: actions/checkout@v4
101 |     - name: Add AOCL-Sparse to Environment
102 |       run: |
103 |         source /apps/spacks/current/share/spack/setup-env.sh
104 |         echo "AOCLSPARSE_DIR=$(spack location -i aocl-sparse)" >> $GITHUB_ENV
105 |         echo "AOCLUTILS_DIR=$(spack location -i aocl-utils)" >> $GITHUB_ENV
106 |     - name: CMake
107 |       run: |
108 |         source /apps/spacks/current/share/spack/setup-env.sh
109 |         spack load /ia2365b
110 |         cmake -B build -DENABLE_AOCLSPARSE=ON
111 |     - name: Build
112 |       run: |
113 |         make -C build -j `nproc`
114 |     - name: Test
115 |       run: |
116 |         ./build/test/gtest/spblas-tests
117 | 
118 |   rocsparse:
119 |     runs-on: 'gpu_amd'
120 |     steps:
121 |     - uses: actions/checkout@v4
122 |     - name: CMake
123 |       shell: bash -l {0}
124 |       run: |
125 |         module load cmake
126 |         cmake -B build -DENABLE_ROCSPARSE=ON -DCMAKE_PREFIX_PATH=/opt/rocm
127 |     - name: Build
128 |       shell: bash -l {0}
129 |       run: |
130 |         make -C build -j `nproc`
131 |     - name: Test
132 |       shell: bash -l {0}
133 |       run: |
134 |         ./build/test/gtest/spblas-tests
135 | 
136 |   cusparse:
137 |     runs-on: 'gpu_nvidia'
138 |     steps:
139 |     - uses: actions/checkout@v4
140 |     - name: CMake
141 |       shell: bash -l {0}
142 |       run: |
143 |         module load cmake
144 |         cmake -B build -DENABLE_CUSPARSE=ON -DCMAKE_PREFIX_PATH=/usr/local/cuda/targets/x86_64-linux/lib/cmake
145 |     - name: Build
146 |       shell: bash -l {0}
147 |       run: |
148 |         make -C build -j `nproc`
149 |     - name: Test
150 |       shell: bash -l {0}
151 |       run: |
152 |         ./build/test/gtest/spblas-tests
153 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/detail/spgemm/spgemm_innerproduct.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/backend/backend.hpp>
  4 | #include <spblas/concepts.hpp>
  5 | #include <spblas/detail/log.hpp>
  6 | 
  7 | #include <spblas/algorithms/detail/sparse_dot_product.hpp>
  8 | #include <spblas/algorithms/transposed.hpp>
  9 | #include <spblas/backend/csr_builder.hpp>
 10 | #include <spblas/backend/spa_accumulator.hpp>
 11 | #include <spblas/detail/operation_info_t.hpp>
 12 | 
 13 | namespace spblas {
 14 | 
 15 | // C = AB
 16 | // CSR * CSC -> CSR
 17 | // SpGEMM (Inner Product)
 18 | template <matrix A, matrix B, matrix C>
 19 |   requires(__backend::row_iterable<A> && __backend::column_iterable<B> &&
 20 |            __detail::is_csr_view_v<C>)
 21 | void multiply(A&& a, B&& b, C&& c) {
 22 |   log_trace("");
 23 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
 24 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
 25 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
 26 |     throw std::invalid_argument(
 27 |         "multiply: matrix dimensions are incompatible.");
 28 |   }
 29 | 
 30 |   using T = tensor_scalar_t<C>;
 31 |   using I = tensor_index_t<C>;
 32 | 
 33 |   __backend::spa_accumulator<T, I> dot_product_acc(__backend::shape(a)[1]);
 34 |   __backend::spa_accumulator<T, I> c_row(__backend::shape(c)[1]);
 35 |   __backend::csr_builder c_builder(c);
 36 | 
 37 |   for (auto&& [i, a_row] : __backend::rows(a)) {
 38 |     c_row.clear();
 39 | 
 40 |     if (!__ranges::empty(a_row)) {
 41 |       for (auto&& [j, b_column] : __backend::columns(b)) {
 42 |         if (!__ranges::empty(b_column)) {
 43 |           auto v =
 44 |               __detail::sparse_dot_product<T>(dot_product_acc, a_row, b_column);
 45 | 
 46 |           if (v.has_value()) {
 47 |             c_row[j] += v.value();
 48 |           }
 49 |         }
 50 |       }
 51 |       c_row.sort();
 52 | 
 53 |       try {
 54 |         c_builder.insert_row(i, c_row.get());
 55 |       } catch (...) {
 56 |         throw std::runtime_error("multiply: SpGEMM ran out of memory.");
 57 |       }
 58 |     }
 59 |   }
 60 |   c_builder.finish();
 61 |   c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
 62 |            c.rowptr()[c.shape()[0]]);
 63 | }
 64 | 
 65 | // C = AB
 66 | // CSR * CSC -> CSR
 67 | // SpGEMM (Inner Product)
 68 | template <matrix A, matrix B, matrix C>
 69 |   requires(__backend::row_iterable<A> && __backend::column_iterable<B> &&
 70 |            __detail::is_csr_view_v<C>)
 71 | operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
 72 |   log_trace("");
 73 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
 74 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
 75 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
 76 |     throw std::invalid_argument(
 77 |         "multiply: matrix dimensions are incompatible.");
 78 |   }
 79 | 
 80 |   using T = tensor_scalar_t<C>;
 81 |   using I = tensor_index_t<C>;
 82 |   using O = tensor_offset_t<C>;
 83 | 
 84 |   O nnz = 0;
 85 | 
 86 |   __backend::spa_set<I> dot_product_acc(__backend::shape(a)[1]);
 87 | 
 88 |   for (auto&& [i, a_row] : __backend::rows(a)) {
 89 |     if (!__ranges::empty(a_row)) {
 90 |       for (auto&& [j, b_column] : __backend::columns(b)) {
 91 |         if (!__ranges::empty(b_column)) {
 92 |           auto v =
 93 |               __detail::sparse_intersection(dot_product_acc, a_row, b_column);
 94 | 
 95 |           if (v) {
 96 |             nnz++;
 97 |           }
 98 |         }
 99 |       }
100 |     }
101 |   }
102 | 
103 |   return operation_info_t{__backend::shape(c), nnz};
104 | }
105 | 
106 | // C = AB
107 | // CSR * CSC -> CSC
108 | // SpGEMM (Inner Product, transposed)
109 | template <matrix A, matrix B, matrix C>
110 |   requires(__backend::row_iterable<A> && __backend::column_iterable<B> &&
111 |            __detail::is_csc_view_v<C>)
112 | void multiply(A&& a, B&& b, C&& c) {
113 |   log_trace("");
114 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
115 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
116 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
117 |     throw std::invalid_argument(
118 |         "multiply: matrix dimensions are incompatible.");
119 |   }
120 | 
121 |   return multiply(transposed(b), transposed(a), transposed(c));
122 | }
123 | 
124 | // C = AB
125 | // CSR * CSC -> CSC
126 | // SpGEMM (Inner Product, transposed)
127 | template <matrix A, matrix B, matrix C>
128 |   requires(__backend::row_iterable<A> && __backend::column_iterable<B> &&
129 |            __detail::is_csc_view_v<C>)
130 | operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
131 |   log_trace("");
132 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
133 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
134 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
135 |     throw std::invalid_argument(
136 |         "multiply: matrix dimensions are incompatible.");
137 |   }
138 | 
139 |   return multiply_compute(transposed(b), transposed(a), transposed(c));
140 | }
141 | 
142 | } // namespace spblas
143 | 


--------------------------------------------------------------------------------
/include/spblas/algorithms/detail/spgemm/spgemm_outerproduct.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/backend/backend.hpp>
  4 | #include <spblas/concepts.hpp>
  5 | #include <spblas/detail/log.hpp>
  6 | 
  7 | #include <spblas/algorithms/detail/sparse_dot_product.hpp>
  8 | #include <spblas/algorithms/transposed.hpp>
  9 | #include <spblas/backend/csr_builder.hpp>
 10 | #include <spblas/backend/hash_accumulator.hpp>
 11 | #include <spblas/detail/operation_info_t.hpp>
 12 | 
 13 | namespace spblas {
 14 | 
 15 | // C = AB
 16 | // CSC * CSR -> CSR
 17 | // SpGEMM (Outer Product)
 18 | template <matrix A, matrix B, matrix C>
 19 |   requires(__backend::column_iterable<A> && __backend::row_iterable<B> &&
 20 |            __detail::is_csr_view_v<C>)
 21 | void multiply(A&& a, B&& b, C&& c) {
 22 |   log_trace("");
 23 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
 24 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
 25 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
 26 |     throw std::invalid_argument(
 27 |         "multiply: matrix dimensions are incompatible.");
 28 |   }
 29 | 
 30 |   using T = tensor_scalar_t<C>;
 31 |   using I = tensor_index_t<C>;
 32 | 
 33 |   std::vector<__backend::hash_accumulator<T, I>> row_accumulators;
 34 | 
 35 |   for (std::size_t i = 0; i < __backend::shape(c)[0]; i++) {
 36 |     row_accumulators.emplace_back(__backend::shape(c)[1]);
 37 |   }
 38 | 
 39 |   for (std::size_t k = 0; k < __backend::shape(a)[1]; k++) {
 40 |     auto&& a_vec = __backend::lookup_column(a, k);
 41 |     auto&& b_vec = __backend::lookup_row(b, k);
 42 | 
 43 |     for (auto&& [i, a_v] : a_vec) {
 44 |       for (auto&& [j, b_v] : b_vec) {
 45 |         row_accumulators[i][j] += a_v * b_v;
 46 |       }
 47 |     }
 48 |   }
 49 | 
 50 |   __backend::csr_builder c_builder(c);
 51 | 
 52 |   for (std::size_t i = 0; i < row_accumulators.size(); i++) {
 53 |     auto&& c_row = row_accumulators[i];
 54 | 
 55 |     c_row.sort();
 56 | 
 57 |     try {
 58 |       c_builder.insert_row(i, c_row.get());
 59 |     } catch (...) {
 60 |       throw std::runtime_error("multiply: SpGEMM ran out of memory.");
 61 |     }
 62 |   }
 63 |   c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
 64 |            c.rowptr()[c.shape()[0]]);
 65 | }
 66 | 
 67 | template <matrix A, matrix B, matrix C>
 68 |   requires(__backend::column_iterable<A> && __backend::row_iterable<B> &&
 69 |            __detail::is_csr_view_v<C>)
 70 | operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
 71 |   log_trace("");
 72 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
 73 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
 74 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
 75 |     throw std::invalid_argument(
 76 |         "multiply: matrix dimensions are incompatible.");
 77 |   }
 78 | 
 79 |   using T = tensor_scalar_t<C>;
 80 |   using I = tensor_index_t<C>;
 81 |   using O = tensor_offset_t<C>;
 82 | 
 83 |   std::vector<__backend::hash_accumulator<T, I>> row_accumulators;
 84 | 
 85 |   for (std::size_t i = 0; i < __backend::shape(c)[0]; i++) {
 86 |     row_accumulators.emplace_back(__backend::shape(c)[1]);
 87 |   }
 88 | 
 89 |   O nnz = 0;
 90 | 
 91 |   for (std::size_t k = 0; k < __backend::shape(a)[1]; k++) {
 92 |     auto&& a_vec = __backend::lookup_column(a, k);
 93 |     auto&& b_vec = __backend::lookup_row(b, k);
 94 | 
 95 |     for (auto&& [i, a_v] : a_vec) {
 96 |       for (auto&& [j, b_v] : b_vec) {
 97 |         row_accumulators[i][j] += a_v * b_v;
 98 |       }
 99 |     }
100 |   }
101 | 
102 |   for (auto&& row_acc : row_accumulators) {
103 |     nnz += row_acc.size();
104 |   }
105 | 
106 |   return operation_info_t{__backend::shape(c), nnz};
107 | }
108 | 
109 | // C = AB
110 | // CSC * CSR -> CSC
111 | // SpGEMM (Outer Product, transposed)
112 | template <matrix A, matrix B, matrix C>
113 |   requires(__backend::column_iterable<A> && __backend::row_iterable<B> &&
114 |            __detail::is_csc_view_v<C>)
115 | void multiply(A&& a, B&& b, C&& c) {
116 |   log_trace("");
117 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
118 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
119 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
120 |     throw std::invalid_argument(
121 |         "multiply: matrix dimensions are incompatible.");
122 |   }
123 |   multiply(transposed(b), transposed(a), transposed(c));
124 | }
125 | 
126 | // C = AB
127 | // CSC * CSR -> CSC
128 | // SpGEMM (Outer Product, transposed)
129 | template <matrix A, matrix B, matrix C>
130 |   requires(__backend::column_iterable<A> && __backend::row_iterable<B> &&
131 |            __detail::is_csc_view_v<C>)
132 | operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
133 |   log_trace("");
134 |   if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
135 |       __backend::shape(b)[1] != __backend::shape(c)[1] ||
136 |       __backend::shape(a)[1] != __backend::shape(b)[0]) {
137 |     throw std::invalid_argument(
138 |         "multiply: matrix dimensions are incompatible.");
139 |   }
140 | 
141 |   auto info = multiply_compute(transposed(b), transposed(a), transposed(c));
142 |   info.update_impl_({info.result_shape()[1], info.result_shape()[0]},
143 |                     info.result_nnz());
144 |   return info;
145 | }
146 | 
147 | } // namespace spblas
148 | 


--------------------------------------------------------------------------------
/include/spblas/vendor/armpl/multiply_impl.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <spblas/algorithms/transposed.hpp>
  4 | 
  5 | #include <spblas/vendor/armpl/detail/detail.hpp>
  6 | 
  7 | #include <spblas/detail/log.hpp>
  8 | #include <spblas/detail/operation_info_t.hpp>
  9 | #include <spblas/detail/ranges.hpp>
 10 | #include <spblas/detail/view_inspectors.hpp>
 11 | 
 12 | namespace spblas {
 13 | 
 14 | // SpMV
 15 | template <matrix A, vector B, vector C>
 16 |   requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
 17 |            __detail::has_contiguous_range_base<B> &&
 18 |            __ranges::contiguous_range<C>)
 19 | void multiply(A&& a, B&& b, C&& c) {
 20 |   log_trace("");
 21 |   auto a_base = __detail::get_ultimate_base(a);
 22 |   auto b_base = __detail::get_ultimate_base(b);
 23 | 
 24 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
 25 |   tensor_scalar_t<A> alpha = alpha_optional.value_or(1);
 26 | 
 27 |   armpl_spmat_t a_handle = __armpl::create_matrix_handle(a_base);
 28 | 
 29 |   auto stat = __armpl::spmv_exec<tensor_scalar_t<A>>(
 30 |       ARMPL_SPARSE_OPERATION_NOTRANS, alpha, a_handle, __ranges::data(b_base),
 31 |       0, __ranges::data(c));
 32 | 
 33 |   armpl_spmat_destroy(a_handle);
 34 | }
 35 | 
 36 | // SpMM
 37 | template <matrix A, matrix B, matrix C>
 38 |   requires(
 39 |       (__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
 40 |       __detail::has_mdspan_matrix_base<B> && __detail::is_matrix_mdspan_v<C> &&
 41 |       std::is_same_v<typename __detail::ultimate_base_type_t<B>::layout_type,
 42 |                      __mdspan::layout_right> &&
 43 |       std::is_same_v<typename std::remove_cvref_t<C>::layout_type,
 44 |                      __mdspan::layout_right>)
 45 | void multiply(A&& a, B&& b, C&& c) {
 46 |   log_trace("");
 47 |   auto a_base = __detail::get_ultimate_base(a);
 48 |   auto b_base = __detail::get_ultimate_base(b);
 49 | 
 50 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
 51 |   tensor_scalar_t<A> alpha = alpha_optional.value_or(1);
 52 | 
 53 |   auto a_handle = __armpl::create_matrix_handle(a_base);
 54 | 
 55 |   armpl_spmat_t b_handle, c_handle;
 56 | 
 57 |   __armpl::create_spmat_dense<tensor_scalar_t<B>>(
 58 |       &b_handle, ARMPL_ROW_MAJOR, __backend::shape(b_base)[0],
 59 |       __backend::shape(b_base)[1], __backend::shape(b_base)[1],
 60 |       b_base.data_handle(), ARMPL_SPARSE_CREATE_NOCOPY);
 61 | 
 62 |   __armpl::create_spmat_dense<tensor_scalar_t<C>>(
 63 |       &c_handle, ARMPL_ROW_MAJOR, __backend::shape(c)[0],
 64 |       __backend::shape(c)[1], __backend::shape(c)[1], c.data_handle(),
 65 |       ARMPL_SPARSE_CREATE_NOCOPY);
 66 | 
 67 |   __armpl::spmm_exec<tensor_scalar_t<A>>(ARMPL_SPARSE_OPERATION_NOTRANS,
 68 |                                          ARMPL_SPARSE_OPERATION_NOTRANS, alpha,
 69 |                                          a_handle, b_handle, 0, c_handle);
 70 | 
 71 |   armpl_int_t m, n;
 72 |   tensor_scalar_t<C>* armpl_values;
 73 |   __armpl::export_spmat_dense<tensor_scalar_t<C>>(c_handle, ARMPL_ROW_MAJOR, &m,
 74 |                                                   &n, &armpl_values);
 75 | 
 76 |   std::copy(armpl_values, armpl_values + (m * n), c.data_handle());
 77 | 
 78 |   free(armpl_values);
 79 | 
 80 |   armpl_spmat_destroy(a_handle);
 81 |   armpl_spmat_destroy(b_handle);
 82 |   armpl_spmat_destroy(c_handle);
 83 | }
 84 | 
 85 | // SpGEMM
 86 | template <matrix A, matrix B, matrix C>
 87 |   requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
 88 |            (__detail::has_csr_base<B> || __detail::has_csc_base<B>) &&
 89 |            (__detail::is_csr_view_v<C> || __detail::is_csc_view_v<C>) )
 90 | operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
 91 |   log_trace("");
 92 |   auto a_base = __detail::get_ultimate_base(a);
 93 |   auto b_base = __detail::get_ultimate_base(b);
 94 | 
 95 |   auto alpha_optional = __detail::get_scaling_factor(a, b);
 96 |   tensor_scalar_t<A> alpha = alpha_optional.value_or(1);
 97 | 
 98 |   auto a_handle = __armpl::create_matrix_handle(a_base);
 99 |   auto b_handle = __armpl::create_matrix_handle(b_base);
100 | 
101 |   armpl_spmat_t c_handle =
102 |       armpl_spmat_create_null(__backend::shape(c)[0], __backend::shape(c)[1]);
103 | 
104 |   __armpl::spmm_exec<tensor_scalar_t<A>>(ARMPL_SPARSE_OPERATION_NOTRANS,
105 |                                          ARMPL_SPARSE_OPERATION_NOTRANS, alpha,
106 |                                          a_handle, b_handle, 0, c_handle);
107 | 
108 |   armpl_int_t index_base, m, n, nnz;
109 |   armpl_spmat_query(c_handle, &index_base, &m, &n, &nnz);
110 | 
111 |   return operation_info_t(
112 |       index<>{__backend::shape(c)[0], __backend::shape(c)[1]}, nnz,
113 |       __armpl::operation_state_t{a_handle, b_handle, c_handle, nullptr});
114 | }
115 | 
116 | template <matrix A, matrix B, matrix C>
117 |   requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) &&
118 |            (__detail::has_csr_base<B> || __detail::has_csc_base<B>) &&
119 |            (__detail::is_csr_view_v<C> || __detail::is_csc_view_v<C>) )
120 | void multiply_fill(operation_info_t& info, A&& a, B&& b, C&& c) {
121 |   log_trace("");
122 |   auto c_handle = info.state_.c_handle;
123 | 
124 |   __armpl::export_matrix_handle(info, c, c_handle);
125 | }
126 | 
127 | } // namespace spblas
128 | 


--------------------------------------------------------------------------------