├── requirements.txt ├── test ├── CMakeLists.txt └── gtest │ ├── CMakeLists.txt │ ├── add_test.cpp │ ├── util.hpp │ ├── transpose_test.cpp │ └── triangular_solve_test.cpp ├── include ├── spblas │ ├── vendor │ │ ├── armpl │ │ │ ├── armpl.hpp │ │ │ ├── algorithms.hpp │ │ │ ├── detail │ │ │ │ ├── detail.hpp │ │ │ │ ├── create_matrix_handle.hpp │ │ │ │ └── export_matrix_handle.hpp │ │ │ ├── types.hpp │ │ │ ├── operation_state_t.hpp │ │ │ ├── triangular_solve_impl.hpp │ │ │ └── multiply_impl.hpp │ │ ├── cusparse │ │ │ ├── cusparse.hpp │ │ │ ├── multiply.hpp │ │ │ ├── type_validation.hpp │ │ │ ├── detail │ │ │ │ ├── abstract_operation_state.hpp │ │ │ │ ├── get_transpose.hpp │ │ │ │ ├── cusparse_tensors.hpp │ │ │ │ └── spmv_state_t.hpp │ │ │ ├── operation_state_t.hpp │ │ │ ├── cuda_allocator.hpp │ │ │ ├── types.hpp │ │ │ ├── exception.hpp │ │ │ └── spmv_impl.hpp │ │ ├── rocsparse │ │ │ ├── rocsparse.hpp │ │ │ ├── multiply.hpp │ │ │ ├── type_validation.hpp │ │ │ ├── detail │ │ │ │ ├── abstract_operation_state.hpp │ │ │ │ ├── get_transpose.hpp │ │ │ │ ├── rocsparse_tensors.hpp │ │ │ │ ├── spmv_state_t.hpp │ │ │ │ └── spmv_impl.hpp │ │ │ ├── operation_state_t.hpp │ │ │ ├── hip_allocator.hpp │ │ │ ├── types.hpp │ │ │ └── exception.hpp │ │ ├── aoclsparse │ │ │ ├── detail │ │ │ │ ├── detail.hpp │ │ │ │ └── create_matrix_handle.hpp │ │ │ ├── aoclsparse.hpp │ │ │ ├── algorithms.hpp │ │ │ ├── types.hpp │ │ │ ├── operation_state_t.hpp │ │ │ ├── spmv_impl.hpp │ │ │ ├── spmm_impl.hpp │ │ │ └── triangular_solve_impl.hpp │ │ └── onemkl_sycl │ │ │ ├── onemkl_sycl.hpp │ │ │ ├── algorithms.hpp │ │ │ ├── detail │ │ │ ├── detail.hpp │ │ │ ├── get_queue.hpp │ │ │ ├── execution_policy.hpp │ │ │ ├── get_pointer_device.hpp │ │ │ ├── get_matrix_handle.hpp │ │ │ └── create_matrix_handle.hpp │ │ │ ├── types.hpp │ │ │ ├── spmv_impl.hpp │ │ │ ├── operation_state_t.hpp │ │ │ ├── spmm_impl.hpp │ │ │ └── triangular_solve_impl.hpp │ ├── views │ │ ├── view_base.hpp │ │ ├── matrix_opt.hpp │ │ ├── 
scaled_view.hpp │ │ ├── views.hpp │ │ ├── csc_view.hpp │ │ ├── csr_view.hpp │ │ ├── matrix_opt_impl.hpp │ │ └── inspectors.hpp │ ├── algorithms │ │ ├── detail │ │ │ ├── spgemm │ │ │ │ ├── spgemm.hpp │ │ │ │ ├── spgemm_innerproduct.hpp │ │ │ │ └── spgemm_outerproduct.hpp │ │ │ └── sparse_dot_product.hpp │ │ ├── scale.hpp │ │ ├── scaled.hpp │ │ ├── transpose.hpp │ │ ├── scaled_impl.hpp │ │ ├── add.hpp │ │ ├── algorithms.hpp │ │ ├── triangular_solve.hpp │ │ ├── transposed.hpp │ │ ├── scale_impl.hpp │ │ ├── multiply.hpp │ │ ├── transpose_impl.hpp │ │ ├── triangular_solve_impl.hpp │ │ ├── multiply_impl.hpp │ │ └── add_impl.hpp │ ├── detail │ │ ├── detail.hpp │ │ ├── concepts.hpp │ │ ├── mdspan.hpp │ │ ├── triangular_types.hpp │ │ ├── tuple_concept.hpp │ │ ├── ranges.hpp │ │ ├── types.hpp │ │ ├── index.hpp │ │ ├── tag_invoke.hpp │ │ ├── operation_info_t.hpp │ │ ├── log.hpp │ │ └── view_inspectors.hpp │ ├── spblas.hpp │ ├── backend │ │ ├── backend.hpp │ │ ├── concepts.hpp │ │ ├── algorithms.hpp │ │ ├── hash_accumulator.hpp │ │ ├── csr_builder.hpp │ │ ├── spa_accumulator.hpp │ │ └── cpos.hpp │ └── concepts.hpp └── CMakeLists.txt ├── .clang-format ├── examples ├── cusparse │ ├── CMakeLists.txt │ ├── util.hpp │ └── cusparse_simple_spmv.cpp ├── rocsparse │ ├── CMakeLists.txt │ ├── util.hpp │ └── rocsparse_simple_spmv.cpp ├── device │ ├── CMakeLists.txt │ └── device_spmv.cpp ├── CMakeLists.txt ├── simple_spmv.cpp ├── spmm_csc.cpp ├── simple_spmm.cpp ├── matrix_opt_example.cpp ├── simple_sptrsv.cpp └── simple_spgemm.cpp ├── .pre-commit-config.yaml ├── notes ├── spgemm.cpp ├── spmv.hpp ├── matrices.hpp └── matrix_data_structure_notes.hpp ├── .github ├── pull_request_template.md └── workflows │ └── ci.yml └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | pre-commit 2 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_subdirectory(gtest) 2 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/armpl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "algorithms.hpp" 4 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/cusparse.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "multiply.hpp" 4 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/multiply.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "spmv_impl.hpp" 4 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/rocsparse.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "multiply.hpp" 4 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_library(spblas INTERFACE) 3 | target_include_directories(spblas INTERFACE .) 
4 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/detail/detail.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "create_matrix_handle.hpp" 4 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/onemkl_sycl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "algorithms.hpp" 4 | #include 5 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/multiply.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/algorithms.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "multiply_impl.hpp" 4 | 5 | #include "triangular_solve_impl.hpp" 6 | -------------------------------------------------------------------------------- /include/spblas/views/view_base.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace spblas { 4 | 5 | class view_base {}; 6 | 7 | } // namespace spblas 8 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/detail/detail.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "armpl.hpp" 4 | #include "create_matrix_handle.hpp" 5 | #include "export_matrix_handle.hpp" 6 | -------------------------------------------------------------------------------- /include/spblas/algorithms/detail/spgemm/spgemm.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 
2 | 3 | #include "spgemm_gustavsons.hpp" 4 | #include "spgemm_innerproduct.hpp" 5 | #include "spgemm_outerproduct.hpp" 6 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/algorithms.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "spgemm_impl.hpp" 4 | #include "spmm_impl.hpp" 5 | #include "spmv_impl.hpp" 6 | #include "triangular_solve_impl.hpp" 7 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | PointerAlignment: Left 4 | ColumnLimit: 80 5 | AlwaysBreakTemplateDeclarations: Yes 6 | AllowShortFunctionsOnASingleLine: Empty 7 | SpaceAfterCStyleCast: Yes 8 | --- 9 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/detail.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "create_matrix_handle.hpp" 4 | #include "execution_policy.hpp" 5 | #include "get_matrix_handle.hpp" 6 | #include "get_queue.hpp" 7 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | using index_t = std::int32_t; 8 | using offset_t = index_t; 9 | 10 | } // namespace spblas 11 | -------------------------------------------------------------------------------- /examples/cusparse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_cuda_example example_name) 2 | add_executable(${example_name} ${example_name}.cpp) 3 | target_link_libraries(${example_name} spblas fmt) 4 | endfunction() 
5 | 6 | add_cuda_example(cusparse_simple_spmv) 7 | -------------------------------------------------------------------------------- /examples/rocsparse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_rocm_example example_name) 2 | add_executable(${example_name} ${example_name}.cpp) 3 | target_link_libraries(${example_name} spblas fmt) 4 | endfunction() 5 | 6 | add_rocm_example(rocsparse_simple_spmv) 7 | -------------------------------------------------------------------------------- /include/spblas/detail/detail.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | using index_t = armpl_int_t; 9 | using offset_t = index_t; 10 | 11 | } // namespace spblas 12 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/aoclsparse.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights reserved. 
3 | * $COPYRIGHT$ 4 | * 5 | * Additional copyrights may follow 6 | * 7 | * $HEADER$ 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "algorithms.hpp" 13 | -------------------------------------------------------------------------------- /include/spblas/algorithms/scale.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | template 8 | void scale(Scalar alpha, M&& m); 9 | 10 | template 11 | void scale(Scalar alpha, V&& v); 12 | 13 | } // namespace spblas 14 | -------------------------------------------------------------------------------- /include/spblas/algorithms/scaled.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | template 8 | auto scaled(Scalar alpha, M&& m); 9 | 10 | template 11 | auto scaled(Scalar alpha, V&& v); 12 | 13 | } // namespace spblas 14 | -------------------------------------------------------------------------------- /include/spblas/views/matrix_opt.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | // Matrix optimization data for a tensor `T`. 10 | template 11 | class matrix_opt; 12 | 13 | } // namespace spblas 14 | -------------------------------------------------------------------------------- /include/spblas/views/scaled_view.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | // Scale a tensor of type `T` by a scaling factor of type `S`. 
10 | template 11 | class scaled_view; 12 | 13 | } // namespace spblas 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | 3 | - repo: https://github.com/pre-commit/mirrors-clang-format 4 | rev: v16.0.6 5 | hooks: 6 | - id: clang-format 7 | 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v4.4.0 10 | hooks: 11 | - id: trailing-whitespace 12 | - id: end-of-file-fixer 13 | - id: mixed-line-ending 14 | - id: check-added-large-files 15 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/algorithms.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights reserved. 3 | * $COPYRIGHT$ 4 | * 5 | * Additional copyrights may follow 6 | * 7 | * $HEADER$ 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "spgemm_impl.hpp" 13 | #include "spmm_impl.hpp" 14 | #include "spmv_impl.hpp" 15 | #include "triangular_solve_impl.hpp" 16 | -------------------------------------------------------------------------------- /include/spblas/views/views.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights reserved. 
3 | * $COPYRIGHT$ 4 | * 5 | * Additional copyrights may follow 6 | * 7 | * $HEADER$ 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "aoclsparse.h" 13 | #include 14 | 15 | namespace spblas { 16 | 17 | using index_t = aoclsparse_int; 18 | using offset_t = aoclsparse_int; 19 | 20 | } // namespace spblas 21 | -------------------------------------------------------------------------------- /include/spblas/algorithms/transpose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | template 9 | operation_info_t transpose_inspect(A&& a, B&& b); 10 | 11 | template 12 | void transpose(operation_info_t& info, A&& a, B&& b); 13 | 14 | template 15 | auto transposed(M&& m); 16 | 17 | } // namespace spblas 18 | -------------------------------------------------------------------------------- /include/spblas/spblas.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if defined(SPBLAS_ENABLE_ONEMKL_SYCL) || defined(SPBLAS_ENABLE_ARMPL) || \ 4 | defined(SPBLAS_ENABLE_AOCLSPARSE) || defined(SPBLAS_ENABLE_ROCSPARSE) || \ 5 | defined(SPBLAS_ENABLE_CUSPARSE) 6 | #define SPBLAS_VENDOR_BACKEND true 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | -------------------------------------------------------------------------------- /include/spblas/algorithms/scaled_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | template 9 | auto scaled(Scalar alpha, V&& v) { 10 | return scaled_view(alpha, std::forward(v)); 11 | } 12 | 13 | template 14 | auto scaled(Scalar alpha, M&& m) { 15 | return scaled_view(alpha, std::forward(m)); 16 | } 17 | 18 | } // namespace spblas 19 | -------------------------------------------------------------------------------- 
/include/spblas/detail/concepts.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace spblas { 12 | 13 | namespace __detail { 14 | 15 | template 16 | concept matrix = requires(M& m) { 17 | { __backend::size(m) } -> std::weakly_incrementable; 18 | { __backend::shape(m) } -> tuple_like; 19 | }; 20 | 21 | } 22 | 23 | } // namespace spblas 24 | -------------------------------------------------------------------------------- /include/spblas/algorithms/add.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | template 9 | void add(A&& a, B&& b, C&& c); 10 | 11 | template 12 | void add(A&& a, B&& b, C&& c); 13 | 14 | template 15 | operation_info_t add_inspect(A&& a, B&& b, C&& c); 16 | 17 | template 18 | void add_inspect(operation_info_t& info, A&& a, B&& b, C&& c); 19 | 20 | } // namespace spblas 21 | -------------------------------------------------------------------------------- /notes/spgemm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | using namespace spblas; 5 | 6 | csr_matrix a(/* ... */); 7 | csr_matrix b(/* ... */); 8 | csr_matrix c; 9 | 10 | auto info = multiply_inspect(a, b, c); 11 | 12 | // Allocate more memory for c based on `info` 13 | 14 | auto [values, rowptr, colind] = allocate_memory_for(info); 15 | 16 | // `info` also has implementation-specific optimization data. 
17 | 18 | multiply_execute(info, a, b, c); 19 | 20 | // update_info_for_new_values(info, {a, left_operand_t}); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /include/spblas/detail/mdspan.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #if __has_include() 6 | #include 7 | #endif 8 | 9 | #if defined(__cpp_lib_mdspan) && __cpp_lib_mdspan >= 202207L 10 | 11 | namespace spblas { 12 | namespace __mdspan = std; 13 | } 14 | 15 | #elif __has_include() 16 | 17 | #include 18 | 19 | namespace spblas { 20 | namespace __mdspan = std::experimental; 21 | } 22 | 23 | #else 24 | 25 | static_assert(false, "spblas requires mdspan. Compile with a C++23 compiler " 26 | "or download the std/experimental implementation."); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Summary:** 2 | Short summary of key additions or changes or fixes, including public-facing issue 3 | or bug being addressed if it exists 4 | 5 | **Details:** 6 | 7 | - list of key changes to aid present and future reviewers in understanding what 8 | - is happening in this PR 9 | 10 | **Merge Checklist:** 11 | 12 | - [ ] Passing CI 13 | - [ ] Update documentation or README.md 14 | - [ ] Additional Test/example added (if applicable) and passing 15 | - [ ] At least one reviewer approval 16 | - [ ] (optional) Clang sanitizer scan run and triaged 17 | - [ ] Clang formatter applied (verified as part of passing CI) 18 | -------------------------------------------------------------------------------- /include/spblas/algorithms/algorithms.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // #include 7 | 8 | #ifndef 
SPBLAS_VENDOR_BACKEND 9 | #include 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | -------------------------------------------------------------------------------- /examples/device/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_device_example example_name) 2 | add_executable(${example_name} ${example_name}.cpp) 3 | if (ENABLE_ROCSPARSE) 4 | set_source_files_properties(${example_name}.cpp PROPERTIES LANGUAGE HIP) 5 | target_link_libraries(${example_name} roc::rocthrust) 6 | elseif (ENABLE_CUSPARSE) 7 | target_link_libraries(${example_name} Thrust) 8 | elseif (ENABLE_ONEMKL_SYCL) 9 | target_link_libraries(${example_name} sycl_thrust) 10 | else() 11 | message(FATAL_ERROR "Device backend not found.") 12 | endif() 13 | target_link_libraries(${example_name} spblas fmt) 14 | endfunction() 15 | 16 | add_device_example(device_spmv) 17 | -------------------------------------------------------------------------------- /examples/rocsparse/util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define HIP_CHECK(expression) \ 6 | do { \ 7 | const hipError_t status = expression; \ 8 | if (status != hipSuccess) { \ 9 | std::cerr << "HIP error " << status << ": " << hipGetErrorString(status) \ 10 | << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ 11 | } \ 12 | } while (false) 13 | -------------------------------------------------------------------------------- /include/spblas/algorithms/triangular_solve.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | template 8 | void triangular_matrix_vector_solve(ExecutionPolicy&& exec, InMat A, Triangle t, 9 | DiagonalStorage d, InVec b, OutVec x); 10 | 11 | namespace spblas { 12 | 13 | template 14 | void 
triangular_solve(A&& a, Triangle uplo, DiagonalStorage diag, B&& b, X&& x); 15 | 16 | } // namespace spblas 17 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/type_validation.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace detail { 8 | 9 | template 10 | static constexpr bool has_valid_cusparse_matrix_types_v = 11 | is_valid_cusparse_scalar_type_v> && 12 | is_valid_cusparse_index_type_v> && 13 | is_valid_cusparse_index_type_v>; 14 | 15 | template 16 | static constexpr bool has_valid_cusparse_vector_types_v = 17 | is_valid_cusparse_scalar_type_v>; 18 | 19 | } // namespace detail 20 | } // namespace spblas 21 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_example example_name) 2 | add_executable(${example_name} ${example_name}.cpp) 3 | target_link_libraries(${example_name} spblas fmt) 4 | endfunction() 5 | 6 | # CPU examples 7 | if (SPBLAS_CPU_BACKEND) 8 | add_example(simple_spmv) 9 | add_example(simple_spmm) 10 | add_example(simple_spgemm) 11 | add_example(simple_sptrsv) 12 | add_example(spmm_csc) 13 | add_example(matrix_opt_example) 14 | endif() 15 | 16 | # GPU examples 17 | if (SPBLAS_GPU_BACKEND) 18 | add_subdirectory(device) 19 | if (ENABLE_CUSPARSE) 20 | add_subdirectory(cusparse) 21 | endif() 22 | if (ENABLE_ROCSPARSE) 23 | add_subdirectory(rocsparse) 24 | endif() 25 | endif() 26 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/type_validation.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace detail { 8 | 9 | template 10 | 
static constexpr bool has_valid_rocsparse_matrix_types_v = 11 | is_valid_rocsparse_scalar_type_v> && 12 | is_valid_rocsparse_index_type_v> && 13 | is_valid_rocsparse_index_type_v>; 14 | 15 | template 16 | static constexpr bool has_valid_rocsparse_vector_types_v = 17 | is_valid_rocsparse_scalar_type_v>; 18 | 19 | } // namespace detail 20 | } // namespace spblas 21 | -------------------------------------------------------------------------------- /include/spblas/detail/triangular_types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace spblas { 4 | 5 | struct upper_triangle_t { 6 | explicit upper_triangle_t() = default; 7 | }; 8 | inline constexpr upper_triangle_t upper_triangle{}; 9 | 10 | struct lower_triangle_t { 11 | explicit lower_triangle_t() = default; 12 | }; 13 | inline constexpr lower_triangle_t lower_triangle{}; 14 | 15 | struct implicit_unit_diagonal_t { 16 | explicit implicit_unit_diagonal_t() = default; 17 | }; 18 | inline constexpr implicit_unit_diagonal_t implicit_unit_diagonal{}; 19 | 20 | struct explicit_diagonal_t { 21 | explicit explicit_diagonal_t() = default; 22 | }; 23 | inline constexpr explicit_diagonal_t explicit_diagonal{}; 24 | 25 | } // namespace spblas 26 | -------------------------------------------------------------------------------- /include/spblas/algorithms/transposed.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | template 8 | requires(__detail::is_csr_view_v) 9 | auto transposed(M&& m) { 10 | return csc_view, tensor_index_t, tensor_offset_t>( 11 | m.values(), m.rowptr(), m.colind(), {m.shape()[1], m.shape()[0]}, 12 | m.size()); 13 | } 14 | 15 | template 16 | requires(__detail::is_csc_view_v) 17 | auto transposed(M&& m) { 18 | return csr_view, tensor_index_t, tensor_offset_t>( 19 | m.values(), m.colptr(), m.rowind(), {m.shape()[1], m.shape()[0]}, 20 | 
m.size()); 21 | } 22 | 23 | } // namespace spblas 24 | -------------------------------------------------------------------------------- /notes/spmv.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | using namespace spblas; 5 | 6 | csr_matrix a(/* ... */); 7 | dense_vector x(/* ... */); 8 | dense_vector y; 9 | 10 | operation_info_t info; 11 | 12 | device_policy policy; 13 | 14 | multiply_inspect(info, policy, a, x, y); 15 | multiply_inspect(info, policy, transposed(a), x, y); 16 | 17 | // Allocate more memory for y based on `info` 18 | 19 | while (/* ... */) { 20 | multiply_execute(info, policy, a, x, y); 21 | // do something with y, update x... 22 | multiply_execute(info, policy, transposed(a), y, x); 23 | // Maybe do some more stuff... 24 | } 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /examples/cusparse/util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define CUDA_CHECK(expression) \ 6 | do { \ 7 | const cudaError_t status = expression; \ 8 | if (status != cudaSuccess) { \ 9 | std::cerr << "CUDA error " << status << ": " \ 10 | << cudaGetErrorString(status) << " at " << __FILE__ << ":" \ 11 | << __LINE__ << std::endl; \ 12 | } \ 13 | } while (false) 14 | -------------------------------------------------------------------------------- /include/spblas/backend/backend.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 10 | #include 11 | #endif 12 | 13 | #ifdef SPBLAS_ENABLE_ARMPL 14 | #include 15 | #endif 16 | 17 | #ifdef SPBLAS_ENABLE_AOCLSPARSE 18 | #include 19 | #endif 20 | 21 | #ifdef SPBLAS_ENABLE_ROCSPARSE 22 | #include 23 | #endif 24 | 25 | #ifdef 
SPBLAS_ENABLE_CUSPARSE 26 | #include 27 | #endif 28 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/detail/abstract_operation_state.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace __cusparse { 8 | 9 | class abstract_operation_state_t { 10 | public: 11 | // Common state that all operations need 12 | cusparseHandle_t handle() const { 13 | return handle_; 14 | } 15 | 16 | // Make std::default_delete a friend so unique_ptr can delete us 17 | friend struct std::default_delete; 18 | 19 | protected: 20 | abstract_operation_state_t() { 21 | cusparseCreate(&handle_); 22 | } 23 | 24 | virtual ~abstract_operation_state_t() { 25 | if (handle_) { 26 | cusparseDestroy(handle_); 27 | } 28 | } 29 | 30 | cusparseHandle_t handle_; 31 | }; 32 | 33 | } // namespace __cusparse 34 | } // namespace spblas 35 | -------------------------------------------------------------------------------- /include/spblas/algorithms/scale_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace spblas { 9 | 10 | namespace { 11 | 12 | template 13 | requires(matrix || vector) 14 | void scale_impl_(Scalar alpha, T&& t) { 15 | auto&& values = __backend::values(t); 16 | std::for_each(__ranges::begin(values), __ranges::end(values), 17 | [&](auto&& v) { v *= alpha; }); 18 | } 19 | 20 | } // namespace 21 | 22 | template 23 | void scale(Scalar alpha, M&& m) { 24 | scale_impl_(alpha, std::forward(m)); 25 | } 26 | 27 | template 28 | void scale(Scalar alpha, V&& v) { 29 | scale_impl_(alpha, std::forward(v)); 30 | } 31 | 32 | } // namespace spblas 33 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/detail/abstract_operation_state.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace __rocsparse { 8 | 9 | class abstract_operation_state_t { 10 | public: 11 | // Common state that all operations need 12 | rocsparse_handle handle() const { 13 | return handle_; 14 | } 15 | 16 | // Make std::default_delete a friend so unique_ptr can delete us 17 | friend struct std::default_delete; 18 | 19 | protected: 20 | abstract_operation_state_t() { 21 | rocsparse_create_handle(&handle_); 22 | } 23 | 24 | virtual ~abstract_operation_state_t() { 25 | if (handle_) { 26 | rocsparse_destroy_handle(handle_); 27 | } 28 | } 29 | 30 | rocsparse_handle handle_; 31 | }; 32 | 33 | } // namespace __rocsparse 34 | } // namespace spblas 35 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/get_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | namespace __mkl { 8 | 9 | template 10 | sycl::queue get_queue(const spblas::mkl::parallel_policy& policy, T* ptr) { 11 | return policy.get_queue(ptr); 12 | } 13 | 14 | template 15 | sycl::queue& get_queue(spblas::mkl::device_policy& policy, T* ptr) { 16 | return policy.get_queue(); 17 | } 18 | 19 | } // namespace __mkl 20 | 21 | } // namespace spblas 22 | 23 | #if __has_include() 24 | 25 | #include 26 | 27 | namespace spblas { 28 | 29 | namespace __mkl { 30 | 31 | template 32 | sycl::queue& get_queue(thrust::execution_policy& policy, T* ptr) { 33 | return policy.get_queue(); 34 | } 35 | 36 | } // namespace __mkl 37 | 38 | } // namespace spblas 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /include/spblas/algorithms/multiply.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 
#include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | template 9 | void multiply(A&& a, B&& b, C&& c); 10 | 11 | template 12 | void multiply(A&& a, B&& b, C&& c); 13 | 14 | template 15 | operation_info_t multiply_inspect(A&& a, B&& b, C&& c); 16 | 17 | template 18 | void multiply_inspect(operation_info_t& info, A&& a, B&& b, C&& c); 19 | 20 | template 21 | operation_info_t multiply_compute(A&& a, B&& b, C&& c); 22 | 23 | template 24 | void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c); 25 | 26 | template 27 | void multiply_fill(operation_info_t& info, A&& a, B&& b, C&& c); 28 | 29 | } // namespace spblas 30 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/detail/create_matrix_handle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace __armpl { 9 | 10 | template 11 | requires __detail::is_csr_view_v 12 | armpl_spmat_t create_matrix_handle(M&& m) { 13 | armpl_spmat_t handle; 14 | __armpl::create_spmat_csr>( 15 | &handle, m.shape()[0], m.shape()[1], m.rowptr().data(), m.colind().data(), 16 | m.values().data(), ARMPL_SPARSE_CREATE_NOCOPY); 17 | return handle; 18 | } 19 | 20 | template 21 | requires __detail::is_csc_view_v 22 | armpl_spmat_t create_matrix_handle(M&& m) { 23 | armpl_spmat_t handle; 24 | __armpl::create_spmat_csc>( 25 | &handle, m.shape()[0], m.shape()[1], m.rowind().data(), m.colptr().data(), 26 | m.values().data(), ARMPL_SPARSE_CREATE_NOCOPY); 27 | return handle; 28 | } 29 | 30 | } // namespace __armpl 31 | 32 | } // namespace spblas 33 | -------------------------------------------------------------------------------- /include/spblas/detail/tuple_concept.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace spblas { 9 | 10 | 
namespace __detail { 11 | 12 | template 13 | concept tuple_element_gettable = requires(T tuple) { 14 | { get(tuple) } -> std::convertible_to; 15 | }; 16 | 17 | template 18 | concept tuple_like = 19 | requires { 20 | typename std::tuple_size>::type; 21 | requires std::same_as< 22 | std::remove_cvref_t< 23 | decltype(std::tuple_size_v>)>, 24 | std::size_t>; 25 | } && sizeof...(Args) == std::tuple_size_v> && 26 | [](std::index_sequence) { 27 | return (tuple_element_gettable && ...); 28 | }(std::make_index_sequence>>()); 29 | 30 | } // namespace __detail 31 | } // namespace spblas 32 | -------------------------------------------------------------------------------- /include/spblas/detail/ranges.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L && \ 6 | defined(__cpp_lib_ranges_zip) && __cpp_lib_ranges_zip >= 202110L 7 | 8 | #include 9 | 10 | namespace spblas { 11 | 12 | namespace __ranges = ::std::ranges; 13 | 14 | namespace __detail { 15 | 16 | namespace __ranges { 17 | 18 | template 19 | concept view = ::std::ranges::view; 20 | 21 | } 22 | 23 | } // namespace __detail 24 | 25 | } // namespace spblas 26 | 27 | #elif __has_include() 28 | 29 | #include 30 | 31 | namespace spblas { 32 | 33 | namespace __ranges = ::ranges; 34 | 35 | namespace __detail { 36 | 37 | namespace __ranges { 38 | 39 | template 40 | concept view = ::ranges::view_; 41 | 42 | } 43 | 44 | } // namespace __detail 45 | 46 | } // namespace spblas 47 | 48 | #else 49 | static_assert( 50 | false, 51 | "spblas requires support for std::ranges. 
Compile with C++23 or later."); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/detail/get_transpose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace __cusparse { 8 | 9 | // 10 | // Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose 11 | // and returns the cusparseOperation_t value associated with it being 12 | // represented in the CSR format 13 | // 14 | // CSR = CSR + NON_TRANSPOSE 15 | // CSR_transpose = CSR + TRANSPOSE 16 | // CSC = CSR + TRANSPOSE 17 | // CSC_transpose = CSR + NON_TRANSPOSE 18 | // 19 | template 20 | cusparseOperation_t get_transpose(M&& m) { 21 | static_assert(__detail::has_csr_base || __detail::has_csc_base); 22 | if constexpr (__detail::has_base) { 23 | return get_transpose(m.base()); 24 | } else if constexpr (__detail::is_csr_view_v) { 25 | return CUSPARSE_OPERATION_NON_TRANSPOSE; 26 | } else if constexpr (__detail::is_csc_view_v) { 27 | return CUSPARSE_OPERATION_TRANSPOSE; 28 | } 29 | } 30 | 31 | } // namespace __cusparse 32 | } // namespace spblas 33 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/detail/get_transpose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | namespace __rocsparse { 8 | 9 | // 10 | // Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose 11 | // and returns the rocsparse_operation value associated with it being 12 | // represented in the CSR format 13 | // 14 | // CSR = CSR + NON_TRANSPOSE 15 | // CSR_transpose = CSR + TRANSPOSE 16 | // CSC = CSR + TRANSPOSE 17 | // CSC_transpose = CSR + NON_TRANSPOSE 18 | // 19 | template 20 | rocsparse_operation get_transpose(M&& m) { 21 | 
static_assert(__detail::has_csr_base || __detail::has_csc_base); 22 | if constexpr (__detail::has_base) { 23 | return get_transpose(m.base()); 24 | } else if constexpr (__detail::is_csr_view_v) { 25 | return rocsparse_operation_none; 26 | } else if constexpr (__detail::is_csc_view_v) { 27 | return rocsparse_operation_transpose; 28 | } 29 | } 30 | 31 | } // namespace __rocsparse 32 | } // namespace spblas 33 | -------------------------------------------------------------------------------- /test/gtest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | enable_testing() 2 | 3 | set(TEST_SOURCES) 4 | 5 | # CPU tests 6 | if (SPBLAS_CPU_BACKEND) 7 | list(APPEND TEST_SOURCES 8 | spmv_test.cpp 9 | spmm_test.cpp 10 | spgemm_test.cpp 11 | spgemm_csr_csc.cpp 12 | add_test.cpp 13 | transpose_test.cpp 14 | triangular_solve_test.cpp) 15 | endif() 16 | 17 | # GPU tests 18 | if (SPBLAS_GPU_BACKEND) 19 | if (ENABLE_ROCSPARSE) 20 | set_source_files_properties(device/spmv_test.cpp PROPERTIES LANGUAGE HIP) 21 | endif() 22 | list(APPEND TEST_SOURCES device/spmv_test.cpp) 23 | endif() 24 | 25 | add_executable(spblas-tests ${TEST_SOURCES}) 26 | target_link_libraries(spblas-tests spblas fmt GTest::gtest_main) 27 | 28 | # Backend-specific test configuration 29 | if (ENABLE_ROCSPARSE) 30 | target_link_libraries(spblas-tests roc::rocthrust) 31 | elseif (ENABLE_CUSPARSE) 32 | target_link_libraries(spblas-tests Thrust) 33 | elseif (ENABLE_ONEMKL_SYCL) 34 | target_link_libraries(spblas-tests sycl_thrust) 35 | endif() 36 | 37 | include(GoogleTest) 38 | gtest_discover_tests(spblas-tests) 39 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/operation_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/abstract_operation_state.hpp" 4 | #include 5 | 6 | namespace spblas { 7 | namespace __rocsparse { 8 | 
9 | class operation_state_t { 10 | public: 11 | operation_state_t() = default; 12 | operation_state_t(std::unique_ptr&& state) 13 | : state_(std::move(state)) {} 14 | 15 | // Move-only 16 | operation_state_t(operation_state_t&&) = default; 17 | operation_state_t& operator=(operation_state_t&&) = default; 18 | 19 | // No copying 20 | operation_state_t(const operation_state_t&) = delete; 21 | operation_state_t& operator=(const operation_state_t&) = delete; 22 | 23 | // Access the underlying state 24 | template 25 | T* get_state() { 26 | return dynamic_cast(state_.get()); 27 | } 28 | 29 | template 30 | const T* get_state() const { 31 | return dynamic_cast(state_.get()); 32 | } 33 | 34 | private: 35 | std::unique_ptr state_; 36 | }; 37 | 38 | } // namespace __rocsparse 39 | } // namespace spblas 40 | -------------------------------------------------------------------------------- /include/spblas/backend/concepts.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace __backend { 9 | 10 | template 11 | concept row_iterable = requires(T& t) { rows(t); }; 12 | 13 | template 14 | concept column_iterable = requires(T& t) { columns(t); }; 15 | 16 | template 17 | concept row_lookupable = requires(T& t) { lookup_row(t, tensor_index_t{}); }; 18 | 19 | template 20 | concept column_lookupable = 21 | requires(T& t) { lookup_column(t, tensor_index_t{}); }; 22 | 23 | // Helper concepts for `lookupable`. Declared directly in __backend: an unnamed namespace in a header would give every translation unit its own copy. 24 | 25 | template 26 | concept lookupable_matrix = 27 | requires(T& t, tensor_index_t i, tensor_index_t j) { 28 | { lookup(t, i, j) }; 29 | }; 30 | 31 | template 32 | concept lookupable_vector = requires(T& t, tensor_index_t i) { 33 | { lookup(t, i) }; 34 | }; 35 | 36 | 37 | 38 | template 39 | concept lookupable = lookupable_matrix || lookupable_vector; 40 | 41 | } // namespace __backend 42 | 43 | } // namespace spblas 44 | 
-------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/operation_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/abstract_operation_state.hpp" 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace __cusparse { 9 | 10 | class operation_state_t { 11 | public: 12 | operation_state_t() = default; 13 | operation_state_t(std::unique_ptr&& state) 14 | : state_(std::move(state)) {} 15 | 16 | // Move-only 17 | operation_state_t(operation_state_t&&) = default; 18 | operation_state_t& operator=(operation_state_t&&) = default; 19 | 20 | // No copying 21 | operation_state_t(const operation_state_t&) = delete; 22 | operation_state_t& operator=(const operation_state_t&) = delete; 23 | 24 | // Access the underlying state 25 | template 26 | T* get_state() { 27 | return dynamic_cast(state_.get()); 28 | } 29 | 30 | template 31 | const T* get_state() const { 32 | return dynamic_cast(state_.get()); 33 | } 34 | 35 | private: 36 | std::unique_ptr state_; 37 | }; 38 | 39 | } // namespace __cusparse 40 | 41 | } // namespace spblas 42 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/execution_policy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace mkl { 9 | 10 | class parallel_policy { 11 | public: 12 | parallel_policy() {} 13 | 14 | template 15 | sycl::queue get_queue(T* ptr) const { 16 | return spblas::__mkl::get_pointer_queue(ptr); 17 | } 18 | 19 | sycl::queue get_queue() const { 20 | return sycl::queue(sycl::default_selector_v); 21 | } 22 | }; 23 | 24 | class device_policy { 25 | public: 26 | device_policy(const sycl::queue& queue) : queue_(queue) {} 27 | 28 | sycl::queue& get_queue() { 29 | return queue_; 30 | } 31 | 32 | const 
sycl::queue& get_queue() const { 33 | return queue_; 34 | } 35 | 36 | sycl::device get_device() const { 37 | return queue_.get_device(); 38 | } 39 | 40 | sycl::context get_context() const { 41 | return queue_.get_context(); 42 | } 43 | 44 | private: 45 | sycl::queue queue_; 46 | }; 47 | 48 | inline parallel_policy par; 49 | 50 | } // namespace mkl 51 | 52 | } // namespace spblas 53 | -------------------------------------------------------------------------------- /include/spblas/algorithms/detail/sparse_dot_product.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace spblas { 8 | 9 | namespace __detail { 10 | 11 | template 12 | std::optional sparse_dot_product(__backend::spa_accumulator& acc, 13 | A&& a, B&& b) { 14 | acc.clear(); 15 | 16 | for (auto&& [i, v] : a) { 17 | acc[i] = v; 18 | } 19 | 20 | T sum = 0; 21 | bool implicit_zero = true; 22 | for (auto&& [i, v] : b) { 23 | if (acc.contains(i)) { 24 | sum += acc[i] * v; 25 | implicit_zero = false; 26 | } 27 | } 28 | 29 | if (implicit_zero) { 30 | return {}; 31 | } else { 32 | return sum; 33 | } 34 | } 35 | 36 | template 37 | bool sparse_intersection(Set&& set, A&& a, B&& b) { 38 | set.clear(); 39 | 40 | for (auto&& [i, v] : a) { 41 | set.insert(i); 42 | } 43 | 44 | for (auto&& [i, v] : b) { 45 | if (set.contains(i)) { 46 | return true; 47 | } 48 | } 49 | 50 | return false; 51 | } 52 | 53 | } // namespace __detail 54 | 55 | } // namespace spblas 56 | -------------------------------------------------------------------------------- /include/spblas/concepts.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace spblas { 10 | 11 | /* 12 | The following types fulfill the matrix concept: 13 | - Instantiations of csr_view<...> 14 | - Instantiations of csc_view<...> 15 | - Instantiations 
of mdspan<...> with rank 2 16 | - Instantiations of scaled_view where M is a matrix 17 | */ 18 | 19 | template 20 | concept matrix = __detail::is_csr_view_v || __detail::is_csc_view_v || 21 | __detail::is_matrix_mdspan_v || __detail::matrix; 22 | 23 | /* 24 | The following types fulfill the vector concept: 25 | - Random access range (e.g. std::vector<...>) 26 | */ 27 | 28 | template 29 | concept vector = __ranges::random_access_range && !matrix; 30 | 31 | template 32 | concept tensor = matrix || vector; 33 | 34 | template 35 | concept view = tensor && 36 | (std::derived_from, view_base> || 37 | __detail::is_matrix_mdspan_v || __detail::__ranges::view); 38 | 39 | } // namespace spblas 40 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/get_pointer_device.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace __mkl { 9 | 10 | inline std::vector global_contexts_; 11 | 12 | template 13 | std::pair get_pointer_device(T* ptr) { 14 | if (global_contexts_.empty()) { 15 | for (auto&& platform : sycl::platform::get_platforms()) { 16 | sycl::context context(platform.get_devices()); 17 | 18 | global_contexts_.push_back(context); 19 | } 20 | } 21 | 22 | for (auto&& context : global_contexts_) { 23 | try { 24 | sycl::device device = sycl::get_pointer_device(ptr, context); 25 | return {device, context}; 26 | } catch (...) { 27 | } 28 | } 29 | 30 | throw std::runtime_error( 31 | "get_pointer_device: could not locate device corresponding to pointer"); 32 | } 33 | 34 | template 35 | sycl::queue get_pointer_queue(T* ptr) { 36 | try { 37 | auto&& [device, context] = get_pointer_device(ptr); 38 | return sycl::queue(context, device); 39 | } catch (...) 
{ 40 | return sycl::queue(sycl::cpu_selector_v); 41 | } 42 | } 43 | 44 | } // namespace __mkl 45 | 46 | } // namespace spblas 47 | -------------------------------------------------------------------------------- /include/spblas/backend/algorithms.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | namespace __backend { 10 | 11 | template 12 | requires(__backend::row_iterable) 13 | void for_each(M&& m, F&& f) { 14 | for (auto&& [i, row] : __backend::rows(m)) { 15 | for (auto&& [j, v] : row) { 16 | f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v))); 17 | } 18 | } 19 | } 20 | 21 | template 22 | requires(__backend::column_iterable) 23 | void for_each(M&& m, F&& f) { 24 | for (auto&& [j, column] : __backend::columns(m)) { 25 | for (auto&& [i, v] : column) { 26 | f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v))); 27 | } 28 | } 29 | } 30 | 31 | template 32 | requires(__backend::lookupable && __ranges::random_access_range) 33 | void for_each(V&& v, F&& f) { 34 | using index_type = __ranges::range_size_t; 35 | for (index_type i = 0; i < __backend::shape(v); i++) { 36 | auto&& value = __backend::lookup(v, i); 37 | f(std::make_tuple(i, std::reference_wrapper(value))); 38 | } 39 | } 40 | 41 | } // namespace __backend 42 | 43 | } // namespace spblas 44 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/get_matrix_handle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace spblas { 14 | 15 | namespace __mkl { 16 | 17 | template 18 | oneapi::mkl::sparse::matrix_handle_t 19 | get_matrix_handle(sycl::queue& q, M&& m, 20 | oneapi::mkl::sparse::matrix_handle_t handle = nullptr) { 21 | 
if constexpr (__detail::is_matrix_opt_v) { 22 | log_trace("using A as matrix_opt"); 23 | 24 | if (m.matrix_handle_ == nullptr) { 25 | m.matrix_handle_ = create_matrix_handle(q, m.base()); 26 | } 27 | 28 | return m.matrix_handle_; 29 | } else if constexpr (__detail::has_base) { 30 | return get_matrix_handle(q, m.base(), handle); 31 | } else if (handle != nullptr) { 32 | log_trace("using A from operation_info_t"); 33 | 34 | return handle; 35 | } else { 36 | log_trace("using A as csr_base"); 37 | 38 | return create_matrix_handle(q, m); 39 | } 40 | } 41 | 42 | } // namespace __mkl 43 | 44 | } // namespace spblas 45 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/detail/cusparse_tensors.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace spblas { 11 | 12 | namespace __cusparse { 13 | 14 | template 15 | requires __detail::is_csr_view_v 16 | cusparseSpMatDescr_t create_cusparse_handle(M&& m) { 17 | cusparseSpMatDescr_t mat_descr; 18 | __cusparse::throw_if_error(cusparseCreateCsr( 19 | &mat_descr, __backend::shape(m)[0], __backend::shape(m)[1], 20 | m.values().size(), m.rowptr().data(), m.colind().data(), 21 | m.values().data(), detail::cusparse_index_type_v>, 22 | detail::cusparse_index_type_v>, 23 | CUSPARSE_INDEX_BASE_ZERO, detail::cuda_data_type_v>)); 24 | 25 | return mat_descr; 26 | } 27 | 28 | template 29 | requires __ranges::contiguous_range 30 | cusparseDnVecDescr_t create_cusparse_handle(V&& v) { 31 | cusparseDnVecDescr_t vec_descr; 32 | __cusparse::throw_if_error( 33 | cusparseCreateDnVec(&vec_descr, __backend::shape(v), __ranges::data(v), 34 | detail::cuda_data_type_v>)); 35 | 36 | return vec_descr; 37 | } 38 | 39 | } // namespace __cusparse 40 | 41 | } // namespace spblas 42 | 
-------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/detail/rocsparse_tensors.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace spblas { 11 | namespace __rocsparse { 12 | 13 | template 14 | requires __detail::is_csr_view_v 15 | rocsparse_spmat_descr create_rocsparse_handle(M&& m) { 16 | rocsparse_spmat_descr mat_descr; 17 | throw_if_error(rocsparse_create_csr_descr( 18 | &mat_descr, __backend::shape(m)[0], __backend::shape(m)[1], 19 | m.values().size(), m.rowptr().data(), m.colind().data(), 20 | m.values().data(), detail::rocsparse_index_type_v>, 21 | detail::rocsparse_index_type_v>, 22 | rocsparse_index_base_zero, 23 | detail::rocsparse_data_type_v>)); 24 | 25 | return mat_descr; 26 | } 27 | 28 | template 29 | requires __ranges::contiguous_range 30 | rocsparse_dnvec_descr create_rocsparse_handle(V&& v) { 31 | rocsparse_dnvec_descr vec_descr; 32 | throw_if_error(rocsparse_create_dnvec_descr( 33 | &vec_descr, __backend::shape(v), __ranges::data(v), 34 | detail::rocsparse_data_type_v>)); 35 | 36 | return vec_descr; 37 | } 38 | 39 | } // namespace __rocsparse 40 | } // namespace spblas 41 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/detail/spmv_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "abstract_operation_state.hpp" 7 | 8 | namespace spblas { 9 | namespace __cusparse { 10 | 11 | class spmv_state_t : public abstract_operation_state_t { 12 | public: 13 | spmv_state_t() = default; 14 | ~spmv_state_t() { 15 | if (a_descr_) { 16 | cusparseDestroySpMat(a_descr_); 17 | } 18 | if (b_descr_) { 19 | cusparseDestroyDnVec(b_descr_); 20 | } 21 | if (c_descr_) { 22 | 
cusparseDestroyDnVec(c_descr_); 23 | } 24 | } 25 | 26 | // Accessors for the descriptors 27 | cusparseSpMatDescr_t a_descriptor() const { 28 | return a_descr_; 29 | } 30 | cusparseDnVecDescr_t b_descriptor() const { 31 | return b_descr_; 32 | } 33 | cusparseDnVecDescr_t c_descriptor() const { 34 | return c_descr_; 35 | } 36 | 37 | // Setters for the descriptors 38 | void set_a_descriptor(cusparseSpMatDescr_t descr) { 39 | a_descr_ = descr; 40 | } 41 | void set_b_descriptor(cusparseDnVecDescr_t descr) { 42 | b_descr_ = descr; 43 | } 44 | void set_c_descriptor(cusparseDnVecDescr_t descr) { 45 | c_descr_ = descr; 46 | } 47 | 48 | private: 49 | cusparseSpMatDescr_t a_descr_ = nullptr; 50 | cusparseDnVecDescr_t b_descr_ = nullptr; 51 | cusparseDnVecDescr_t c_descr_ = nullptr; 52 | }; 53 | 54 | } // namespace __cusparse 55 | } // namespace spblas 56 | -------------------------------------------------------------------------------- /include/spblas/detail/types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 7 | #include 8 | #endif 9 | 10 | #ifdef SPBLAS_ENABLE_ARMPL 11 | #include 12 | #endif 13 | 14 | #ifdef SPBLAS_ENABLE_AOCLSPARSE 15 | #include 16 | #endif 17 | 18 | #ifdef SPBLAS_ENABLE_ROCSPARSE 19 | #include 20 | #endif 21 | 22 | #ifdef SPBLAS_ENABLE_CUSPARSE 23 | #include 24 | #endif 25 | 26 | namespace spblas { 27 | 28 | #ifndef SPBLAS_VENDOR_BACKEND 29 | using index_t = std::size_t; 30 | using offset_t = index_t; 31 | #endif 32 | 33 | template 34 | struct tensor_traits { 35 | using scalar_type = typename std::remove_cvref_t::scalar_type; 36 | using scalar_reference = typename std::remove_cvref_t::scalar_reference; 37 | using index_type = typename std::remove_cvref_t::index_type; 38 | using offset_type = typename std::remove_cvref_t::offset_type; 39 | }; 40 | 41 | template 42 | using tensor_scalar_t = typename tensor_traits::scalar_type; 43 | 
44 | template 45 | using tensor_scalar_reference_t = typename tensor_traits::scalar_reference; 46 | 47 | template 48 | using tensor_index_t = typename tensor_traits::index_type; 49 | 50 | template 51 | using tensor_offset_t = typename tensor_traits::offset_type; 52 | 53 | } // namespace spblas 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, SparseBLAS 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/operation_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | namespace __armpl { 8 | // Holds the Arm PL matrix handles of an operation; the destructor releases every non-null handle. 9 | struct operation_state_t { 10 | armpl_spmat_t a_handle = nullptr; 11 | armpl_spmat_t b_handle = nullptr; 12 | armpl_spmat_t c_handle = nullptr; 13 | armpl_spmat_t d_handle = nullptr; 14 | 15 | operation_state_t() = default; 16 | 17 | operation_state_t(armpl_spmat_t a_handle, armpl_spmat_t b_handle, 18 | armpl_spmat_t c_handle, armpl_spmat_t d_handle) 19 | : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle), 20 | d_handle(d_handle) {} 21 | 22 | operation_state_t(operation_state_t&& other) { 23 | *this = std::move(other); 24 | } 25 | 26 | operation_state_t& operator=(operation_state_t&& other) { 27 | if (this == &other) { return *this; } // self-move: keep the handles instead of nulling them 28 | // Release the handles *this still owns before adopting other's; overwriting them leaked the old ones. 29 | release_matrix_handle(a_handle); a_handle = other.a_handle; 30 | release_matrix_handle(b_handle); b_handle = other.b_handle; 31 | release_matrix_handle(c_handle); c_handle = other.c_handle; 32 | release_matrix_handle(d_handle); d_handle = other.d_handle; 33 | other.a_handle = other.b_handle = other.c_handle = other.d_handle = nullptr; 34 | return *this; 35 | } 36 | 37 | operation_state_t(const operation_state_t& other) = delete; 38 | 39 | ~operation_state_t() { 40 | release_matrix_handle(a_handle); 41 | release_matrix_handle(b_handle); 42 | release_matrix_handle(c_handle); 43 | release_matrix_handle(d_handle); 44 | } 45 | 46 | private:
47 | void release_matrix_handle(armpl_spmat_t& handle) { 48 | if (handle != nullptr) { 49 | armpl_spmat_destroy(handle); 50 | } 51 | } 52 | }; 53 | 54 | } // namespace __armpl 55 | 56 | } // namespace spblas 57 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/operation_state_t.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights reserved. 3 | * $COPYRIGHT$ 4 | * 5 | * Additional copyrights may follow 6 | * 7 | * $HEADER$ 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "aoclsparse.h" 13 | 14 | namespace spblas { 15 | 16 | namespace __aoclsparse { 17 | // Holds the AOCL-Sparse matrix handles of an operation; the destructor destroys every non-null handle. 18 | struct operation_state_t { 19 | aoclsparse_matrix a_handle = nullptr; 20 | aoclsparse_matrix b_handle = nullptr; 21 | aoclsparse_matrix c_handle = nullptr; 22 | 23 | operation_state_t() = default; 24 | 25 | operation_state_t(aoclsparse_matrix a_handle, aoclsparse_matrix b_handle, 26 | aoclsparse_matrix c_handle) 27 | : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle) {} 28 | 29 | operation_state_t(operation_state_t&& other) { 30 | *this = std::move(other); 31 | } 32 | 33 | operation_state_t& operator=(operation_state_t&& other) { 34 | if (this == &other) { return *this; } // self-move: keep the handles instead of nulling them 35 | // Destroy the handles *this still owns before adopting other's; overwriting them leaked the old ones. 36 | release_matrix_handle(a_handle); a_handle = other.a_handle; 37 | release_matrix_handle(b_handle); b_handle = other.b_handle; 38 | release_matrix_handle(c_handle); c_handle = other.c_handle; 39 | other.a_handle = other.b_handle = other.c_handle = nullptr; 40 | return *this; 41 | } 42 | 43 | operation_state_t(const operation_state_t& other) = delete; 44 | 45 | ~operation_state_t() { 46 | release_matrix_handle(a_handle); 47 | release_matrix_handle(b_handle); 48 | release_matrix_handle(c_handle); 49 | } 50 | 51 | private: 52 | void release_matrix_handle(aoclsparse_matrix handle) { 53 | if (handle != nullptr) { 54 | aoclsparse_destroy(&handle); 55 | } 56 | } 57 | }; 58 | 59 | } // namespace __aoclsparse 60 | 61 | } // namespace spblas 62 | 
-------------------------------------------------------------------------------- /examples/simple_spmv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char** argv) { 8 | using namespace spblas; 9 | 10 | using T = float; 11 | 12 | spblas::index_t m = 100; 13 | spblas::index_t n = 100; 14 | spblas::index_t nnz_in = 10; 15 | 16 | fmt::print("\n\t###########################################################" 17 | "######################"); 18 | fmt::print("\n\t### Running SpMV Example:"); 19 | fmt::print("\n\t###"); 20 | fmt::print("\n\t### y = alpha * A * x"); 21 | fmt::print("\n\t###"); 22 | fmt::print("\n\t### with "); 23 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n, 24 | nnz_in); 25 | fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1); 26 | fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1); 27 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 28 | sizeof(spblas::index_t)); 29 | fmt::print("\n\t###########################################################" 30 | "######################"); 31 | fmt::print("\n"); 32 | 33 | auto&& [values, rowptr, colind, shape, nnz] = 34 | generate_csr(m, n, nnz_in); 35 | 36 | csr_view a(values, rowptr, colind, shape, nnz); 37 | 38 | // Scale every value of `a` by 5 in place. 
39 | // scale(5.f, a); 40 | 41 | std::vector x(n, 1); 42 | std::vector y(m, 0); 43 | 44 | T alpha = 1.2f; 45 | auto a_scaled = scaled(alpha, a); 46 | 47 | // y = alpha * A * x 48 | multiply(a_scaled, x, y); 49 | 50 | fmt::print("\tExample is completed!\n"); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /examples/spmm_csc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char** argv) { 8 | using namespace spblas; 9 | namespace md = spblas::__mdspan; 10 | 11 | using T = float; 12 | 13 | spblas::index_t m = 100; 14 | spblas::index_t n = 10; 15 | spblas::index_t k = 100; 16 | spblas::index_t nnz_in = 10; 17 | 18 | fmt::print("\n\t###########################################################" 19 | "######################"); 20 | fmt::print("\n\t### Running SpMM Example:"); 21 | fmt::print("\n\t###"); 22 | fmt::print("\n\t### Y = alpha * A * X"); 23 | fmt::print("\n\t###"); 24 | fmt::print("\n\t### with "); 25 | fmt::print("\n\t### A, in CSC format, of size ({}, {}) with nnz = {}", m, k, 26 | nnz_in); 27 | fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n); 28 | fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n); 29 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 30 | sizeof(spblas::index_t)); 31 | fmt::print("\n\t###########################################################" 32 | "######################"); 33 | fmt::print("\n"); 34 | 35 | auto&& [values, colptr, rowind, shape, nnz] = generate_csc(m, k, nnz_in); 36 | 37 | csc_view a(values, colptr, rowind, shape, nnz); 38 | 39 | std::vector x_values(k * n, 1); 40 | std::vector y_values(m * n, 0); 41 | 42 | md::mdspan x(x_values.data(), k, n); 43 | md::mdspan y(y_values.data(), m, n); 44 | 45 | // y = A * (alpha * x) 46 | multiply(a, scaled(2.f, x), y); 47 | 48 | fmt::print("{}\n", 
spblas::__backend::values(y)); 49 | 50 | fmt::print("\tExample is completed!\n"); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /include/spblas/backend/hash_accumulator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace spblas { 13 | 14 | namespace __backend { 15 | // Accumulator backed by a hash map from index to value; the `count` constructor argument is unused. 16 | template 17 | class hash_accumulator { 18 | public: 19 | hash_accumulator(I count) {} 20 | 21 | T& operator[](I pos) { 22 | return hash_[pos]; 23 | } 24 | 25 | bool contains(I pos) const { 26 | return hash_.contains(pos); 27 | } 28 | 29 | void clear() { 30 | hash_.clear(); 31 | } 32 | 33 | I size() const { 34 | return hash_.size(); 35 | } 36 | 37 | bool empty() const { 38 | return hash_.empty(); 39 | } 40 | 41 | void sort() {} 42 | // Returns a copy of the stored (index, value) entries, sorted by index. 43 | auto get() const { 44 | std::vector> values(hash_.begin(), hash_.end()); 45 | 46 | std::sort(values.begin(), values.end(), [](auto&& a, auto&& b) { 47 | return std::get<0>(a) < std::get<0>(b); 48 | }); 49 | 50 | return values; 51 | } 52 | 53 | private: 54 | std::unordered_map hash_; 55 | }; 56 | // Key set backed by std::unordered_set; the `count` constructor argument is unused. 57 | template 58 | class hash_set { 59 | public: 60 | hash_set(T count) {} 61 | 62 | void insert(T key) { 63 | set_.insert(key); 64 | } 65 | 66 | bool contains(T key) const { 67 | return set_.contains(key); 68 | } 69 | 70 | void clear() { 71 | set_.clear(); 72 | } 73 | 74 | T size() const { 75 | return set_.size(); 76 | } 77 | 78 | bool empty() const { 79 | return set_.empty(); 80 | } 81 | 82 | auto get() const { 83 | return __ranges::views::all(set_); 84 | } 85 | 86 | private: 87 | std::unordered_set set_; 88 | }; 89 | 90 | } // namespace __backend 91 | 92 | } // namespace spblas 93 | -------------------------------------------------------------------------------- /test/gtest/add_test.cpp: -------------------------------------------------------------------------------- 
1 | #include 2 | 3 | #include "util.hpp" 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | TEST(CsrView, Add_CSR_CSR_CSR) { 11 | using T = float; 12 | using I = spblas::index_t; 13 | 14 | for (auto&& [m, n, nnz] : util::dims) { 15 | auto [a_values, a_rowptr, a_colind, a_shape, a_nnz] = 16 | spblas::generate_csr(m, n, nnz); 17 | 18 | auto [b_values, b_rowptr, b_colind, b_shape, b_nnz] = 19 | spblas::generate_csr(m, n, nnz); 20 | 21 | spblas::csr_view a(a_values, a_rowptr, a_colind, a_shape, a_nnz); 22 | spblas::csr_view b(b_values, b_rowptr, b_colind, b_shape, b_nnz); 23 | 24 | std::vector c_rowptr(m + 1); 25 | 26 | spblas::csr_view c(nullptr, c_rowptr.data(), nullptr, {m, n}, 0); 27 | 28 | auto info = spblas::add_inspect(a, b, c); 29 | 30 | std::vector c_values(info.result_nnz()); 31 | std::vector c_colind(info.result_nnz()); 32 | 33 | c.update(c_values, c_rowptr, c_colind); 34 | 35 | spblas::add_compute(info, a, b, c); 36 | 37 | spblas::__backend::spa_accumulator c_row_ref( 38 | spblas::__backend::shape(c)[1]); 39 | 40 | for (I i = 0; i < spblas::__backend::shape(c)[0]; i++) { 41 | c_row_ref.clear(); 42 | 43 | for (auto&& [j, v] : spblas::__backend::lookup_row(a, i)) { 44 | c_row_ref[j] += v; 45 | } 46 | 47 | for (auto&& [j, v] : spblas::__backend::lookup_row(b, i)) { 48 | c_row_ref[j] += v; 49 | } 50 | 51 | auto&& c_row = spblas::__backend::lookup_row(c, i); 52 | 53 | for (auto&& [j, v] : c_row) { 54 | EXPECT_EQ_(c_row_ref[j], v); 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /examples/simple_spmm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | using namespace spblas; 8 | namespace md = spblas::__mdspan; 9 | 10 | using T = float; 11 | 12 | spblas::index_t m = 100; 13 | spblas::index_t n = 10; 14 | spblas::index_t k = 100; 15 | spblas::index_t nnz_in = 10; 
16 | 17 | fmt::print("\n\t###########################################################" 18 | "######################"); 19 | fmt::print("\n\t### Running SpMM Example:"); 20 | fmt::print("\n\t###"); 21 | fmt::print("\n\t### Y = alpha * A * X"); 22 | fmt::print("\n\t###"); 23 | fmt::print("\n\t### with "); 24 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k, 25 | nnz_in); 26 | fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n); 27 | fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n); 28 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 29 | sizeof(spblas::index_t)); 30 | fmt::print("\n\t###########################################################" 31 | "######################"); 32 | fmt::print("\n"); 33 | 34 | auto&& [values, rowptr, colind, shape, nnz] = generate_csr(m, k, nnz_in); 35 | 36 | csr_view a(values, rowptr, colind, shape, nnz); 37 | 38 | std::vector x_values(k * n, 1); 39 | std::vector y_values(m * n, 0); 40 | 41 | md::mdspan x(x_values.data(), k, n); 42 | md::mdspan y(y_values.data(), m, n); 43 | 44 | auto a_view = scaled(2.f, a); 45 | 46 | // y = A * (alpha * x) 47 | multiply(a_view, scaled(2.f, x), y); 48 | 49 | fmt::print("{}\n", spblas::__backend::values(y)); 50 | 51 | fmt::print("\tExample is completed!\n"); 52 | 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /test/gtest/util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define EXPECT_EQ_(t, u) \ 8 | if constexpr (std::floating_point> || \ 9 | std::floating_point>) { \ 10 | auto epsilon = \ 11 | 64 * \ 12 | std::numeric_limits>::epsilon(); \ 13 | auto abs_th = \ 14 | std::numeric_limits>::min(); \ 15 | auto diff = std::abs((t) - (u)); \ 16 | auto norm = std::min( \ 17 | std::abs((t)) + std::abs((u)), \ 18 | std::numeric_limits>::max()); \ 19 | auto 
abs_error = std::max(abs_th, epsilon * norm); \ 20 | EXPECT_NEAR((t), (u), abs_error); \ 21 | } else { \ 22 | EXPECT_EQ((t), (u)); \ 23 | } 24 | 25 | namespace util { 26 | 27 | inline auto dims = 28 | std::vector({std::tuple(1000, 100, 100), std::tuple(100, 1000, 10000), 29 | std::tuple(40, 40, 1000)}); 30 | 31 | inline auto square_dims = 32 | std::vector({std::tuple(1000, 1000, 100), std::tuple(100, 100, 100), 33 | std::tuple(40, 40, 1000)}); 34 | 35 | } // namespace util 36 | -------------------------------------------------------------------------------- /include/spblas/vendor/armpl/detail/export_matrix_handle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace spblas { 9 | 10 | namespace __armpl { 11 | 12 | template 13 | requires __detail::is_csr_view_v 14 | void export_matrix_handle(operation_info_t& info, M&& matrix, 15 | armpl_spmat_t matrix_handle) { 16 | auto nnz = info.result_nnz(); 17 | armpl_int_t m, n; 18 | armpl_int_t *rowptr, *colind; 19 | tensor_scalar_t* values; 20 | __armpl::export_spmat_csr>(matrix_handle, 0, &m, &n, 21 | &rowptr, &colind, &values); 22 | 23 | std::copy(values, values + nnz, matrix.values().begin()); 24 | std::copy(colind, colind + nnz, matrix.colind().begin()); 25 | std::copy(rowptr, rowptr + m + 1, matrix.rowptr().begin()); 26 | 27 | free(values); 28 | free(rowptr); 29 | free(colind); 30 | } 31 | 32 | template 33 | requires __detail::is_csc_view_v 34 | void export_matrix_handle(operation_info_t& info, M&& matrix, 35 | armpl_spmat_t matrix_handle) { 36 | auto nnz = info.result_nnz(); 37 | armpl_int_t m, n; 38 | armpl_int_t *colptr, *rowind; 39 | tensor_scalar_t* values; 40 | __armpl::export_spmat_csc>(matrix_handle, 0, &m, &n, 41 | &rowind, &colptr, &values); 42 | 43 | std::copy(values, values + nnz, matrix.values().begin()); 44 | std::copy(rowind, rowind + nnz, matrix.rowind().begin()); 45 | std::copy(colptr, 
colptr + n + 1, matrix.colptr().begin()); 46 | 47 | free(values); 48 | free(colptr); 49 | free(rowind); 50 | } 51 | 52 | } // namespace __armpl 53 | 54 | } // namespace spblas 55 | -------------------------------------------------------------------------------- /examples/matrix_opt_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | using namespace spblas; 8 | namespace md = spblas::__mdspan; 9 | 10 | using T = float; 11 | 12 | spblas::index_t m = 100; 13 | spblas::index_t n = 10; 14 | spblas::index_t k = 100; 15 | spblas::index_t nnz_in = 10; 16 | 17 | fmt::print("\n\t###########################################################" 18 | "######################"); 19 | fmt::print("\n\t### Running SpMM Example:"); 20 | fmt::print("\n\t###"); 21 | fmt::print("\n\t### Y = alpha * A * X"); 22 | fmt::print("\n\t###"); 23 | fmt::print("\n\t### with "); 24 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k, 25 | nnz_in); 26 | fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n); 27 | fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n); 28 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 29 | sizeof(spblas::index_t)); 30 | fmt::print("\n\t###########################################################" 31 | "######################"); 32 | fmt::print("\n"); 33 | 34 | auto&& [values, rowptr, colind, shape, nnz] = generate_csr(m, k, nnz_in); 35 | 36 | csr_view a(values, rowptr, colind, shape, nnz); 37 | 38 | matrix_opt a_opt(a); 39 | 40 | std::vector x_values(k * n, 1); 41 | std::vector y_values(m * n, 0); 42 | 43 | md::mdspan x(x_values.data(), k, n); 44 | md::mdspan y(y_values.data(), m, n); 45 | 46 | auto a_view = scaled(2.f, a); 47 | 48 | // y = A * (alpha * x) 49 | multiply(a_opt, scaled(2.f, x), y); 50 | 51 | fmt::print("{}\n", spblas::__backend::values(y)); 52 | 53 | 
fmt::print("\tExample is completed!\n"); 54 | 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/cuda_allocator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "exception.hpp" 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace cusparse { 9 | 10 | template 11 | class cuda_allocator { 12 | public: 13 | using value_type = T; 14 | using pointer = T*; 15 | using const_pointer = const T*; 16 | using reference = T&; 17 | using const_reference = const T&; 18 | using size_type = std::size_t; 19 | using difference_type = std::ptrdiff_t; 20 | 21 | cuda_allocator() noexcept {} 22 | cuda_allocator(cudaStream_t stream) noexcept : stream_(stream) {} 23 | 24 | template 25 | cuda_allocator(const cuda_allocator& other) noexcept 26 | : stream_(other.stream()) {} 27 | 28 | cuda_allocator(const cuda_allocator&) = default; 29 | cuda_allocator& operator=(const cuda_allocator&) = default; 30 | ~cuda_allocator() = default; 31 | 32 | using is_always_equal = std::false_type; 33 | 34 | pointer allocate(std::size_t size) { 35 | void* ptr; 36 | this->throw_if_failure(cudaMallocAsync(&ptr, size * sizeof(T), stream())); 37 | 38 | return reinterpret_cast(ptr); 39 | } 40 | 41 | void deallocate(pointer ptr, std::size_t n = 0) { 42 | if (ptr != nullptr) { 43 | this->throw_if_failure(cudaFreeAsync(ptr, stream())); 44 | } 45 | } 46 | 47 | bool operator==(const cuda_allocator&) const = default; 48 | bool operator!=(const cuda_allocator&) const = default; 49 | 50 | template 51 | struct rebind { 52 | using other = cuda_allocator; 53 | }; 54 | 55 | cudaStream_t stream() const noexcept { 56 | return this->stream_; 57 | } 58 | 59 | private: 60 | void throw_if_failure(cudaError_t error) { 61 | if (error != cudaSuccess) { 62 | throw std::bad_alloc{}; 63 | } 64 | } 65 | 66 | cudaStream_t stream_ = nullptr; 67 | }; 68 | 69 | } // namespace 
cusparse 70 | 71 | } // namespace spblas 72 | -------------------------------------------------------------------------------- /include/spblas/backend/csr_builder.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | namespace __backend { 8 | 9 | template 10 | class csr_builder { 11 | public: 12 | csr_builder(csr_view view) : view_(view) { 13 | view_.rowptr()[0] = 0; 14 | } 15 | 16 | template <__ranges::forward_range Row> 17 | void insert_row(I row_index, Row&& row) { 18 | if (j_ptr_ + __ranges::size(row) > __ranges::size(view_.values()) || 19 | j_ptr_ + __ranges::size(row) > __ranges::size(view_.colind())) { 20 | throw std::runtime_error("csr_builder: not enough space in CSR."); 21 | } 22 | 23 | if (row_index + 1 >= __ranges::size(view_.rowptr())) { 24 | throw std::runtime_error("csr_builder: not enough rows in CSR."); 25 | } 26 | 27 | while (i_ < row_index) { 28 | view_.rowptr()[i_ + 1] = j_ptr_; 29 | i_++; 30 | } 31 | 32 | for (auto&& [j, v] : row) { 33 | view_.values()[j_ptr_] = v; 34 | view_.colind()[j_ptr_] = j; 35 | j_ptr_++; 36 | } 37 | view_.rowptr()[i_ + 1] = j_ptr_; 38 | i_++; 39 | } 40 | 41 | void finish() { 42 | while (i_ < view_.shape()[0]) { 43 | view_.rowptr()[i_ + 1] = j_ptr_; 44 | i_++; 45 | } 46 | } 47 | 48 | private: 49 | csr_view view_; 50 | O j_ptr_ = 0; 51 | I i_ = 0; 52 | }; 53 | 54 | template 55 | class csc_builder { 56 | public: 57 | csc_builder(csc_view view) : builder_(transposed(view)) {} 58 | 59 | template <__ranges::forward_range Column> 60 | void insert_column(I column_index, Column&& column) { 61 | builder_.insert_row(column_index, std::forward(column)); 62 | } 63 | 64 | void finish() { 65 | builder_.finish(); 66 | } 67 | 68 | private: 69 | csr_builder builder_; 70 | }; 71 | 72 | } // namespace __backend 73 | 74 | } // namespace spblas 75 | -------------------------------------------------------------------------------- 
/include/spblas/algorithms/transpose_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | template 10 | operation_info_t transpose_inspect(A&& a, B&& b) { 11 | return {}; 12 | } 13 | 14 | template 15 | requires(__detail::is_csr_view_v && __detail::is_csr_view_v) 16 | void transpose(A&& a, B&& b) { 17 | if (__backend::shape(a)[0] != __backend::shape(b)[1] || 18 | __backend::shape(a)[1] != __backend::shape(b)[0]) { 19 | throw std::invalid_argument( 20 | "transpose: matrix dimensions are incompatible."); 21 | } 22 | if (b.values().size() < __backend::size(a) || 23 | b.colind().size() < __backend::size(a)) { 24 | throw std::runtime_error("transpose: Transpose ran out of memory."); 25 | } 26 | using O = tensor_offset_t; 27 | 28 | const auto b_base = __detail::get_ultimate_base(b); 29 | const auto b_rowptr = b_base.rowptr(); 30 | const auto b_colind = b_base.colind(); 31 | const auto b_values = b_base.values(); 32 | 33 | __ranges::fill(b_rowptr, 0); 34 | 35 | for (auto&& [i, row] : __backend::rows(a)) { 36 | for (auto&& [j, _] : row) { 37 | b_rowptr[j + 1]++; 38 | } 39 | } 40 | 41 | std::exclusive_scan(b_rowptr.begin(), b_rowptr.end(), b_rowptr.begin(), O{}); 42 | 43 | for (auto&& [i, row] : __backend::rows(a)) { 44 | for (auto&& [j, v] : row) { 45 | const auto out_idx = b_rowptr[j + 1]; 46 | b_colind[out_idx] = i; 47 | b_values[out_idx] = v; 48 | b_rowptr[j + 1]++; 49 | } 50 | } 51 | 52 | b.update(b.values(), b.rowptr(), b.colind(), b.shape(), a.size()); 53 | } 54 | 55 | template 56 | requires(__detail::is_csr_view_v && __detail::is_csr_view_v) 57 | void transpose(operation_info_t& info, A&& a, B&& b) { 58 | transpose(std::forward(a), std::forward(b)); 59 | } 60 | 61 | } // namespace spblas 62 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/hip_allocator.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "exception.hpp" 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace rocsparse { 9 | 10 | template 11 | class hip_allocator { 12 | public: 13 | using value_type = T; 14 | using pointer = T*; 15 | using const_pointer = const T*; 16 | using reference = T&; 17 | using const_reference = const T&; 18 | using size_type = std::size_t; 19 | using difference_type = std::ptrdiff_t; 20 | 21 | hip_allocator() noexcept {} 22 | hip_allocator(hipStream_t stream) noexcept : stream_(stream) {} 23 | 24 | template 25 | hip_allocator(const hip_allocator& other) noexcept 26 | : stream_(other.stream()) {} 27 | 28 | hip_allocator(const hip_allocator&) = default; 29 | hip_allocator& operator=(const hip_allocator&) = default; 30 | ~hip_allocator() = default; 31 | 32 | using is_always_equal = std::false_type; 33 | 34 | pointer allocate(std::size_t size) { 35 | void* ptr; 36 | hipError_t error = hipMallocAsync(&ptr, size * sizeof(T), stream()); 37 | throw_if_failure(error); 38 | 39 | return reinterpret_cast(ptr); 40 | } 41 | 42 | void deallocate(pointer ptr, std::size_t n = 0) { 43 | if (ptr != nullptr) { 44 | hipError_t error = hipFreeAsync(ptr, stream()); 45 | throw_if_failure(error); 46 | } 47 | } 48 | 49 | bool operator==(const hip_allocator&) const = default; 50 | bool operator!=(const hip_allocator&) const = default; 51 | 52 | template 53 | struct rebind { 54 | using other = hip_allocator; 55 | }; 56 | 57 | hipStream_t stream() const noexcept { 58 | return stream_; 59 | } 60 | 61 | private: 62 | void throw_if_failure(hipError_t error) { 63 | if (error != hipSuccess) { 64 | throw std::bad_alloc{}; 65 | } 66 | } 67 | 68 | hipStream_t stream_ = nullptr; 69 | }; 70 | 71 | } // namespace rocsparse 72 | 73 | } // namespace spblas 74 | -------------------------------------------------------------------------------- 
/include/spblas/vendor/aoclsparse/detail/create_matrix_handle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace spblas { 8 | 9 | namespace __aoclsparse { 10 | 11 | template 12 | requires __detail::is_csr_view_v 13 | aoclsparse_matrix create_matrix_handle(M&& m) { 14 | aoclsparse_matrix handle = nullptr; 15 | aoclsparse_status status = __aoclsparse::aoclsparse_create_csr( 16 | &handle, aoclsparse_index_base_zero, m.shape()[0], m.shape()[1], m.size(), 17 | m.rowptr().data(), m.colind().data(), m.values().data()); 18 | 19 | if (status != aoclsparse_status_success) { 20 | throw std::runtime_error("create_matrix_handle: AOCL-Sparse failed while " 21 | "creating matrix handle."); 22 | } 23 | 24 | return handle; 25 | } 26 | 27 | template 28 | requires __detail::is_csc_view_v 29 | aoclsparse_matrix create_matrix_handle(M&& m) { 30 | aoclsparse_matrix handle = nullptr; 31 | aoclsparse_status status = __aoclsparse::aoclsparse_create_csr( 32 | &handle, aoclsparse_index_base_zero, m.shape()[1], m.shape()[0], m.size(), 33 | m.colptr().data(), m.rowind().data(), m.values().data()); 34 | 35 | if (status != aoclsparse_status_success) { 36 | throw std::runtime_error("create_matrix_handle: AOCL-Sparse failed while " 37 | "creating matrix handle."); 38 | } 39 | 40 | return handle; 41 | } 42 | 43 | template 44 | aoclsparse_operation get_transpose(M&& m) { 45 | static_assert(__detail::has_csr_base || __detail::has_csc_base); 46 | if constexpr (__detail::has_base) { 47 | return get_transpose(m.base()); 48 | } else if constexpr (__detail::is_csr_view_v) { 49 | return aoclsparse_operation_none; 50 | } else if constexpr (__detail::is_csc_view_v) { 51 | return aoclsparse_operation_transpose; 52 | } 53 | } 54 | 55 | } // namespace __aoclsparse 56 | 57 | } // namespace spblas 58 | -------------------------------------------------------------------------------- /examples/simple_sptrsv.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | using namespace spblas; 8 | 9 | using T = float; 10 | 11 | spblas::index_t m = 100; 12 | spblas::index_t nnz_in = 20; 13 | 14 | fmt::print("\n\t###########################################################" 15 | "######################"); 16 | fmt::print("\n\t### Running SpTRSV Example:"); 17 | fmt::print("\n\t###"); 18 | fmt::print("\n\t### solve for x: A * x = alpha * b"); 19 | fmt::print("\n\t###"); 20 | fmt::print("\n\t### with "); 21 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, m, 22 | nnz_in); 23 | fmt::print("\n\t### x, a dense vector, of size ({}, {})", m, 1); 24 | fmt::print("\n\t### b, a dense vector, of size ({}, {})", m, 1); 25 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 26 | sizeof(spblas::index_t)); 27 | fmt::print("\n\t###########################################################" 28 | "######################"); 29 | fmt::print("\n"); 30 | 31 | auto&& [values, rowptr, colind, shape, nnz] = 32 | generate_csr(m, m, nnz_in); 33 | 34 | // scale values of matrix to make the implicit unit diagonal matrix 35 | // be diagonally dominant, so it is solveable 36 | T scale_factor = 1e-3f; 37 | std::transform(values.begin(), values.end(), values.begin(), 38 | [scale_factor](T val) { return scale_factor * val; }); 39 | 40 | csr_view a(values, rowptr, colind, shape, nnz); 41 | 42 | // Scale every value of `a` by 5 in place. 
43 | // scale(5.f, a); 44 | 45 | std::vector x(m, 0); 46 | std::vector b(m, 1); 47 | 48 | T alpha = 1.2f; 49 | auto b_scaled = scaled(alpha, b); 50 | 51 | // solve for x: lower(A) * x = alpha * b 52 | triangular_solve(a, spblas::lower_triangle_t{}, 53 | spblas::implicit_unit_diagonal_t{}, b_scaled, x); 54 | 55 | fmt::print("\tExample is completed!\n"); 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace spblas { 11 | 12 | using index_t = std::int32_t; 13 | using offset_t = index_t; 14 | 15 | namespace detail { 16 | 17 | template 18 | constexpr static bool is_valid_cusparse_scalar_type_v = 19 | std::is_floating_point_v || std::is_same_v || 20 | std::is_same_v; 21 | 22 | template 23 | constexpr static bool is_valid_cusparse_index_type_v = 24 | std::is_same_v || std::is_same_v; 25 | 26 | template 27 | struct cuda_data_type; 28 | 29 | template <> 30 | struct cuda_data_type { 31 | constexpr static cudaDataType_t value = CUDA_R_32F; 32 | }; 33 | 34 | template <> 35 | struct cuda_data_type { 36 | constexpr static cudaDataType_t value = CUDA_R_64F; 37 | }; 38 | 39 | template <> 40 | struct cuda_data_type> { 41 | constexpr static cudaDataType_t value = CUDA_C_32F; 42 | }; 43 | 44 | template <> 45 | struct cuda_data_type> { 46 | constexpr static cudaDataType_t value = CUDA_C_64F; 47 | }; 48 | 49 | template <> 50 | struct cuda_data_type { 51 | constexpr static cudaDataType_t value = CUDA_R_8I; 52 | }; 53 | 54 | template <> 55 | struct cuda_data_type { 56 | constexpr static cudaDataType_t value = CUDA_R_32I; 57 | }; 58 | 59 | template 60 | constexpr static cudaDataType_t cuda_data_type_v = cuda_data_type::value; 61 | 62 | template 63 | struct cuda_index_type; 64 | 65 | template <> 66 | 
struct cuda_index_type { 67 | constexpr static cusparseIndexType_t value = CUSPARSE_INDEX_32I; 68 | }; 69 | 70 | template <> 71 | struct cuda_index_type { 72 | constexpr static cusparseIndexType_t value = CUSPARSE_INDEX_64I; 73 | }; 74 | 75 | template 76 | constexpr static cusparseIndexType_t cusparse_index_type_v = 77 | cuda_index_type::value; 78 | 79 | } // namespace detail 80 | 81 | } // namespace spblas 82 | -------------------------------------------------------------------------------- /include/spblas/views/csc_view.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | template 10 | class csc_view : public view_base { 11 | public: 12 | using scalar_type = T; 13 | using scalar_reference = T&; 14 | using index_type = I; 15 | using offset_type = O; 16 | 17 | csc_view(T* values, O* colptr, I* rowind, index shape, O nnz) 18 | : values_(values, nnz), colptr_(colptr, shape[1] + 1), 19 | rowind_(rowind, nnz), shape_(shape), nnz_(nnz) { 20 | if (colptr_.data() == nullptr) { 21 | colptr_ = std::span((O*) nullptr, (O*) nullptr); 22 | } 23 | } 24 | 25 | template <__ranges::contiguous_range V, __ranges::contiguous_range R, 26 | __ranges::contiguous_range C> 27 | csc_view(V&& values, R&& colptr, C&& rowind, index shape, O nnz) 28 | : values_(__ranges::data(values), __ranges::size(values)), 29 | colptr_(__ranges::data(colptr), __ranges::size(colptr)), 30 | rowind_(__ranges::data(rowind), __ranges::size(rowind)), shape_(shape), 31 | nnz_(nnz) {} 32 | 33 | void update(std::span values, std::span colptr, std::span rowind) { 34 | values_ = values; 35 | colptr_ = colptr; 36 | rowind_ = rowind; 37 | } 38 | 39 | void update(std::span values, std::span colptr, std::span rowind, 40 | index shape, O nnz) { 41 | values_ = values; 42 | colptr_ = colptr; 43 | rowind_ = rowind; 44 | shape_ = shape; 45 | nnz_ = nnz; 46 | } 47 | 48 | std::span values() const 
noexcept { 49 | return values_; 50 | } 51 | std::span colptr() const noexcept { 52 | return colptr_; 53 | } 54 | std::span rowind() const noexcept { 55 | return rowind_; 56 | } 57 | 58 | index shape() const noexcept { 59 | return shape_; 60 | } 61 | 62 | O size() const noexcept { 63 | return nnz_; 64 | } 65 | 66 | private: 67 | std::span values_; 68 | std::span colptr_; 69 | std::span rowind_; 70 | index shape_; 71 | O nnz_; 72 | }; 73 | 74 | } // namespace spblas 75 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/spmv_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | // 13 | // Defines the following APIs for SpMV: 14 | // 15 | // y = alpha* op(A) * x 16 | // 17 | // where A is a sparse matrices of CSR format and 18 | // x/y are dense vectors 19 | // 20 | // //operation_info_t multiply_inspect(A, x, y) 21 | // //void multiply_inspect(operation_info_t, A, x, y) 22 | // 23 | // //void multiply_compute(operation_info_t, A, x, y) 24 | // void multiply(A, x, y) 25 | // 26 | 27 | namespace spblas { 28 | 29 | template 30 | requires((__detail::has_csr_base || __detail::has_csc_base) && 31 | __detail::has_contiguous_range_base && 32 | __ranges::contiguous_range) 33 | void multiply(ExecutionPolicy&& policy, A&& a, X&& x, Y&& y) { 34 | log_trace(""); 35 | auto x_base = __detail::get_ultimate_base(x); 36 | 37 | auto alpha_optional = __detail::get_scaling_factor(a, x); 38 | tensor_scalar_t alpha = alpha_optional.value_or(1); 39 | 40 | auto a_data = __detail::get_ultimate_base(a).values().data(); 41 | 42 | auto&& q = __mkl::get_queue(policy, a_data); 43 | 44 | auto a_handle = __mkl::get_matrix_handle(q, a); 45 | auto a_transpose = __mkl::get_transpose(a); 46 | 47 | oneapi::mkl::sparse::gemv(q, a_transpose, alpha, a_handle, 48 | __ranges::data(x_base), 
0.0, __ranges::data(y)) 49 | .wait(); 50 | 51 | if (!__detail::has_matrix_opt(a)) { 52 | oneapi::mkl::sparse::release_matrix_handle(q, &a_handle).wait(); 53 | } 54 | } 55 | 56 | template 57 | requires((__detail::has_csr_base || __detail::has_csc_base) && 58 | __detail::has_contiguous_range_base && 59 | __ranges::contiguous_range) 60 | void multiply(A&& a, X&& x, Y&& y) { 61 | multiply(mkl::par, std::forward(a), std::forward(x), 62 | std::forward(y)); 63 | } 64 | 65 | } // namespace spblas 66 | -------------------------------------------------------------------------------- /examples/device/device_spmv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | int main(int argc, char** argv) { 10 | using value_t = float; 11 | using index_t = spblas::index_t; 12 | using offset_t = spblas::offset_t; 13 | 14 | index_t m = 100; 15 | index_t n = 100; 16 | index_t nnz_in = 10; 17 | 18 | fmt::print("\n\t###########################################################" 19 | "######################"); 20 | fmt::print("\n\t### Running SpMV Example:"); 21 | fmt::print("\n\t###"); 22 | fmt::print("\n\t### y = alpha * A * x"); 23 | fmt::print("\n\t###"); 24 | fmt::print("\n\t### with "); 25 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, n, 26 | nnz_in); 27 | fmt::print("\n\t### x, a dense vector, of size ({}, {})", n, 1); 28 | fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, 1); 29 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 30 | sizeof(index_t)); 31 | fmt::print("\n\t###########################################################" 32 | "######################"); 33 | fmt::print("\n"); 34 | 35 | auto&& [values, rowptr, colind, shape, nnz] = 36 | spblas::generate_csr(m, n, nnz_in); 37 | 38 | thrust::device_vector d_values(values); 39 | thrust::device_vector d_rowptr(rowptr); 40 | thrust::device_vector 
d_colind(colind); 41 | 42 | spblas::csr_view a( 43 | d_values.data().get(), d_rowptr.data().get(), d_colind.data().get(), 44 | shape, nnz); 45 | 46 | // Scale every value of `a` by 5 in place. 47 | // scale(5.f, a); 48 | 49 | std::vector x(n, 1); 50 | std::vector y(m, 0); 51 | 52 | thrust::device_vector d_x(x); 53 | thrust::device_vector d_y(y); 54 | 55 | std::span x_span(d_x.data().get(), n); 56 | std::span y_span(d_y.data().get(), m); 57 | 58 | // y = A * x 59 | spblas::multiply(a, x_span, y_span); 60 | 61 | thrust::copy(d_y.begin(), d_y.end(), y.begin()); 62 | 63 | fmt::print("\tExample is completed!\n"); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /include/spblas/views/csr_view.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace spblas { 8 | 9 | template 10 | class csr_builder; 11 | 12 | template 13 | class csr_view : public view_base { 14 | public: 15 | using scalar_type = T; 16 | using scalar_reference = T&; 17 | using index_type = I; 18 | using offset_type = O; 19 | 20 | csr_view(T* values, O* rowptr, I* colind, index shape, O nnz) 21 | : values_(values, nnz), rowptr_(rowptr, shape[0] + 1), 22 | colind_(colind, nnz), shape_(shape), nnz_(nnz) { 23 | if (rowptr_.data() == nullptr) { 24 | rowptr_ = std::span((O*) nullptr, (O*) nullptr); 25 | } 26 | } 27 | 28 | template <__ranges::contiguous_range V, __ranges::contiguous_range R, 29 | __ranges::contiguous_range C> 30 | csr_view(V&& values, R&& rowptr, C&& colind, index shape, O nnz) 31 | : values_(__ranges::data(values), __ranges::size(values)), 32 | rowptr_(__ranges::data(rowptr), __ranges::size(rowptr)), 33 | colind_(__ranges::data(colind), __ranges::size(colind)), shape_(shape), 34 | nnz_(nnz) {} 35 | 36 | void update(std::span values, std::span rowptr, std::span colind) { 37 | values_ = values; 38 | rowptr_ = rowptr; 39 | colind_ 
= colind; 40 | } 41 | 42 | void update(std::span values, std::span rowptr, std::span colind, 43 | index shape, O nnz) { 44 | values_ = values; 45 | rowptr_ = rowptr; 46 | colind_ = colind; 47 | shape_ = shape; 48 | nnz_ = nnz; 49 | } 50 | 51 | std::span values() const noexcept { 52 | return values_; 53 | } 54 | std::span rowptr() const noexcept { 55 | return rowptr_; 56 | } 57 | std::span colind() const noexcept { 58 | return colind_; 59 | } 60 | 61 | index shape() const noexcept { 62 | return shape_; 63 | } 64 | 65 | O size() const noexcept { 66 | return nnz_; 67 | } 68 | 69 | friend class csr_builder; 70 | 71 | private: 72 | std::span values_; 73 | std::span rowptr_; 74 | std::span colind_; 75 | index shape_; 76 | O nnz_; 77 | }; 78 | 79 | } // namespace spblas 80 | -------------------------------------------------------------------------------- /include/spblas/backend/spa_accumulator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace spblas { 11 | 12 | namespace __backend { 13 | 14 | template 15 | class spa_accumulator { 16 | public: 17 | spa_accumulator(I count) : data_(count), set_(count, false) {} 18 | 19 | T& operator[](I pos) { 20 | if (!set_[pos]) { 21 | stored_.push_back(pos); 22 | set_[pos] = true; 23 | } 24 | return data_[pos]; 25 | } 26 | 27 | bool contains(I pos) { 28 | return set_[pos]; 29 | } 30 | 31 | void clear() { 32 | for (auto&& pos : stored_) { 33 | set_[pos] = false; 34 | data_[pos] = 0; 35 | } 36 | stored_.clear(); 37 | } 38 | 39 | I size() const { 40 | return stored_.size(); 41 | } 42 | 43 | bool empty() { 44 | return size() == 0; 45 | } 46 | 47 | void sort() { 48 | std::sort(stored_.begin(), stored_.end()); 49 | } 50 | 51 | auto get() { 52 | std::span data(data_); 53 | std::span stored(stored_); 54 | 55 | return stored | __ranges::views::transform([=](auto idx) { 56 | return std::make_tuple(idx, 
std::reference_wrapper(data[idx])); 57 | }); 58 | } 59 | 60 | private: 61 | std::vector data_; 62 | std::vector set_; 63 | std::vector stored_; 64 | }; 65 | 66 | template 67 | class spa_set { 68 | public: 69 | spa_set(T count) : set_(count, false) {} 70 | 71 | void insert(T key) { 72 | if (!set_[key]) { 73 | stored_.push_back(key); 74 | set_[key] = true; 75 | } 76 | } 77 | 78 | bool contains(T key) { 79 | return set_[key]; 80 | } 81 | 82 | void clear() { 83 | for (auto&& pos : stored_) { 84 | set_[pos] = false; 85 | } 86 | stored_.clear(); 87 | } 88 | 89 | T size() const { 90 | return stored_.size(); 91 | } 92 | 93 | bool empty() { 94 | return size() == 0; 95 | } 96 | 97 | auto get() const { 98 | return std::span(stored_); 99 | } 100 | 101 | private: 102 | std::vector set_; 103 | std::vector stored_; 104 | }; 105 | 106 | } // namespace __backend 107 | 108 | } // namespace spblas 109 | -------------------------------------------------------------------------------- /examples/simple_spgemm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main(int argc, char** argv) { 10 | using namespace spblas; 11 | namespace md = spblas::__mdspan; 12 | 13 | using T = float; 14 | 15 | spblas::index_t m = 100; 16 | spblas::index_t n = 10; 17 | spblas::index_t k = 100; 18 | spblas::index_t nnz = 100; 19 | 20 | fmt::print("\n\t###########################################################" 21 | "######################"); 22 | fmt::print("\n\t### Running SpGEMM Example:"); 23 | fmt::print("\n\t###"); 24 | fmt::print("\n\t### C = A * B"); 25 | fmt::print("\n\t###"); 26 | fmt::print("\n\t### with "); 27 | fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k, 28 | nnz); 29 | fmt::print("\n\t### B, in CSR format, of size ({}, {}) with nnz = {}", k, n, 30 | nnz); 31 | fmt::print("\n\t### C, in CSR format, of size ({}, {}) with nnz to be" 32 | " 
determined", 33 | m, n); 34 | fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)", 35 | sizeof(spblas::index_t)); 36 | fmt::print("\n\t###########################################################" 37 | "######################"); 38 | fmt::print("\n"); 39 | 40 | auto&& [a_values, a_rowptr, a_colind, a_shape, as] = 41 | generate_csr(m, k, nnz); 42 | auto&& [b_values, b_rowptr, b_colind, b_shape, bs] = 43 | generate_csr(k, n, nnz); 44 | 45 | csr_view a(a_values, a_rowptr, a_colind, a_shape, nnz); 46 | csr_view b(b_values, b_rowptr, b_colind, b_shape, nnz); 47 | 48 | std::vector c_rowptr(m + 1); 49 | 50 | csr_view c(nullptr, c_rowptr.data(), nullptr, {m, n}, 0); 51 | 52 | auto info = multiply_compute(scaled(1.f, a), b, c); 53 | 54 | fmt::print("\t\t C_nnz = {}", info.result_nnz()); 55 | 56 | std::vector c_values(info.result_nnz()); 57 | std::vector c_colind(info.result_nnz()); 58 | c.update(c_values, c_rowptr, c_colind); 59 | 60 | multiply_fill(info, scaled(1.f, a), b, c); 61 | 62 | for (auto&& [i, row] : spblas::__backend::rows(c)) { 63 | fmt::print("{}: {}\n", i, row); 64 | } 65 | 66 | fmt::print("\tExample is completed!\n"); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /include/spblas/detail/index.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace spblas { 13 | 14 | template 15 | class index { 16 | public: 17 | using index_type = T; 18 | 19 | using first_type = T; 20 | using second_type = T; 21 | 22 | constexpr index_type operator[](index_type dim) const noexcept { 23 | if (dim == 0) { 24 | return first; 25 | } else { 26 | return second; 27 | } 28 | } 29 | 30 | constexpr index(index_type first, index_type second) 31 | : first(first), second(second) {} 32 | 33 | template 34 | requires(!std::is_same_v && 
__detail::tuple_like) 35 | constexpr index(Tuple tuple) : first(get<0>(tuple)), second(get<1>(tuple)) {} 36 | 37 | template 38 | constexpr index(std::initializer_list tuple) { 39 | assert(tuple.size() == 2); 40 | first = *tuple.begin(); 41 | second = *(tuple.begin() + 1); 42 | } 43 | 44 | constexpr bool operator==(const index&) const noexcept = default; 45 | 46 | index() = default; 47 | ~index() = default; 48 | index(const index&) = default; 49 | index& operator=(const index&) = default; 50 | index(index&&) = default; 51 | index& operator=(index&&) = default; 52 | 53 | index_type first; 54 | index_type second; 55 | }; 56 | 57 | template 58 | inline constexpr I get(spblas::index index) 59 | requires(Index <= 1) 60 | { 61 | if constexpr (Index == 0) { 62 | return index.first; 63 | } 64 | if constexpr (Index == 1) { 65 | return index.second; 66 | } 67 | } 68 | 69 | } // namespace spblas 70 | 71 | namespace std { 72 | 73 | template 74 | struct tuple_element> 75 | : tuple_element> {}; 76 | 77 | template 78 | struct tuple_size> : integral_constant {}; 79 | 80 | template 81 | inline constexpr I get(spblas::index index) 82 | requires(Index <= 1) 83 | { 84 | if constexpr (Index == 0) { 85 | return index.first; 86 | } 87 | if constexpr (Index == 1) { 88 | return index.second; 89 | } 90 | } 91 | 92 | } // namespace std 93 | -------------------------------------------------------------------------------- /include/spblas/vendor/aoclsparse/spmv_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights reserved. 
3 | * $COPYRIGHT$ 4 | * 5 | * Additional copyrights may follow 6 | * 7 | * $HEADER$ 8 | */ 9 | 10 | #pragma once 11 | 12 | #include "aoclsparse.h" 13 | #include 14 | 15 | #include "aocl_wrappers.hpp" 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | // 23 | // Defines the following APIs for SpMV: 24 | // 25 | // y =alpha* op(A) * x 26 | // 27 | // where A is a sparse matrices of CSR format and 28 | // x/y are dense vectors 29 | 30 | namespace spblas { 31 | 32 | template 33 | requires(__detail::has_csr_base || __detail::has_csc_base) && 34 | __detail::has_contiguous_range_base && 35 | __ranges::contiguous_range 36 | void multiply(A&& a, X&& x, Y&& y) { 37 | log_trace(""); 38 | auto a_base = __detail::get_ultimate_base(a); 39 | auto x_base = __detail::get_ultimate_base(x); 40 | 41 | aoclsparse_matrix csrA = __aoclsparse::create_matrix_handle(a_base); 42 | aoclsparse_operation opA = __aoclsparse::get_transpose(a); 43 | 44 | using T = tensor_scalar_t; 45 | using I = tensor_index_t; 46 | using O = tensor_offset_t; 47 | 48 | auto alpha_optional = __detail::get_scaling_factor(a, x); 49 | T alpha = alpha_optional.value_or(1); 50 | 51 | aoclsparse_mat_descr descr = NULL; 52 | aoclsparse_status status = aoclsparse_create_mat_descr(&descr); 53 | if (status != aoclsparse_status_success) { 54 | fmt::print("\t descr creation failed\n"); 55 | } 56 | aoclsparse_set_mat_type(descr, aoclsparse_matrix_type_general); 57 | aoclsparse_index_base indexing = aoclsparse_index_base_zero; 58 | 59 | // Do we need these two 60 | aoclsparse_set_mv_hint(csrA, opA, descr, 1); 61 | aoclsparse_optimize(csrA); 62 | 63 | T beta = static_cast(0.0); 64 | status = __aoclsparse::aoclsparse_mv(opA, &alpha, csrA, descr, 65 | __ranges::data(x_base), &beta, 66 | __ranges::data(y)); 67 | if (status != aoclsparse_status_success) { 68 | fmt::print("\t SpMV failed: {}\n", (int) status); 69 | } 70 | 71 | aoclsparse_destroy(&csrA); 72 | aoclsparse_destroy_mat_descr(descr); 73 | } 74 | 
75 | } // namespace spblas 76 | -------------------------------------------------------------------------------- /include/spblas/algorithms/triangular_solve_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace spblas { 10 | 11 | template 12 | requires(__backend::row_iterable && __backend::lookupable && 13 | __backend::lookupable) 14 | void triangular_solve(A&& a, Triangle t, DiagonalStorage d, B&& b, X&& x) { 15 | static_assert(std::is_same_v || 16 | std::is_same_v); 17 | assert(__backend::shape(a)[0] == __backend::shape(a)[1]); 18 | 19 | assert(__backend::shape(a)[1] == __backend::shape(x) && 20 | __backend::shape(a)[0] == __backend::shape(b)); 21 | 22 | using T = tensor_scalar_t; 23 | using V = decltype(std::declval>() * 24 | std::declval>()); 25 | 26 | T diagonal_value = 0; 27 | 28 | if constexpr (std::is_same_v) { 29 | for (auto&& [i, a_row] : __ranges::views::reverse(__backend::rows(a))) { 30 | V dot_product = 0; 31 | for (auto&& [k, a_v] : a_row) { 32 | if (k > i) { 33 | dot_product += a_v * __backend::lookup(x, k); 34 | } else if (i == k) { 35 | diagonal_value = a_v; 36 | } 37 | } 38 | if constexpr (std::is_same_v) { 39 | __backend::lookup(x, i) = 40 | (__backend::lookup(b, i) - dot_product) / diagonal_value; 41 | } else { 42 | __backend::lookup(x, i) = __backend::lookup(b, i) - dot_product; 43 | } 44 | } 45 | } else if constexpr (std::is_same_v) { 46 | for (auto&& [i, a_row] : __backend::rows(a)) { 47 | V dot_product = 0; 48 | for (auto&& [k, a_v] : a_row) { 49 | if (k < i) { 50 | dot_product += a_v * __backend::lookup(x, k); 51 | } else if (i == k) { 52 | diagonal_value = a_v; 53 | } 54 | } 55 | if constexpr (std::is_same_v) { 56 | __backend::lookup(x, i) = 57 | (__backend::lookup(b, i) - dot_product) / diagonal_value; 58 | } else { 59 | __backend::lookup(x, i) = __backend::lookup(b, i) - dot_product; 60 | } 61 | } 
62 | } 63 | } 64 | 65 | } // namespace spblas 66 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/operation_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace spblas { 6 | 7 | namespace __mkl { 8 | 9 | struct operation_state_t { 10 | 11 | oneapi::mkl::sparse::matrix_handle_t a_handle = nullptr; 12 | oneapi::mkl::sparse::matrix_handle_t b_handle = nullptr; 13 | oneapi::mkl::sparse::matrix_handle_t c_handle = nullptr; 14 | oneapi::mkl::sparse::matrix_handle_t d_handle = nullptr; 15 | 16 | oneapi::mkl::sparse::matmat_descr_t descr = nullptr; 17 | 18 | void* c_rowptr = nullptr; 19 | 20 | sycl::queue q; 21 | 22 | operation_state_t() = default; 23 | 24 | operation_state_t(oneapi::mkl::sparse::matrix_handle_t a_handle, 25 | oneapi::mkl::sparse::matrix_handle_t b_handle, 26 | oneapi::mkl::sparse::matrix_handle_t c_handle, 27 | oneapi::mkl::sparse::matrix_handle_t d_handle, 28 | oneapi::mkl::sparse::matmat_descr_t descr, void* c_rowptr, 29 | sycl::queue q) 30 | : a_handle(a_handle), b_handle(b_handle), c_handle(c_handle), 31 | d_handle(d_handle), descr(descr), c_rowptr(c_rowptr), q(q) {} 32 | operation_state_t(operation_state_t&& other) { 33 | *this = std::move(other); 34 | } 35 | 36 | operation_state_t& operator=(operation_state_t&& other) { 37 | a_handle = other.a_handle; 38 | b_handle = other.b_handle; 39 | c_handle = other.c_handle; 40 | d_handle = other.d_handle; 41 | 42 | descr = other.descr; 43 | c_rowptr = other.c_rowptr; 44 | q = other.q; 45 | 46 | other.a_handle = other.b_handle = other.c_handle = other.d_handle = nullptr; 47 | other.descr = nullptr; 48 | other.c_rowptr = nullptr; 49 | 50 | return *this; 51 | } 52 | 53 | operation_state_t(const operation_state_t& other) = delete; 54 | 55 | ~operation_state_t() { 56 | release_matrix_handle(a_handle); 57 | release_matrix_handle(b_handle); 58 | 
release_matrix_handle(c_handle); 59 | release_matrix_handle(d_handle); 60 | release_matmat_descr(descr); 61 | } 62 | 63 | private: 64 | void release_matrix_handle(oneapi::mkl::sparse::matrix_handle_t& handle) { 65 | if (handle != nullptr) { 66 | oneapi::mkl::sparse::release_matrix_handle(q, &handle); 67 | } 68 | } 69 | 70 | void release_matmat_descr(oneapi::mkl::sparse::matmat_descr_t& descr) { 71 | if (descr != nullptr) { 72 | oneapi::mkl::sparse::release_matmat_descr(&descr); 73 | } 74 | } 75 | }; 76 | 77 | } // namespace __mkl 78 | 79 | } // namespace spblas 80 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace spblas { 10 | 11 | using index_t = std::int32_t; 12 | using offset_t = index_t; 13 | 14 | namespace detail { 15 | 16 | template 17 | constexpr static bool is_valid_rocsparse_scalar_type_v = 18 | std::is_same_v || std::is_same_v || 19 | std::is_floating_point_v; 20 | 21 | template 22 | constexpr static bool is_valid_rocsparse_index_type_v = 23 | std::is_same_v || std::is_same_v || 24 | std::is_same_v; 25 | 26 | template 27 | struct rocsparse_data_type; 28 | 29 | template <> 30 | struct rocsparse_data_type { 31 | constexpr static rocsparse_datatype value = rocsparse_datatype_i32_r; 32 | }; 33 | 34 | template <> 35 | struct rocsparse_data_type { 36 | constexpr static rocsparse_datatype value = rocsparse_datatype_u32_r; 37 | }; 38 | 39 | template <> 40 | struct rocsparse_data_type { 41 | constexpr static rocsparse_datatype value = rocsparse_datatype_f32_r; 42 | }; 43 | 44 | template <> 45 | struct rocsparse_data_type { 46 | constexpr static rocsparse_datatype value = rocsparse_datatype_f64_r; 47 | }; 48 | 49 | template <> 50 | struct rocsparse_data_type> { 51 | constexpr static rocsparse_datatype value = 
rocsparse_datatype_f32_c; 52 | }; 53 | 54 | template <> 55 | struct rocsparse_data_type> { 56 | constexpr static rocsparse_datatype value = rocsparse_datatype_f64_c; 57 | }; 58 | 59 | template 60 | constexpr static rocsparse_datatype rocsparse_data_type_v = 61 | rocsparse_data_type::value; 62 | 63 | template 64 | struct rocsparse_index_type; 65 | 66 | template <> 67 | struct rocsparse_index_type { 68 | constexpr static rocsparse_indextype value = rocsparse_indextype_u16; 69 | }; 70 | 71 | template <> 72 | struct rocsparse_index_type { 73 | constexpr static rocsparse_indextype value = rocsparse_indextype_i32; 74 | }; 75 | 76 | template <> 77 | struct rocsparse_index_type { 78 | constexpr static rocsparse_indextype value = rocsparse_indextype_i64; 79 | }; 80 | 81 | template 82 | constexpr static rocsparse_indextype rocsparse_index_type_v = 83 | rocsparse_index_type::value; 84 | 85 | } // namespace detail 86 | 87 | } // namespace spblas 88 | -------------------------------------------------------------------------------- /include/spblas/vendor/onemkl_sycl/detail/create_matrix_handle.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace spblas { 8 | 9 | namespace __mkl { 10 | 11 | template 12 | requires __detail::is_csr_view_v 13 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q, 14 | M&& m) { 15 | oneapi::mkl::sparse::matrix_handle_t handle = nullptr; 16 | oneapi::mkl::sparse::init_matrix_handle(&handle); 17 | 18 | oneapi::mkl::sparse::set_csr_data( 19 | q, handle, m.shape()[0], m.shape()[1], oneapi::mkl::index_base::zero, 20 | m.rowptr().data(), m.colind().data(), m.values().data()) 21 | .wait(); 22 | 23 | return handle; 24 | } 25 | 26 | template 27 | requires __detail::is_csc_view_v 28 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q, 29 | M&& m) { 30 | oneapi::mkl::sparse::matrix_handle_t handle = nullptr; 31 | 
oneapi::mkl::sparse::init_matrix_handle(&handle); 32 | 33 | oneapi::mkl::sparse::set_csr_data( 34 | q, handle, m.shape()[1], m.shape()[0], oneapi::mkl::index_base::zero, 35 | m.colptr().data(), m.rowind().data(), m.values().data()) 36 | .wait(); 37 | 38 | return handle; 39 | } 40 | 41 | template 42 | requires __detail::has_base 43 | oneapi::mkl::sparse::matrix_handle_t create_matrix_handle(sycl::queue& q, 44 | M&& m) { 45 | return create_matrix_handle(q, m.base()); 46 | } 47 | 48 | // 49 | // Takes in a CSR or CSR_transpose (aka CSC) or CSC or CSC_transpose 50 | // and returns the transpose value associated with it being represented 51 | // in the CSR format (since oneMKL SYCL currently does not have CSC 52 | // format 53 | // 54 | // CSR = CSR + nontrans 55 | // CSR_transpose = CSR + trans 56 | // CSC = CSR + trans 57 | // CSC_transpose -> CSR + nontrans 58 | // 59 | template 60 | oneapi::mkl::transpose get_transpose(M&& m) { 61 | static_assert(__detail::has_csr_base || __detail::has_csc_base); 62 | if constexpr (__detail::has_base) { 63 | return get_transpose(m.base()); 64 | } else if constexpr (__detail::is_csr_view_v) { 65 | return oneapi::mkl::transpose::nontrans; 66 | } else if constexpr (__detail::is_csc_view_v) { 67 | return oneapi::mkl::transpose::trans; 68 | } 69 | } 70 | 71 | } // namespace __mkl 72 | 73 | } // namespace spblas 74 | -------------------------------------------------------------------------------- /include/spblas/vendor/rocsparse/detail/spmv_state_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "../hip_allocator.hpp" 7 | #include "abstract_operation_state.hpp" 8 | 9 | namespace spblas { 10 | namespace __rocsparse { 11 | 12 | class spmv_state_t : public abstract_operation_state_t { 13 | public: 14 | spmv_state_t() : spmv_state_t(rocsparse::hip_allocator{}) {} 15 | 16 | spmv_state_t(rocsparse::hip_allocator alloc) 17 | : alloc_(alloc), 
buffer_size_(0), workspace_(nullptr), a_descr_(nullptr), 18 | b_descr_(nullptr), c_descr_(nullptr) {} 19 | 20 | ~spmv_state_t() { 21 | if (workspace_) { 22 | alloc_.deallocate(workspace_, buffer_size_); 23 | } 24 | if (a_descr_) { 25 | rocsparse_destroy_spmat_descr(a_descr_); 26 | } 27 | if (b_descr_) { 28 | rocsparse_destroy_dnvec_descr(b_descr_); 29 | } 30 | if (c_descr_) { 31 | rocsparse_destroy_dnvec_descr(c_descr_); 32 | } 33 | } 34 | 35 | // Workspace management 36 | void* workspace() const { 37 | return workspace_; 38 | } 39 | size_t buffer_size() const { 40 | return buffer_size_; 41 | } 42 | 43 | void allocate_workspace(size_t size) { 44 | if (size > buffer_size_) { 45 | if (workspace_) { 46 | alloc_.deallocate(workspace_, buffer_size_); 47 | } 48 | buffer_size_ = size; 49 | workspace_ = alloc_.allocate(size); 50 | } 51 | } 52 | 53 | // Descriptor accessors 54 | rocsparse_spmat_descr a_descriptor() const { 55 | return a_descr_; 56 | } 57 | rocsparse_dnvec_descr b_descriptor() const { 58 | return b_descr_; 59 | } 60 | rocsparse_dnvec_descr c_descriptor() const { 61 | return c_descr_; 62 | } 63 | 64 | // Descriptor setters 65 | void set_a_descriptor(rocsparse_spmat_descr descr) { 66 | if (a_descr_) { 67 | rocsparse_destroy_spmat_descr(a_descr_); 68 | } 69 | a_descr_ = descr; 70 | } 71 | 72 | void set_b_descriptor(rocsparse_dnvec_descr descr) { 73 | if (b_descr_) { 74 | rocsparse_destroy_dnvec_descr(b_descr_); 75 | } 76 | b_descr_ = descr; 77 | } 78 | 79 | void set_c_descriptor(rocsparse_dnvec_descr descr) { 80 | if (c_descr_) { 81 | rocsparse_destroy_dnvec_descr(c_descr_); 82 | } 83 | c_descr_ = descr; 84 | } 85 | 86 | private: 87 | rocsparse::hip_allocator alloc_; 88 | size_t buffer_size_; 89 | char* workspace_; 90 | 91 | // Descriptors 92 | rocsparse_spmat_descr a_descr_; 93 | rocsparse_dnvec_descr b_descr_; 94 | rocsparse_dnvec_descr c_descr_; 95 | }; 96 | 97 | } // namespace __rocsparse 98 | } // namespace spblas 99 | 
-------------------------------------------------------------------------------- /include/spblas/views/matrix_opt_impl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 8 | #include 9 | #include 10 | #endif 11 | 12 | namespace spblas { 13 | 14 | template 15 | requires(view && 16 | (__detail::is_csr_view_v || __detail::is_csc_view_v) ) 17 | class matrix_opt : public view_base { 18 | public: 19 | using scalar_type = tensor_scalar_t; 20 | using scalar_reference = tensor_scalar_reference_t; 21 | using index_type = tensor_index_t; 22 | using offset_type = tensor_offset_t; 23 | 24 | matrix_opt(M matrix) : matrix_(matrix) { 25 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 26 | matrix_handle_ = nullptr; 27 | #endif 28 | } 29 | 30 | ~matrix_opt() { 31 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 32 | if (matrix_handle_) { 33 | // q here needs to be on same context as queue in operations, 34 | // idealy from execution policy 35 | sycl::queue q(sycl::cpu_selector_v); 36 | oneapi::mkl::sparse::release_matrix_handle(q, &matrix_handle_, {}).wait(); 37 | matrix_handle_ = nullptr; 38 | } 39 | #endif 40 | } 41 | 42 | auto shape() const noexcept { 43 | return __backend::shape(base()); 44 | } 45 | 46 | index_type size() const noexcept { 47 | return __backend::size(base()); 48 | } 49 | 50 | auto base() { 51 | return matrix_; 52 | } 53 | 54 | auto base() const { 55 | return matrix_; 56 | } 57 | 58 | private: 59 | friend auto tag_invoke(__backend::size_fn_, matrix_opt matrix) { 60 | return matrix.size(); 61 | } 62 | 63 | friend auto tag_invoke(__backend::shape_fn_, matrix_opt matrix) { 64 | return matrix.shape(); 65 | } 66 | 67 | friend scalar_reference tag_invoke(__backend::lookup_fn_, matrix_opt matrix, 68 | index_type i, index_type j) 69 | requires(__backend::lookupable) 70 | { 71 | return __backend::lookup(matrix.base(), i, j); 72 | } 73 | 74 | friend auto 
tag_invoke(__backend::rows_fn_, matrix_opt matrix) 75 | requires(__backend::row_iterable) 76 | { 77 | return __backend::rows(matrix.base()); 78 | } 79 | 80 | friend auto tag_invoke(__backend::lookup_row_fn_, matrix_opt matrix, 81 | index_type row_index) 82 | requires(__backend::row_lookupable) 83 | { 84 | return __backend::lookup_row(matrix.base(), row_index); 85 | } 86 | 87 | public: 88 | M matrix_; 89 | 90 | #ifdef SPBLAS_ENABLE_ONEMKL_SYCL 91 | oneapi::mkl::sparse::matrix_handle_t matrix_handle_; 92 | #endif 93 | }; 94 | 95 | template 96 | matrix_opt(T t) -> matrix_opt; 97 | 98 | } // namespace spblas 99 | -------------------------------------------------------------------------------- /include/spblas/vendor/cusparse/exception.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace spblas { 9 | 10 | namespace __cusparse { 11 | 12 | // Throw an exception if the cudaError_t is not cudaSuccess. 13 | void throw_if_error(cudaError_t error_code, std::string prefix = "") { 14 | if (error_code == cudaSuccess) { 15 | return; 16 | } 17 | std::string name = cudaGetErrorName(error_code); 18 | std::string message = cudaGetErrorString(error_code); 19 | throw std::runtime_error(prefix + "CUDA encountered an error " + name + 20 | ": \"" + message + "\""); 21 | } 22 | 23 | // Throw an exception if the cusparseStatus_t is not CUSPARSE_STATUS_SUCCESS. 
24 | void throw_if_error(cusparseStatus_t error_code) { 25 | if (error_code == CUSPARSE_STATUS_SUCCESS) { 26 | return; 27 | } else if (error_code == CUSPARSE_STATUS_NOT_INITIALIZED) { 28 | throw std::runtime_error( 29 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_NOT_INITIALIZED\""); 30 | } else if (error_code == CUSPARSE_STATUS_ALLOC_FAILED) { 31 | throw std::runtime_error( 32 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_ALLOC_FAILED\""); 33 | } else if (error_code == CUSPARSE_STATUS_INVALID_VALUE) { 34 | throw std::runtime_error( 35 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_INVALID_VALUE\""); 36 | } else if (error_code == CUSPARSE_STATUS_ARCH_MISMATCH) { 37 | throw std::runtime_error( 38 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_ARCH_MISMATCH\""); 39 | } else if (error_code == CUSPARSE_STATUS_EXECUTION_FAILED) { 40 | throw std::runtime_error( 41 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_EXECUTION_FAILED\""); 42 | } else if (error_code == CUSPARSE_STATUS_INTERNAL_ERROR) { 43 | throw std::runtime_error( 44 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_INTERNAL_ERROR\""); 45 | } else if (error_code == CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED) { 46 | throw std::runtime_error("cuSPARSE encountered an error: " 47 | "\"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED\""); 48 | } else if (error_code == CUSPARSE_STATUS_NOT_SUPPORTED) { 49 | throw std::runtime_error( 50 | "cuSPARSE encountered an error: \"CUSPARSE_STATUS_NOT_SUPPORTED\""); 51 | } else if (error_code == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) { 52 | throw std::runtime_error("cuSPARSE encountered an error: " 53 | "\"CUSPARSE_STATUS_INSUFFICIENT_RESOURCES\""); 54 | } else { 55 | throw std::runtime_error( 56 | "cuSPARSE encountered an error: \"unknown error\""); 57 | } 58 | } 59 | 60 | } // namespace __cusparse 61 | 62 | } // namespace spblas 63 | -------------------------------------------------------------------------------- /test/gtest/transpose_test.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "util.hpp" 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | TEST(CsrView, Transpose) { 11 | using T = float; 12 | using I = spblas::index_t; 13 | using O = spblas::offset_t; 14 | 15 | for (auto&& [m, k, nnz] : util::dims) { 16 | // Generate CSR Matrix A. 17 | auto [a_values, a_rowptr, a_colind, a_shape, a_nnz] = 18 | spblas::generate_csr(m, k, nnz); 19 | 20 | spblas::csr_view a(a_values, a_rowptr, a_colind, a_shape, a_nnz); 21 | 22 | // Transpose; B = A_T 23 | 24 | spblas::index b_shape(a.shape()[1], a.shape()[0]); 25 | 26 | std::vector b_rowptr(b_shape[0] + 1); 27 | std::vector b_colind(a.size()); 28 | std::vector b_values(a.size()); 29 | 30 | spblas::csr_view b(b_values, b_rowptr, b_colind, b_shape, 31 | a.size()); 32 | 33 | auto info = spblas::transpose_inspect(a, b); 34 | spblas::transpose(info, a, b); 35 | 36 | // Create transposed COO for reference. 37 | std::vector ref_values; 38 | std::vector ref_rowind; 39 | std::vector ref_colind; 40 | 41 | for (auto&& [i, row] : spblas::__backend::rows(a)) { 42 | for (auto&& [j, v] : row) { 43 | ref_values.push_back(v); 44 | ref_rowind.push_back(j); 45 | ref_colind.push_back(i); 46 | } 47 | } 48 | 49 | // Create COO from transposed matrix for test. 50 | std::vector test_values; 51 | std::vector test_rowind; 52 | std::vector test_colind; 53 | 54 | for (auto&& [i, row] : spblas::__backend::rows(b)) { 55 | for (auto&& [j, v] : row) { 56 | test_values.push_back(v); 57 | test_rowind.push_back(i); 58 | test_colind.push_back(j); 59 | } 60 | } 61 | 62 | // Ensure both COO matrices are sorted. 
63 | spblas::__ranges::sort( 64 | spblas::__ranges::views::zip(ref_rowind, ref_colind, ref_values)); 65 | spblas::__ranges::sort( 66 | spblas::__ranges::views::zip(test_rowind, test_colind, test_values)); 67 | 68 | EXPECT_EQ(ref_values.size(), test_values.size()); 69 | EXPECT_EQ(ref_rowind.size(), test_rowind.size()); 70 | EXPECT_EQ(ref_colind.size(), test_colind.size()); 71 | 72 | for (auto&& [a, b] : 73 | spblas::__ranges::views::zip(ref_values, test_values)) { 74 | EXPECT_EQ_(a, b); 75 | } 76 | 77 | for (auto&& [a, b] : 78 | spblas::__ranges::views::zip(ref_rowind, test_rowind)) { 79 | EXPECT_EQ(a, b); 80 | } 81 | 82 | for (auto&& [a, b] : 83 | spblas::__ranges::views::zip(ref_colind, test_colind)) { 84 | EXPECT_EQ(a, b); 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /include/spblas/detail/tag_invoke.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace spblas { 7 | 8 | namespace _tag_invoke { 9 | void tag_invoke(); 10 | 11 | struct _fn { 12 | template 13 | constexpr auto operator()(CPO cpo, Args&&... args) const 14 | noexcept(noexcept(tag_invoke((CPO&&) cpo, (Args&&) args...))) 15 | -> decltype(tag_invoke((CPO&&) cpo, (Args&&) args...)) { 16 | return tag_invoke((CPO&&) cpo, (Args&&) args...); 17 | } 18 | }; 19 | 20 | template 21 | using tag_invoke_result_t = 22 | decltype(tag_invoke(std::declval(), std::declval()...)); 23 | 24 | using yes_type = char; 25 | using no_type = char (&)[2]; 26 | 27 | template 28 | auto try_tag_invoke(int) // 29 | noexcept(noexcept(tag_invoke(std::declval(), std::declval()...))) 30 | -> decltype(static_cast(tag_invoke(std::declval(), 31 | std::declval()...)), 32 | yes_type{}); 33 | 34 | template 35 | no_type try_tag_invoke(...) noexcept(false); 36 | 37 | template