├── .clang-format ├── .flake8 ├── .pre-commit-config.yaml ├── .style.yapf ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cmake ├── Modules │ ├── cpm_helpers.cmake │ ├── cuda_arch_helpers.cmake │ └── set_cpu_arch_flags.cmake ├── generate_install_info_py.cmake ├── thirdparty │ ├── get_legate.cmake │ └── get_nccl.cmake └── versions.json ├── conda └── conda-build │ ├── build.sh │ ├── conda_build_config.yaml │ └── meta.yaml ├── examples ├── common.py ├── gmg.py ├── matrix_power.py ├── pde.py ├── spgemm_microbenchmark.py └── spmv_microbenchmark.py ├── install.py ├── legate_sparse ├── __init__.py ├── _version.py ├── base.py ├── config.py ├── coverage.py ├── csr.py ├── dia.py ├── gallery.py ├── install_info.py.in ├── io.py ├── linalg.py ├── module.py ├── runtime.py ├── settings.py ├── types.py └── utils.py ├── legate_sparse_cpp.cmake ├── legate_sparse_python.cmake ├── scripts ├── memlog_analysis.py ├── memlog_cli.py └── memlog_parser.py ├── setup.py ├── src └── legate_sparse │ ├── array │ ├── conv │ │ ├── csr_to_dense.cc │ │ ├── csr_to_dense.cu │ │ ├── csr_to_dense.h │ │ ├── csr_to_dense_omp.cc │ │ ├── csr_to_dense_template.inl │ │ ├── dense_to_csr.cc │ │ ├── dense_to_csr.cu │ │ ├── dense_to_csr.h │ │ ├── dense_to_csr_omp.cc │ │ ├── dense_to_csr_template.inl │ │ ├── pos_to_coordinates.cc │ │ ├── pos_to_coordinates.cu │ │ ├── pos_to_coordinates.h │ │ ├── pos_to_coordinates_omp.cc │ │ └── pos_to_coordinates_template.inl │ ├── csr │ │ ├── get_diagonal.cc │ │ ├── get_diagonal.cu │ │ ├── get_diagonal.h │ │ ├── get_diagonal_omp.cc │ │ ├── get_diagonal_template.inl │ │ ├── indexing.cc │ │ ├── indexing.cu │ │ ├── indexing.h │ │ ├── indexing_omp.cc │ │ ├── indexing_template.inl │ │ ├── spgemm_csr_csr_csr.cc │ │ ├── spgemm_csr_csr_csr.cu │ │ ├── spgemm_csr_csr_csr.h │ │ ├── spgemm_csr_csr_csr_omp.cc │ │ ├── spgemm_csr_csr_csr_template.inl │ │ ├── spmv.cc │ │ ├── spmv.cu │ │ ├── spmv.h │ │ ├── spmv_omp.cc │ │ └── spmv_template.inl │ └── util │ │ ├── scale_rect.cc │ │ ├── scale_rect.cu │ │ ├── scale_rect.h │ │ ├── scale_rect_omp.cc │ │ ├── scale_rect_template.inl │ │ ├── unzip_rect.cc │ │ ├── unzip_rect.cu │ │ ├── unzip_rect.h │ │ ├── unzip_rect_omp.cc │ │ ├── unzip_rect_template.inl │ │ ├── zip_to_rect.cc │ │ ├── zip_to_rect.cu │ │ ├── zip_to_rect.h │ │ ├── zip_to_rect_omp.cc │ │ └── zip_to_rect_template.inl │ ├── cffi.h │ ├── cudalibs.cu │ ├── cudalibs.h │ ├── io │ ├── mtx_to_coo.cc │ └── mtx_to_coo.h │ ├── linalg │ ├── axpby.cc │ ├── axpby.cu │ ├── axpby.h │ ├── axpby_omp.cc │ └── axpby_template.inl │ ├── mapper │ ├── mapper.cc │ └── mapper.h │ ├── partition │ ├── fast_image_partition.cc │ ├── fast_image_partition.cu │ ├── fast_image_partition.h │ └── fast_image_partition_template.inl │ ├── sparse.cc │ ├── sparse.h │ ├── sparse_c.h │ └── util │ ├── cuda_help.h │ ├── cusparse_utils.h │ ├── dispatch.h │ ├── legate_utils.h │ ├── logger.h │ ├── omp_help.h │ ├── thrust_allocator.h │ ├── typedefs.h │ ├── upcast_future.cc │ └── upcast_future.h ├── test.py └── tests ├── integration ├── conftest.py ├── test_cg_axpby.py ├── test_cg_solve.py ├── test_comparison.py ├── test_csr_from_coo.py ├── test_csr_from_csr.py ├── test_csr_from_dense.py ├── test_csr_to_dense.py ├── test_csr_transpose.py ├── test_diagonal.py ├── test_diags.py ├── test_gmres_solve.py ├── test_indexing.py ├── test_io.py ├── test_manual_sorting.py ├── test_nonzero.py ├── test_spgemm.py ├── test_spmv.py ├── test_unary_operation.py └── utils │ ├── banded_matrix.py │ └── sample.py └── testdata ├── GlossGT.mtx ├── Ragusa18.mtx ├── 
cage4.mtx ├── karate.mtx └── test.mtx /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # 'foo' is too complex (N) 4 | C901, 5 | # continuation line missing indentation or outdented 6 | E122, 7 | E203, E501, 8 | F403, F821, W503 9 | max-line-length = 80 10 | max-complexity = 18 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-mypy 3 | rev: 'v1.5.1' 4 | hooks: 5 | - id: mypy 6 | language: system 7 | pass_filenames: false 8 | args: ['legate_sparse'] 9 | - repo: https://github.com/psf/black 10 | rev: 23.9.1 11 | hooks: 12 | - id: black 13 | - repo: https://github.com/PyCQA/isort 14 | rev: 5.12.0 15 | hooks: 16 | - id: isort 17 | args: ["--profile", "black"] 18 | - repo: https://github.com/PyCQA/flake8 19 | rev: 6.1.0 20 | hooks: 21 | - id: flake8 22 | args: [--config=.flake8] 23 | - repo: https://github.com/pre-commit/mirrors-clang-format 24 | rev: 'v16.0.6' # Use the sha / tag you want to point at 25 | hooks: 26 | - id: clang-format 27 | files: \.(cu|cuh|h|cc|inl)$ 28 | types_or: [] 29 | 30 | default_language_version: 31 | python: python3 32 | -------------------------------------------------------------------------------- /cmake/Modules/cpm_helpers.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(get_cpm_git_args _out_var) 18 | 19 | set(oneValueArgs TAG BRANCH REPOSITORY) 20 | cmake_parse_arguments(GIT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 21 | 22 | set(repo_tag "") 23 | set(gh_tag_prefix "") 24 | # Default to specifying `GIT_REPOSITORY` and `GIT_TAG` 25 | set(cpm_git_args GIT_REPOSITORY ${GIT_REPOSITORY}) 26 | 27 | if(GIT_BRANCH) 28 | set(gh_tag_prefix "heads") 29 | set(repo_tag "${GIT_BRANCH}") 30 | list(APPEND cpm_git_args GIT_TAG ${GIT_BRANCH}) 31 | elseif(GIT_TAG) 32 | set(gh_tag_prefix "tags") 33 | set(repo_tag "${GIT_TAG}") 34 | list(APPEND cpm_git_args GIT_TAG ${GIT_TAG}) 35 | endif() 36 | 37 | # Remove `.git` suffix from repo URL 38 | if(GIT_REPOSITORY MATCHES "^(.*)(\.git)$") 39 | set(GIT_REPOSITORY "${CMAKE_MATCH_1}") 40 | endif() 41 | if(GIT_REPOSITORY MATCHES "github\.com") 42 | # If retrieving from github use `.zip` URL to download faster 43 | set(cpm_git_args URL "${GIT_REPOSITORY}/archive/${repo_tag}.zip") 44 | elseif(GIT_REPOSITORY MATCHES "gitlab\.com") 45 | # GitLab archive URIs replace slashes with dashes 46 | string(REPLACE "/" "-" archive_tag "${repo_tag}") 47 | string(LENGTH "${GIT_REPOSITORY}" repo_name_len) 48 | string(FIND "${GIT_REPOSITORY}" "/" repo_name_idx REVERSE) 49 | math(EXPR repo_name_len "${repo_name_len} - ${repo_name_idx}") 50 | string(SUBSTRING "${GIT_REPOSITORY}" ${repo_name_idx} ${repo_name_len} repo_name) 51 | # If retrieving from gitlab use `.zip` URL to download faster 52 | set(cpm_git_args URL "${GIT_REPOSITORY}/-/archive/${repo_tag}/${repo_name}-${archive_tag}.zip") 53 | endif() 54 | 55 | set(${_out_var} ${cpm_git_args} PARENT_SCOPE) 56 | 57 | endfunction() 58 | -------------------------------------------------------------------------------- /cmake/Modules/cuda_arch_helpers.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2024 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(set_cuda_arch_from_names) 18 | set(cuda_archs "") 19 | # translate legacy arch names into numbers 20 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "fermi") 21 | list(APPEND cuda_archs 20) 22 | endif() 23 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "kepler") 24 | list(APPEND cuda_archs 30) 25 | endif() 26 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "k20") 27 | list(APPEND cuda_archs 35) 28 | endif() 29 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "k80") 30 | list(APPEND cuda_archs 37) 31 | endif() 32 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "maxwell") 33 | list(APPEND cuda_archs 52) 34 | endif() 35 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "pascal") 36 | list(APPEND cuda_archs 60) 37 | endif() 38 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "volta") 39 | list(APPEND cuda_archs 70) 40 | endif() 41 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "turing") 42 | list(APPEND cuda_archs 75) 43 | endif() 44 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "ampere") 45 | list(APPEND cuda_archs 80) 46 | endif() 47 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "ada") 48 | list(APPEND cuda_archs 89) 49 | endif() 50 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "hopper") 51 | list(APPEND cuda_archs 90) 52 | endif() 53 | 54 | if(cuda_archs) 55 | list(LENGTH cuda_archs num_archs) 56 | if(num_archs GREATER 1) 57 | # A CMake architecture list entry of "80" means to build both compute and sm. 58 | # What we want is for the newest arch only to build that way, while the rest 59 | # build only for sm. 60 | list(POP_BACK cuda_archs latest_arch) 61 | list(TRANSFORM cuda_archs APPEND "-real") 62 | list(APPEND cuda_archs ${latest_arch}) 63 | else() 64 | list(TRANSFORM cuda_archs APPEND "-real") 65 | endif() 66 | set(CMAKE_CUDA_ARCHITECTURES ${cuda_archs} PARENT_SCOPE) 67 | endif() 68 | endfunction() 69 | 70 | function(add_cuda_architecture_defines defs) 71 | message(VERBOSE "legate: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") 72 | 73 | set(_defs ${${defs}}) 74 | 75 | macro(add_def_if_arch_enabled arch def) 76 | if("${arch}" IN_LIST CMAKE_CUDA_ARCHITECTURES OR 77 | ("${arch}-real" IN_LIST CMAKE_CUDA_ARCHITECTURES) OR 78 | ("${arch}-virtual" IN_LIST CMAKE_CUDA_ARCHITECTURES)) 79 | list(APPEND _defs ${def}) 80 | endif() 81 | endmacro() 82 | 83 | add_def_if_arch_enabled("20" "FERMI_ARCH") 84 | add_def_if_arch_enabled("30" "KEPLER_ARCH") 85 | add_def_if_arch_enabled("35" "K20_ARCH") 86 | add_def_if_arch_enabled("37" "K80_ARCH") 87 | add_def_if_arch_enabled("52" "MAXWELL_ARCH") 88 | add_def_if_arch_enabled("60" "PASCAL_ARCH") 89 | add_def_if_arch_enabled("70" "VOLTA_ARCH") 90 | add_def_if_arch_enabled("75" "TURING_ARCH") 91 | add_def_if_arch_enabled("80" "AMPERE_ARCH") 92 | add_def_if_arch_enabled("89" "ADA_ARCH") 93 | add_def_if_arch_enabled("90" "HOPPER_ARCH") 94 | 95 | set(${defs} ${_defs} PARENT_SCOPE) 96 | endfunction() 97 | -------------------------------------------------------------------------------- /cmake/Modules/set_cpu_arch_flags.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | #------------------------------------------------------------------------------# 18 | # Architecture 19 | #------------------------------------------------------------------------------# 20 | if(BUILD_MARCH AND BUILD_MCPU) 21 | message(FATAL_ERROR "BUILD_MARCH and BUILD_MCPU are incompatible") 22 | endif() 23 | 24 | function(set_cpu_arch_flags out_var) 25 | # Try -march first. On platforms that don't support it, GCC will issue a hard 26 | # error, so we'll know not to use it. Default is "native", but explicitly 27 | # setting BUILD_MARCH="" disables use of the flag 28 | if(BUILD_MARCH) 29 | set(INTERNAL_BUILD_MARCH ${BUILD_MARCH}) 30 | elseif(NOT DEFINED BUILD_MARCH) 31 | set(INTERNAL_BUILD_MARCH "native") 32 | endif() 33 | 34 | set(flags "") 35 | 36 | include(CheckCXXCompilerFlag) 37 | if(INTERNAL_BUILD_MARCH) 38 | check_cxx_compiler_flag("-march=${INTERNAL_BUILD_MARCH}" COMPILER_SUPPORTS_MARCH) 39 | if(COMPILER_SUPPORTS_MARCH) 40 | list(APPEND flags "-march=${INTERNAL_BUILD_MARCH}") 41 | elseif(BUILD_MARCH) 42 | message(FATAL_ERROR "The flag -march=${INTERNAL_BUILD_MARCH} is not supported by the compiler") 43 | else() 44 | unset(INTERNAL_BUILD_MARCH) 45 | endif() 46 | endif() 47 | 48 | # Try -mcpu. We do this second because it is deprecated on x86, but 49 | # GCC won't issue a hard error, so we can't tell if it worked or not. 
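  # Only fall back to -mcpu when BUILD_MARCH was not explicitly requested and
  # the default -march=native probe above did not succeed.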
50 | if (NOT INTERNAL_BUILD_MARCH AND NOT DEFINED BUILD_MARCH) 51 | if(BUILD_MCPU) 52 | set(INTERNAL_BUILD_MCPU ${BUILD_MCPU}) 53 | else() 54 | set(INTERNAL_BUILD_MCPU "native") 55 | endif() 56 | 57 | check_cxx_compiler_flag("-mcpu=${INTERNAL_BUILD_MCPU}" COMPILER_SUPPORTS_MCPU) 58 | if(COMPILER_SUPPORTS_MCPU) 59 | list(APPEND flags "-mcpu=${INTERNAL_BUILD_MCPU}") 60 | elseif(BUILD_MCPU) 61 | message(FATAL_ERROR "The flag -mcpu=${INTERNAL_BUILD_MCPU} is not supported by the compiler") 62 | else() 63 | unset(INTERNAL_BUILD_MCPU) 64 | endif() 65 | endif() 66 | 67 | # Add flags for Power architectures 68 | check_cxx_compiler_flag("-maltivec -Werror" COMPILER_SUPPORTS_MALTIVEC) 69 | if(COMPILER_SUPPORTS_MALTIVEC) 70 | list(APPEND flags "-maltivec") 71 | endif() 72 | check_cxx_compiler_flag("-mabi=altivec -Werror" COMPILER_SUPPORTS_MABI_ALTIVEC) 73 | if(COMPILER_SUPPORTS_MABI_ALTIVEC) 74 | list(APPEND flags "-mabi=altivec") 75 | endif() 76 | check_cxx_compiler_flag("-mvsx -Werror" COMPILER_SUPPORTS_MVSX) 77 | if(COMPILER_SUPPORTS_MVSX) 78 | list(APPEND flags "-mvsx") 79 | endif() 80 | 81 | set(${out_var} "${flags}" PARENT_SCOPE) 82 | endfunction() 83 | 84 | set_cpu_arch_flags(arch_flags) 85 | -------------------------------------------------------------------------------- /cmake/generate_install_info_py.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022-2024 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | execute_process( 18 | COMMAND ${CMAKE_C_COMPILER} 19 | -E -DLEGATE_USE_PYTHON_CFFI 20 | -I "${CMAKE_CURRENT_LIST_DIR}/../src/legate_sparse" 21 | -P "${CMAKE_CURRENT_LIST_DIR}/../src/legate_sparse/sparse_c.h" 22 | ECHO_ERROR_VARIABLE 23 | OUTPUT_VARIABLE header 24 | COMMAND_ERROR_IS_FATAL ANY 25 | ) 26 | 27 | set(libpath "") 28 | configure_file( 29 | "${CMAKE_CURRENT_LIST_DIR}/../legate_sparse/install_info.py.in" 30 | "${CMAKE_CURRENT_LIST_DIR}/../legate_sparse/install_info.py" 31 | @ONLY) 32 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_nccl.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_or_configure_nccl) 18 | 19 | if(TARGET NCCL::NCCL) 20 | return() 21 | endif() 22 | 23 | rapids_find_generate_module(NCCL 24 | HEADER_NAMES nccl.h 25 | LIBRARY_NAMES nccl 26 | ) 27 | 28 | # Currently NCCL has no CMake build-system so we require 29 | # it built and installed on the machine already 30 | rapids_find_package(NCCL REQUIRED) 31 | 32 | endfunction() 33 | 34 | find_or_configure_nccl() 35 | -------------------------------------------------------------------------------- /cmake/versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages" : { 3 | "legate" : { 4 | "repo": "legate.internal", 5 | "org": "nv-legate", 6 | "version": "25.03.02", 7 | "git_url" : "git@github.com:nv-legate/legate.internal.git", 8 | "git_shallow": false, 9 | "always_download": false, 10 | "git_tag" : "75dc0a92bbd2dfb79b6b680a0f37cbd0370d0181", 11 | "anaconda_label": "main" 12 | }, 13 | "cupynumeric" : { 14 | "repo": "cupynumeric.internal", 15 | "org": "nv-legate", 16 | "version": "25.03.02", 17 | "git_url" : "git@github.com:nv-legate/cupynumeric.internal", 18 | "git_shallow": false, 19 | "always_download": false, 20 | "git_tag" : "1fa45603c560068508c3be2e0df45aec62359019", 21 | "anaconda_label": "experimental" 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /conda/conda-build/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -e "\n\n--------------------- CONDA/CONDA-BUILD/BUILD.SH -----------------------\n" 4 | 5 | set -xeo pipefail; 6 | 7 | # If run through CI, BUILD_MARCH is set externally. If it is not set, try to set it. 8 | ARCH=$(uname -m) 9 | if [[ -z "${BUILD_MARCH}" ]]; then 10 | if [[ "${ARCH}" = "aarch64" ]]; then 11 | # Use the gcc march value used by aarch64 Ubuntu. 12 | BUILD_MARCH=armv8-a 13 | else 14 | # Use uname -m otherwise 15 | BUILD_MARCH=$(uname -m | tr '_' '-') 16 | fi 17 | fi 18 | 19 | # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to 20 | # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH 21 | CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" 22 | 23 | # Add our options to conda's CMAKE_ARGS 24 | CMAKE_ARGS+=" 25 | --log-level=VERBOSE 26 | -DBUILD_SHARED_LIBS=ON 27 | -DBUILD_MARCH=${BUILD_MARCH} 28 | -DCMAKE_BUILD_TYPE=Release 29 | -DCMAKE_VERBOSE_MAKEFILE=ON 30 | -DCMAKE_BUILD_PARALLEL_LEVEL=${JOBS:-$(nproc --ignore=1)}" 31 | if [ -z "$CPU_ONLY" ]; then 32 | CMAKE_ARGS+="-DCMAKE_CUDA_ARCHITECTURES=all-major" 33 | fi 34 | 35 | export CMAKE_GENERATOR=Ninja 36 | export CUDAHOSTCXX=${CXX} 37 | export OPENSSL_DIR="$PREFIX" 38 | 39 | echo "Environment" 40 | env 41 | 42 | echo "Build starting on $(date)" 43 | CUDAFLAGS="-isystem ${PREFIX}/include -L${PREFIX}/lib" 44 | export CUDAFLAGS 45 | 46 | SKBUILD_BUILD_OPTIONS=-j$CPU_COUNT \ 47 | $PYTHON -m pip install \ 48 | --root / \ 49 | --no-deps \ 50 | --prefix "$PREFIX" \ 51 | --no-build-isolation \ 52 | --upgrade \ 53 | --cache-dir "$PIP_CACHE_DIR" \ 54 | --disable-pip-version-check \ 55 | . 
-vv 56 | 57 | echo "Build ending on $(date)" 58 | -------------------------------------------------------------------------------- /conda/conda-build/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | gpu_enabled: 2 | - true 3 | - false 4 | 5 | upload_build: 6 | - false 7 | 8 | python: 9 | - 3.10 10 | - 3.11 11 | - 3.12 12 | 13 | numpy_version: 14 | - ">=1.22,<2" 15 | 16 | cmake_version: 17 | - ">=3.20.1,!=3.23.0" 18 | -------------------------------------------------------------------------------- /legate_sparse/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | 16 | Not sure what is supposed to go in here... 17 | 18 | """ 19 | 20 | import scipy.sparse as _sp # type: ignore 21 | 22 | from .coverage import clone_module # noqa: F401 23 | from .csr import csr_array, csr_matrix # noqa: F401 24 | from .module import * # noqa: F401 25 | 26 | clone_module(_sp, globals()) 27 | 28 | del clone_module 29 | del _sp 30 | -------------------------------------------------------------------------------- /legate_sparse/coverage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | from __future__ import annotations 16 | 17 | from functools import wraps 18 | from types import FunctionType, MethodDescriptorType, MethodType, ModuleType 19 | from typing import Any, Container, Mapping, Optional, cast 20 | 21 | from legate.core import track_provenance 22 | from typing_extensions import Protocol 23 | 24 | MOD_INTERNAL = {"__dir__", "__getattr__"} 25 | 26 | 27 | def filter_namespace( 28 | ns: Mapping[str, Any], 29 | *, 30 | omit_names: Optional[Container[str]] = None, 31 | omit_types: tuple[type, ...] = (), 32 | ) -> dict[str, Any]: 33 | omit_names = omit_names or set() 34 | return { 35 | attr: value 36 | for attr, value in ns.items() 37 | if attr not in omit_names and not isinstance(value, omit_types) 38 | } 39 | 40 | 41 | def should_wrap(obj: object) -> bool: 42 | return isinstance(obj, (FunctionType, MethodType, MethodDescriptorType)) 43 | 44 | 45 | class AnyCallable(Protocol): 46 | def __call__(self, *args: Any, **kwargs: Any) -> Any: 47 | ... 
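# The helpers below wrap functions and methods whose names also appear in the
# origin scipy.sparse module or class, so that each call is reported through
# Legate's provenance tracking (track_provenance).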
48 | 
49 | 
50 | def wrap(func: AnyCallable) -> Any:
51 |     @wraps(func)
52 |     @track_provenance(nested=True)
53 |     def wrapper(*args: Any, **kwargs: Any) -> Any:
54 |         return func(*args, **kwargs)
55 | 
56 |     return wrapper
57 | 
58 | 
59 | def clone_module(origin_module: ModuleType, new_globals: dict[str, Any]) -> None:
60 |     """Copy attributes from one module to another, excluding submodules
61 | 
62 |     Function types are wrapped with a decorator to report API calls. All
63 |     other values are copied as-is.
64 | 
65 |     Parameters
66 |     ----------
67 |     origin_module : ModuleType
68 |         Existing module to clone attributes from
69 | 
70 |     new_globals : dict
71 |         a globals() dict for the new module to clone into
72 | 
73 |     Returns
74 |     -------
75 |     None
76 | 
77 |     """
78 |     for attr, value in new_globals.items():
79 |         # Only need to wrap things that are in the origin module to begin with
80 |         if attr not in origin_module.__dict__:
81 |             continue
82 |         if isinstance(value, FunctionType):
83 |             wrapped = wrap(cast(AnyCallable, value))
84 |             new_globals[attr] = wrapped
85 | 
86 | 
87 | def clone_scipy_arr_kind(origin_class: type) -> Any:
88 |     """Copy attributes from an origin class to the input class.
89 | 
90 |     Method types are wrapped with a decorator to report API calls. All
91 |     other values are copied as-is.
92 | 
93 |     """
94 | 
95 |     def body(cls: type):
96 |         for attr, value in cls.__dict__.items():
97 |             # Only need to wrap things that are in the origin class to begin
98 |             # with
99 |             if not hasattr(origin_class, attr):
100 |                 continue
101 |             if should_wrap(value):
102 |                 wrapped = wrap(value)
103 |                 setattr(cls, attr, wrapped)
104 | 
105 |         return cls
106 | 
107 |     return body
108 | 
--------------------------------------------------------------------------------
/legate_sparse/install_info.py.in:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 | #
9 | # See the LICENSE file for details.
10 | # 11 | 12 | # IMPORTANT: 13 | # * install_info.py is a generated file and should not be modified by hand 14 | 15 | def get_libpath(): 16 | import os, sys, platform 17 | join = os.path.join 18 | exists = os.path.exists 19 | dirname = os.path.dirname 20 | cn_path = dirname(dirname(__file__)) 21 | so_ext = { 22 | "": "", 23 | "Java": ".jar", 24 | "Linux": ".so", 25 | "Darwin": ".dylib", 26 | "Windows": ".dll" 27 | }[platform.system()] 28 | 29 | def find_liblegate_sparse(libdir): 30 | if exists(join(libdir, f"liblegate_sparse{so_ext}")): 31 | return libdir 32 | return None 33 | 34 | return ( 35 | find_liblegate_sparse(join(cn_path, "build", "lib")) or 36 | find_liblegate_sparse(join(dirname(dirname(dirname(cn_path))), "lib")) or 37 | find_liblegate_sparse(join(dirname(dirname(sys.executable)), "lib")) or 38 | "" 39 | ) 40 | 41 | 42 | libpath: str = get_libpath() 43 | header: str = """@header@""" 44 | -------------------------------------------------------------------------------- /legate_sparse/io.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy as np 16 | from legate.core import track_provenance, types 17 | 18 | from .config import SparseOpCode 19 | from .csr import csr_array 20 | from .runtime import runtime 21 | from .types import coord_ty, float64, nnz_ty 22 | from .utils import store_to_cupynumeric_array 23 | 24 | 25 | @track_provenance(runtime.sparse_library) 26 | def mmread(source): 27 | # TODO (rohany): We'll assume for now that all of the nodes in the system 28 | # can access the file passed in, so we don't need to worry about where this 29 | # task gets mapped to. 30 | rows = runtime.create_store(coord_ty, ndim=1) 31 | cols = runtime.create_store(coord_ty, ndim=1) 32 | vals = runtime.create_store(float64, ndim=1) 33 | m = runtime.create_store(coord_ty, optimize_scalar=True, shape=(1,)) 34 | n = runtime.create_store(coord_ty, optimize_scalar=True, shape=(1,)) 35 | nnz = runtime.create_store(nnz_ty, optimize_scalar=True, shape=(1,)) 36 | task = runtime.create_auto_task(SparseOpCode.READ_MTX_TO_COO) 37 | task.add_output(m) 38 | task.add_output(n) 39 | task.add_output(nnz) 40 | task.add_output(rows) 41 | task.add_output(cols) 42 | task.add_output(vals) 43 | task.add_scalar_arg(source, types.string_type) 44 | task.execute() 45 | 46 | m = int(np.asarray(m.get_physical_store().get_inline_allocation())[0]) 47 | n = int(np.asarray(n.get_physical_store().get_inline_allocation())[0]) 48 | nnz = int(np.asarray(nnz.get_physical_store().get_inline_allocation())[0]) 49 | # Slice down each store from the resulting size into the actual size. 
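    # (The output stores above are created unbound, so the I/O task may leave
    # them larger than the number of entries actually read; keep only the
    # first `nnz` elements of each.)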
50 | sl = slice(0, nnz) 51 | rows = store_to_cupynumeric_array(rows.slice(0, sl)) 52 | cols = store_to_cupynumeric_array(cols.slice(0, sl)) 53 | vals = store_to_cupynumeric_array(vals.slice(0, sl)) 54 | return csr_array((vals, (rows, cols)), shape=(m, n)) 55 | -------------------------------------------------------------------------------- /legate_sparse/module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Portions of this file are also subject to the following license: 16 | # 17 | # Copyright (c) 2001-2002 Enthought, Inc. 2003-2022, SciPy Developers. 18 | # All rights reserved. 19 | # 20 | # Redistribution and use in source and binary forms, with or without 21 | # modification, are permitted provided that the following conditions 22 | # are met: 23 | # 24 | # 1. Redistributions of source code must retain the above copyright 25 | # notice, this list of conditions and the following disclaimer. 26 | # 27 | # 2. Redistributions in binary form must reproduce the above 28 | # copyright notice, this list of conditions and the following 29 | # disclaimer in the documentation and/or other materials provided 30 | # with the distribution. 31 | # 32 | # 3. Neither the name of the copyright holder nor the names of its 33 | # contributors may be used to endorse or promote products derived 34 | # from this software without specific prior written permission. 35 | # 36 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 40 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | 48 | 49 | from .csr import csr_array # noqa: F401 50 | from .dia import dia_array # noqa: F401 51 | from .gallery import diags # noqa: F401 52 | from .io import mmread # noqa: F401 53 | 54 | # expose default types 55 | from .types import coord_ty, nnz_ty # noqa: F401 56 | 57 | 58 | # is_sparse_matrix returns whether or not an object is a legate 59 | # sparse created sparse matrix. 60 | def is_sparse_matrix(o): 61 | return any((isinstance(o, csr_array),)) 62 | 63 | 64 | issparse = is_sparse_matrix 65 | isspmatrix = is_sparse_matrix 66 | 67 | 68 | # Variants for each particular format type. 
69 | def isspmatrix_csr(o): 70 | return isinstance(o, csr_array) 71 | -------------------------------------------------------------------------------- /legate_sparse/runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from __future__ import annotations 15 | 16 | from typing import TYPE_CHECKING 17 | 18 | import numpy as np 19 | from legate.core import ( 20 | AutoTask, 21 | LogicalStore, 22 | ManualTask, 23 | Shape, 24 | TaskTarget, 25 | get_legate_runtime, 26 | get_machine, 27 | types, 28 | ) 29 | 30 | from .config import SparseOpCode, _library 31 | 32 | if TYPE_CHECKING: 33 | from typing import Optional, Union 34 | 35 | import numpy.typing as npt 36 | 37 | TO_CORE_DTYPES = { 38 | np.dtype(np.bool_): types.bool_, 39 | np.dtype(np.int8): types.int8, 40 | np.dtype(np.int16): types.int16, 41 | np.dtype(np.int32): types.int32, 42 | np.dtype(np.int64): types.int64, 43 | np.dtype(np.uint8): types.uint8, 44 | np.dtype(np.uint16): types.uint16, 45 | np.dtype(np.uint32): types.uint32, 46 | np.dtype(np.uint64): types.uint64, 47 | np.dtype(np.float16): types.float16, 48 | np.dtype(np.float32): types.float32, 49 | np.dtype(np.float64): types.float64, 50 | np.dtype(np.complex64): types.complex64, 51 | np.dtype(np.complex128): types.complex128, 52 | } 53 | 54 | 55 | # TODO (marsaev): rename to SparseRuntime to avoid confusion? 56 | class Runtime: 57 | def __init__(self, sparse_library): 58 | self.sparse_library = sparse_library 59 | self.legate_runtime = get_legate_runtime() 60 | self.legate_machine = get_machine() 61 | 62 | # Load all the necessary CUDA libraries if we have GPUs. 63 | if self.num_gpus > 0: 64 | # TODO (rohany): Also handle destroying the cuda libraries when the 65 | # runtime is torn down. 
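            # Launch one LOAD_CUDALIBS task per GPU (a manual launch over
            # Shape((num_gpus,))) so that every device initializes its CUDA
            # libraries (e.g. cuSPARSE) up front, then fence so initialization
            # completes before any compute tasks are issued.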
66 |             task = self.legate_runtime.create_manual_task(
67 |                 self.sparse_library,
68 |                 SparseOpCode.LOAD_CUDALIBS,
69 |                 launch_shape=Shape((self.num_gpus,)),
70 |             )
71 |             task.execute()
72 |             self.legate_runtime.issue_execution_fence(block=True)
73 | 
74 |     @property
75 |     def num_procs(self):
76 |         return self.legate_machine.count(self.legate_machine.preferred_target)
77 | 
78 |     @property
79 |     def num_gpus(self):
80 |         return self.legate_machine.count(TaskTarget.GPU)
81 | 
82 |     def create_store(
83 |         self,
84 |         ty: Union[npt.DTypeLike],
85 |         shape: Optional[Union[tuple[int, ...], Shape]] = None,
86 |         optimize_scalar: bool = False,
87 |         ndim: Optional[int] = None,
88 |     ) -> LogicalStore:
89 |         core_ty = TO_CORE_DTYPES[ty] if isinstance(ty, np.dtype) else ty
90 |         return self.legate_runtime.create_store(
91 |             core_ty, shape=shape, optimize_scalar=optimize_scalar, ndim=ndim
92 |         )
93 | 
94 |     # only OpCode
95 |     def create_auto_task(self, OpCode) -> AutoTask:
96 |         return self.legate_runtime.create_auto_task(self.sparse_library, OpCode)
97 | 
98 |     # OpCode and launch domains
99 |     def create_manual_task(self, OpCode, *args) -> ManualTask:
100 |         return self.legate_runtime.create_manual_task(
101 |             self.sparse_library, OpCode, *args
102 |         )
103 | 
104 | 
105 | # TODO (marsaev): rename to sparse_runtime to avoid confusion?
106 | runtime = Runtime(_library)
--------------------------------------------------------------------------------
/legate_sparse/settings.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023-2024 NVIDIA Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | from __future__ import annotations
16 | 
17 | from legate.util.settings import PrioritizedSetting, Settings, convert_bool
18 | 
19 | __all__ = ("settings",)
20 | 
21 | 
22 | class SparseRuntimeSettings(Settings):
23 |     fast_spgemm: PrioritizedSetting[bool] = PrioritizedSetting(
24 |         "fast-spgemm",
25 |         "LEGATE_SPARSE_FAST_SPGEMM",
26 |         default=False,
27 |         convert=convert_bool,
28 |         help="""
29 |         Switch to the faster CUSPARSE_SPGEMM_ALG1, which, however, uses
30 |         significantly more FB memory. It is used by default with cuSPARSE < 12.1,
31 |         since memory-restricted SpGEMM was only introduced in 12.1.
32 |         """,
33 |     )
34 | 
35 | 
36 | settings = SparseRuntimeSettings()
--------------------------------------------------------------------------------
/legate_sparse/types.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2024 NVIDIA Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy 16 | 17 | # Define some common types. Hopefully as we make more 18 | # progress in generalizing the compute kernels, we can 19 | # remove this code. 20 | coord_ty = numpy.dtype(numpy.int64) 21 | nnz_ty = numpy.dtype(numpy.uint64) 22 | float64 = numpy.dtype(numpy.float64) 23 | int32 = numpy.dtype(numpy.int32) 24 | int64 = numpy.dtype(numpy.int64) 25 | uint64 = numpy.dtype(numpy.uint64) 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2024 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from setuptools import find_packages 19 | from skbuild import setup 20 | 21 | # TODO: build yields "cant find legate module".... 22 | """ 23 | import legate.install_info as lg_install_info 24 | import os 25 | from pathlib import Path 26 | 27 | legate_dir = Path(lg_install_info.libpath).parent.as_posix() 28 | 29 | cmake_flags = [ 30 | f"-Dlegate_ROOT:STRING={legate_dir}", 31 | ] 32 | 33 | env_cmake_args = os.environ.get("CMAKE_ARGS") 34 | if env_cmake_args is not None: 35 | cmake_flags.append(env_cmake_args) 36 | os.environ["CMAKE_ARGS"] = " ".join(cmake_flags) 37 | """ 38 | 39 | setup( 40 | name="legate-sparse", 41 | version="25.03.00", 42 | description="An Aspiring Drop-In Replacement for SciPy Sparse module at Scale", 43 | author="NVIDIA Corporation", 44 | license="Apache 2.0", 45 | classifiers=[ 46 | "Intended Audience :: Developers", 47 | "Topic :: Database", 48 | "Topic :: Scientific/Engineering", 49 | "License :: OSI Approved :: Apache Software License", 50 | "Programming Language :: Python", 51 | "Programming Language :: Python :: 3.10", 52 | "Programming Language :: Python :: 3.11", 53 | ], 54 | packages=find_packages( 55 | where=".", 56 | include=["legate_sparse*"], 57 | ), 58 | include_package_data=True, 59 | zip_safe=False, 60 | ) 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRToDenseImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& A_vals, 30 | const AccessorRO, 1>& B_pos, 31 | const AccessorRO& B_crd, 32 | const AccessorRO& B_vals, 33 | const Rect<2>& rect) 34 | { 35 | // Initialize the output array. 36 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | for (INDEX_TY j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 38 | A_vals[{i, j}] = 0.0; 39 | } 40 | } 41 | // Do the conversion. 42 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 43 | for (size_t jB = B_pos[i].lo; jB < B_pos[i].hi + 1; jB++) { 44 | INDEX_TY j = B_crd[jB]; 45 | A_vals[{i, j}] = B_vals[jB]; 46 | } 47 | } 48 | } 49 | }; 50 | 51 | /*static*/ void CSRToDense::cpu_variant(TaskContext context) 52 | { 53 | csr_to_dense_template(context); 54 | } 55 | 56 | namespace // unnamed 57 | { 58 | static void __attribute__((constructor)) register_tasks(void) { CSRToDense::register_variants(); } 59 | 60 | } // namespace 61 | 62 | } // namespace sparse 63 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | #include "legate_sparse/util/cusparse_utils.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void CSRtoDenseKernel(size_t rows, 27 | Rect<2> bounds, 28 | AccessorWO A_vals, 29 | AccessorRO, 1> B_pos, 30 | AccessorRO B_crd, 31 | AccessorRO B_vals) 32 | { 33 | const auto idx = global_tid_1d(); 34 | if (idx >= rows) { 35 | return; 36 | } 37 | INDEX_TY i = idx + bounds.lo[0]; 38 | // Initialize the row with all zeros. 39 | for (INDEX_TY j = bounds.lo[1]; j < bounds.hi[1] + 1; j++) { 40 | A_vals[{i, j}] = 0.0; 41 | } 42 | // Copy the non-zero values into place. 
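  // Each thread owns exactly one row of the output, so these writes cannot
  // race with writes from other threads.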
43 | for (INDEX_TY j_pos = B_pos[i].lo; j_pos < B_pos[i].hi + 1; j_pos++) { 44 | INDEX_TY j = B_crd[j_pos]; 45 | A_vals[{i, j}] = B_vals[j_pos]; 46 | } 47 | } 48 | 49 | template <> 50 | struct CSRToDenseImpl { 51 | template 52 | void operator()(CSRToDenseArgs& args) const 53 | { 54 | using INDEX_TY = type_of; 55 | using VAL_TY = type_of; 56 | 57 | auto& A_vals = args.A_vals; 58 | auto& B_pos = args.B_pos; 59 | auto& B_crd = args.B_crd; 60 | auto& B_vals = args.B_vals; 61 | 62 | // Break out early if the iteration space partition is empty. 63 | if (B_pos.domain().empty()) { 64 | return; 65 | } 66 | 67 | auto stream = get_cached_stream(); 68 | 69 | auto B_domain = B_pos.domain(); 70 | auto rows = B_domain.hi()[0] - B_domain.lo()[0] + 1; 71 | auto blocks = get_num_blocks_1d(rows); 72 | CSRtoDenseKernel<<>>(rows, 73 | A_vals.shape<2>(), 74 | A_vals.write_accessor(), 75 | B_pos.read_accessor, 1>(), 76 | B_crd.read_accessor(), 77 | B_vals.read_accessor()); 78 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 79 | } 80 | }; 81 | 82 | /*static*/ void CSRToDense::gpu_variant(TaskContext context) 83 | { 84 | csr_to_dense_template(context); 85 | } 86 | 87 | } // namespace sparse 88 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRToDenseArgs { 26 | const legate::PhysicalStore& A_vals; 27 | const legate::PhysicalStore& B_pos; 28 | const legate::PhysicalStore& B_crd; 29 | const legate::PhysicalStore& B_vals; 30 | }; 31 | 32 | class CSRToDense : public SparseTask { 33 | public: 34 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_TO_DENSE}; 35 | 36 | public: 37 | static void cpu_variant(legate::TaskContext ctx); 38 | #ifdef LEGATE_USE_OPENMP 39 | static void omp_variant(legate::TaskContext ctx); 40 | #endif 41 | #ifdef LEGATE_USE_CUDA 42 | static void gpu_variant(legate::TaskContext context); 43 | #endif 44 | }; 45 | 46 | } // namespace sparse 47 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRToDenseImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& A_vals, 30 | const AccessorRO, 1>& B_pos, 31 | const AccessorRO& B_crd, 32 | const AccessorRO& B_vals, 33 | const Rect<2>& rect) 34 | { 35 | // Initialize the output array. 36 | #pragma omp parallel for schedule(static) collapse(2) 37 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 38 | for (INDEX_TY j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 39 | A_vals[{i, j}] = 0.0; 40 | } 41 | } 42 | // Do the conversion. 43 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 44 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 45 | for (size_t jB = B_pos[i].lo; jB < B_pos[i].hi + 1; jB++) { 46 | INDEX_TY j = B_crd[jB]; 47 | A_vals[{i, j}] = B_vals[jB]; 48 | } 49 | } 50 | } 51 | }; 52 | 53 | /*static*/ void CSRToDense::omp_variant(TaskContext context) 54 | { 55 | csr_to_dense_template(context); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
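// This header holds the type-dispatch plumbing shared by the CPU, OpenMP, and
// CUDA variants of the CSR-to-dense conversion; each variant supplies its own
// specialization of the implementation.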
20 | #include "legate_sparse/array/conv/csr_to_dense.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct CSRToDenseImplBody; 31 | 32 | template 33 | struct CSRToDenseImpl { 34 | template 35 | void operator()(CSRToDenseArgs& args) const 36 | { 37 | using INDEX_TY = type_of; 38 | using VAL_TY = type_of; 39 | 40 | auto A_vals = args.A_vals.write_accessor(); 41 | auto B_pos = args.B_pos.read_accessor, 1>(); 42 | auto B_crd = args.B_crd.read_accessor(); 43 | auto B_vals = args.B_vals.read_accessor(); 44 | 45 | if (args.A_vals.domain().empty()) { 46 | return; 47 | } 48 | CSRToDenseImplBody()( 49 | A_vals, B_pos, B_crd, B_vals, args.A_vals.shape<2>()); 50 | } 51 | }; 52 | 53 | template 54 | static void csr_to_dense_template(TaskContext context) 55 | { 56 | auto outputs = context.outputs(); 57 | // We have to promote the pos region for the auto-parallelizer to kick in, 58 | // so remove the transformation before proceeding. 59 | // if (inputs[0].transformed()) { inputs[0].remove_transform(); } 60 | 61 | CSRToDenseArgs args{outputs[0], context.inputs()[0], context.inputs()[1], context.inputs()[2]}; 62 | 63 | index_type_value_type_dispatch( 64 | args.B_crd.code(), args.A_vals.code(), CSRToDenseImpl{}, args); 65 | } 66 | 67 | } // namespace sparse 68 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/dense_to_csr.h" 18 | #include "legate_sparse/array/conv/dense_to_csr_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct DenseToCSRNNZImplBody { 26 | using VAL_TY = type_of; 27 | 28 | void operator()(const AccessorWO& nnz, 29 | const AccessorRO& B_vals, 30 | const Rect<2>& rect) 31 | { 32 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | size_t row_nnz = 0; 34 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 35 | if (B_vals[{i, j}] != static_cast(0.0)) { 36 | row_nnz++; 37 | } 38 | } 39 | nnz[{i, 0}] = row_nnz; 40 | } 41 | } 42 | }; 43 | 44 | template 45 | struct DenseToCSRImplBody { 46 | using INDEX_TY = type_of; 47 | using VAL_TY = type_of; 48 | 49 | void operator()(const AccessorRO, 2>& A_pos, 50 | const AccessorWO& A_crd, 51 | const AccessorWO& A_vals, 52 | const AccessorRO& B_vals, 53 | const Rect<2>& rect) 54 | { 55 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 56 | coord_t nnz_pos = A_pos[{i, 0}].lo; 57 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 58 | if (B_vals[{i, j}] != static_cast(0.0)) { 59 | A_crd[nnz_pos] = static_cast(j); 60 | A_vals[nnz_pos] = B_vals[{i, j}]; 61 | nnz_pos++; 62 | } 63 | } 64 | } 65 | } 66 | }; 67 | 68 | /*static*/ void DenseToCSRNNZ::cpu_variant(TaskContext context) 69 | { 70 | dense_to_csr_nnz_template(context); 71 | } 72 | 73 | /*static*/ void DenseToCSR::cpu_variant(TaskContext context) 74 | { 75 | dense_to_csr_template(context); 76 | } 77 | 78 | namespace // unnamed 79 | { 80 | static void __attribute__((constructor)) register_tasks(void) 81 | { 82 | DenseToCSRNNZ::register_variants(); 83 | DenseToCSR::register_variants(); 84 | } 85 | 86 | } // namespace 87 | 88 | } // namespace sparse 89 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct DenseToCSRNNZArgs { 26 | const legate::PhysicalStore& nnz; 27 | const legate::PhysicalStore& B_vals; 28 | }; 29 | 30 | class DenseToCSRNNZ : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_DENSE_TO_CSR_NNZ}; 33 | static void cpu_variant(legate::TaskContext ctx); 34 | #ifdef LEGATE_USE_OPENMP 35 | static void omp_variant(legate::TaskContext ctx); 36 | #endif 37 | #ifdef LEGATE_USE_CUDA 38 | static void gpu_variant(legate::TaskContext context); 39 | #endif 40 | }; 41 | 42 | struct DenseToCSRArgs { 43 | const legate::PhysicalStore& A_pos; 44 | const legate::PhysicalStore& A_crd; 45 | const legate::PhysicalStore& A_vals; 46 | const legate::PhysicalStore& B_vals; 47 | }; 48 | 49 | class DenseToCSR : public SparseTask { 50 | public: 51 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_DENSE_TO_CSR}; 52 | static void cpu_variant(legate::TaskContext ctx); 53 | #ifdef LEGATE_USE_OPENMP 54 | static void omp_variant(legate::TaskContext ctx); 55 | #endif 56 | #ifdef LEGATE_USE_CUDA 57 | static void gpu_variant(legate::TaskContext context); 58 | #endif 59 | }; 60 | 61 | } // namespace sparse 62 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/dense_to_csr.h" 18 | #include "legate_sparse/array/conv/dense_to_csr_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct DenseToCSRNNZImplBody { 26 | using VAL_TY = type_of; 27 | 28 | void operator()(const AccessorWO& nnz, 29 | const AccessorRO& B_vals, 30 | const Rect<2>& rect) 31 | { 32 | #pragma omp parallel for schedule(static) 33 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 34 | size_t row_nnz = 0; 35 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 36 | if (B_vals[{i, j}] != static_cast(0.0)) { 37 | row_nnz++; 38 | } 39 | } 40 | nnz[{i, 0}] = row_nnz; 41 | } 42 | } 43 | }; 44 | 45 | template 46 | struct DenseToCSRImplBody { 47 | using INDEX_TY = type_of; 48 | using VAL_TY = type_of; 49 | 50 | void operator()(const AccessorRO, 2>& A_pos, 51 | const AccessorWO& A_crd, 52 | const AccessorWO& A_vals, 53 | const AccessorRO& B_vals, 54 | const Rect<2>& rect) 55 | { 56 | #pragma omp parallel for schedule(static) 57 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 58 | coord_t nnz_pos = A_pos[{i, 0}].lo; 59 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 60 | if (B_vals[{i, j}] != static_cast(0.0)) { 61 | A_crd[nnz_pos] = static_cast(j); 62 | A_vals[nnz_pos] = B_vals[{i, j}]; 63 | nnz_pos++; 64 | } 65 | } 66 | } 67 | } 68 | }; 69 | 70 | /*static*/ void DenseToCSRNNZ::omp_variant(TaskContext context) 71 | { 72 | dense_to_csr_nnz_template(context); 73 | } 74 | 75 | /*static*/ void DenseToCSR::omp_variant(TaskContext context) 76 | { 77 | dense_to_csr_template(context); 78 | } 79 | 80 | } // namespace sparse 81 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
20 | #include "legate_sparse/array/conv/dense_to_csr.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | #include "legate_sparse/util/typedefs.h" 23 | 24 | #include 25 | 26 | namespace sparse { 27 | 28 | using namespace legate; 29 | 30 | template 31 | struct DenseToCSRNNZImplBody; 32 | 33 | template 34 | struct DenseToCSRNNZImpl { 35 | template 36 | void operator()(DenseToCSRNNZArgs& args) const 37 | { 38 | using VAL_TY = type_of; 39 | 40 | auto nnz = args.nnz.write_accessor(); 41 | auto B_vals = args.B_vals.read_accessor(); 42 | 43 | if (args.nnz.domain().empty()) { 44 | return; 45 | } 46 | DenseToCSRNNZImplBody()(nnz, B_vals, args.B_vals.shape<2>()); 47 | } 48 | }; 49 | 50 | template 51 | struct DenseToCSRImplBody; 52 | 53 | template 54 | struct DenseToCSRImpl { 55 | template 56 | void operator()(DenseToCSRArgs& args) const 57 | { 58 | using INDEX_TY = type_of; 59 | using VAL_TY = type_of; 60 | 61 | auto A_pos = args.A_pos.read_accessor, 2>(); 62 | auto A_crd = args.A_crd.write_accessor(); 63 | auto A_vals = args.A_vals.write_accessor(); 64 | auto B_vals = args.B_vals.read_accessor(); 65 | 66 | if (args.A_pos.domain().empty()) { 67 | return; 68 | } 69 | DenseToCSRImplBody()( 70 | A_pos, A_crd, A_vals, B_vals, args.B_vals.shape<2>()); 71 | } 72 | }; 73 | 74 | template 75 | static void dense_to_csr_nnz_template(TaskContext context) 76 | { 77 | DenseToCSRNNZArgs args{ 78 | context.output(0), // nnz_per_row 79 | context.input(0) // B_vals 80 | }; 81 | value_type_dispatch(args.B_vals.code(), DenseToCSRNNZImpl{}, args); 82 | } 83 | 84 | template 85 | static void dense_to_csr_template(TaskContext context) 86 | { 87 | DenseToCSRArgs args{ 88 | context.input(0), // A_pos (promoted) 89 | context.output(0), // A_crd 90 | context.output(1), // A_vals 91 | context.input(1) // B_vals 92 | }; 93 | 94 | index_type_value_type_dispatch( 95 | args.A_crd.code(), args.A_vals.code(), DenseToCSRImpl{}, args); 96 | } 97 | 98 | } // namespace sparse 99 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ExpandPosToCoordinatesImplBody { 26 | using INDEX_TY = type_of; 27 | 28 | void operator()(const AccessorRO, 1>& pos, 29 | const AccessorWO& row_indices, 30 | const Rect<1>& rect) 31 | { 32 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 33 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 34 | row_indices[j_pos] = row; 35 | } 36 | } 37 | } 38 | }; 39 | 40 | /*static*/ void ExpandPosToCoordinates::cpu_variant(TaskContext context) 41 | { 42 | pos_to_coordinates_template(context); 43 | } 44 | 45 | namespace // unnamed 46 | { 47 | static void __attribute__((constructor)) register_tasks(void) 48 | { 49 | ExpandPosToCoordinates::register_variants(); 50 | } 51 | } // namespace 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | #include "legate_sparse/util/cusparse_utils.h" 21 | #include 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | __global__ void fill_row_indices(size_t rows, 29 | size_t offset, 30 | AccessorRO, 1> pos, 31 | AccessorWO row_indices) 32 | { 33 | const auto idx = global_tid_1d(); 34 | 35 | if (idx >= rows) { 36 | return; 37 | } 38 | 39 | size_t row = offset + idx; 40 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 41 | row_indices[j_pos] = row; 42 | } 43 | } 44 | 45 | template 46 | struct ExpandPosToCoordinatesImplBody { 47 | using INDEX_TY = type_of; 48 | 49 | void operator()(const AccessorRO, 1>& pos, 50 | const AccessorWO& row_indices, 51 | const Rect<1>& rect) 52 | { 53 | auto stream = get_cached_stream(); 54 | auto blocks = get_num_blocks_1d(rect.volume()); 55 | size_t rows = rect.volume(); 56 | 57 | fill_row_indices<<>>(rows, rect.lo[0], pos, row_indices); 58 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 59 | } 60 | }; 61 | 62 | /*static*/ void ExpandPosToCoordinates::gpu_variant(TaskContext context) 63 | { 64 | pos_to_coordinates_template(context); 65 | } 66 | 67 | } // namespace sparse 68 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ExpandPosToCoordinatesArgs { 26 | const legate::PhysicalStore row_indices; 27 | const legate::PhysicalStore pos; 28 | }; 29 | 30 | class ExpandPosToCoordinates : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_EXPAND_POS_TO_COORDINATES}; 33 | 34 | public: 35 | static void cpu_variant(legate::TaskContext ctx); 36 | #ifdef LEGATE_USE_OPENMP 37 | static void omp_variant(legate::TaskContext ctx); 38 | #endif 39 | #ifdef LEGATE_USE_CUDA 40 | static void gpu_variant(legate::TaskContext ctx); 41 | #endif 42 | }; 43 | 44 | } // namespace sparse 45 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
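// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// ExpandPosToCoordinates task above expands a pos array of per-row ranges into
// an explicit row-index array (the row component of a COO layout). The
// standalone version below uses a prefix-sum pos vector (entries of row r live
// in pos[r]..pos[r+1]) rather than the Rect<1> ranges used by the task.
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<int64_t> expand_pos_to_rows(const std::vector<int64_t>& pos)
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<int64_t> row_indices(pos.empty() ? 0 : pos.back());
  for (int64_t row = 0; row < rows; ++row) {
    // Every stored entry whose position falls in this row's range gets the row id.
    for (int64_t p = pos[row]; p < pos[row + 1]; ++p) { row_indices[p] = row; }
  }
  return row_indices;
}

// Example: pos = {0, 2, 2, 5} (3 rows, nnz = 5) yields row_indices = {0, 0, 2, 2, 2}.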
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ExpandPosToCoordinatesImplBody { 26 | using INDEX_TY = type_of; 27 | 28 | void operator()(const AccessorRO, 1>& pos, 29 | const AccessorWO& row_indices, 30 | const Rect<1>& rect) 31 | { 32 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 33 | for (auto row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 34 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 35 | row_indices[j_pos] = row; 36 | } 37 | } 38 | } 39 | }; 40 | 41 | /*static*/ void ExpandPosToCoordinates::omp_variant(TaskContext context) 42 | { 43 | pos_to_coordinates_template(context); 44 | } 45 | 46 | } // namespace sparse 47 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
20 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | using namespace legate; 25 | 26 | template 27 | struct ExpandPosToCoordinatesImplBody; 28 | 29 | template 30 | struct ExpandPosToCoordinatesImpl { 31 | template 32 | void operator()(ExpandPosToCoordinatesArgs& args) const 33 | { 34 | using INDEX_TY = type_of; 35 | 36 | auto pos = args.pos.read_accessor, 1>(); 37 | auto row_indices = args.row_indices.write_accessor(); 38 | auto pos_domain = args.pos.domain(); 39 | auto row_indices_domain = args.row_indices.domain(); 40 | 41 | if (pos_domain.empty() || row_indices_domain.empty()) { 42 | return; 43 | } 44 | ExpandPosToCoordinatesImplBody()(pos, row_indices, args.pos.shape<1>()); 45 | } 46 | }; 47 | 48 | template 49 | static void pos_to_coordinates_template(TaskContext context) 50 | { 51 | ExpandPosToCoordinatesArgs args{ 52 | context.outputs()[0], 53 | context.inputs()[0], 54 | }; 55 | index_type_dispatch(args.row_indices.code(), ExpandPosToCoordinatesImpl(), args); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct GetCSRDiagonalImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& diag, 30 | const AccessorRO, 1>& pos, 31 | const AccessorRO& crd, 32 | const AccessorRO& vals, 33 | const Rect<1>& rect) 34 | { 35 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 36 | diag[i] = 0.0; 37 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 38 | if (crd[j_pos] == i) { 39 | diag[i] = vals[j_pos]; 40 | } 41 | } 42 | } 43 | } 44 | }; 45 | 46 | /*static*/ void GetCSRDiagonal::cpu_variant(TaskContext context) 47 | { 48 | get_csr_diagonal_template(context); 49 | } 50 | 51 | namespace // unnamed 52 | { 53 | static void __attribute__((constructor)) register_tasks(void) 54 | { 55 | GetCSRDiagonal::register_variants(); 56 | } 57 | } // namespace 58 | 59 | } // namespace sparse 60 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void compute_diag_kernel(size_t rows, 27 | int64_t offset, 28 | AccessorWO diag, 29 | AccessorRO, 1> pos, 30 | AccessorRO crd, 31 | AccessorRO vals) 32 | { 33 | const auto idx = global_tid_1d(); 34 | if (idx >= rows) { 35 | return; 36 | } 37 | auto i = idx + offset; 38 | diag[i] = 0.0; 39 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 40 | if (crd[j_pos] == i) { 41 | diag[i] = vals[j_pos]; 42 | } 43 | } 44 | } 45 | 46 | template 47 | struct GetCSRDiagonalImplBody { 48 | using INDEX_TY = type_of; 49 | using VAL_TY = type_of; 50 | 51 | void operator()(const AccessorWO& diag, 52 | const AccessorRO, 1>& pos, 53 | const AccessorRO& crd, 54 | const AccessorRO& vals, 55 | const Rect<1>& rect) 56 | { 57 | auto stream = get_cached_stream(); 58 | auto blocks = get_num_blocks_1d(rect.volume()); 59 | compute_diag_kernel 60 | <<>>(rect.volume(), rect.lo[0], diag, pos, crd, vals); 61 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 62 | } 63 | }; 64 | 65 | /*static*/ void GetCSRDiagonal::gpu_variant(TaskContext context) 66 | { 67 | get_csr_diagonal_template(context); 68 | } 69 | 70 | } // namespace sparse 71 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | #include "legate/mapping/store.h" 24 | 25 | namespace sparse { 26 | struct GetCSRDiagonalArgs { 27 | const legate::PhysicalStore& diag; 28 | const legate::PhysicalStore& pos; 29 | const legate::PhysicalStore& crd; 30 | const legate::PhysicalStore& vals; 31 | }; 32 | 33 | class GetCSRDiagonal : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_DIAGONAL}; 36 | // TODO (rohany): We could rewrite this having each implementation just make 37 | // a call to thrust::transform, but the implementations are simple enough 38 | // anyway. 
39 | static void cpu_variant(legate::TaskContext ctx); 40 | #ifdef LEGATE_USE_OPENMP 41 | static void omp_variant(legate::TaskContext ctx); 42 | #endif 43 | #ifdef LEGATE_USE_CUDA 44 | static void gpu_variant(legate::TaskContext context); 45 | #endif 46 | }; 47 | 48 | } // namespace sparse 49 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct GetCSRDiagonalImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& diag, 30 | const AccessorRO, 1>& pos, 31 | const AccessorRO& crd, 32 | const AccessorRO& vals, 33 | const Rect<1>& rect) 34 | { 35 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 36 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | diag[i] = 0.0; 38 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 39 | if (crd[j_pos] == i) { 40 | diag[i] = vals[j_pos]; 41 | } 42 | } 43 | } 44 | } 45 | }; 46 | 47 | /*static*/ void GetCSRDiagonal::omp_variant(TaskContext context) 48 | { 49 | get_csr_diagonal_template(context); 50 | } 51 | 52 | } // namespace sparse 53 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
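// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// GetCSRDiagonal variants above walk each row's stored entries and keep the
// value whose column index equals the row index, leaving zero when no diagonal
// entry is stored. The same logic with prefix-sum pos / crd / vals vectors:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<double> csr_diagonal(const std::vector<int64_t>& pos,   // rows + 1
                                        const std::vector<int64_t>& crd,   // nnz
                                        const std::vector<double>& vals)   // nnz
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<double> diag(rows, 0.0);  // rows without a stored diagonal stay 0
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t p = pos[i]; p < pos[i + 1]; ++p) {
      if (crd[p] == i) { diag[i] = vals[p]; }
    }
  }
  return diag;
}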
20 | #include "legate_sparse/array/csr/get_diagonal.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct GetCSRDiagonalImplBody; 29 | 30 | template 31 | struct GetCSRDiagonalImpl { 32 | template 33 | void operator()(GetCSRDiagonalArgs& args) const 34 | { 35 | using INDEX_TY = type_of; 36 | using VAL_TY = type_of; 37 | 38 | auto diag = args.diag.write_accessor(); 39 | auto pos = args.pos.read_accessor, 1>(); 40 | auto crd = args.crd.read_accessor(); 41 | auto vals = args.vals.read_accessor(); 42 | 43 | assert(args.diag.domain().dense()); 44 | if (args.diag.domain().empty()) { 45 | return; 46 | } 47 | 48 | GetCSRDiagonalImplBody()( 49 | diag, pos, crd, vals, args.diag.shape<1>()); 50 | } 51 | }; 52 | 53 | template 54 | static void get_csr_diagonal_template(TaskContext context) 55 | { 56 | auto inputs = context.inputs(); 57 | GetCSRDiagonalArgs args{context.outputs()[0], inputs[0], inputs[1], inputs[2]}; 58 | index_type_value_type_dispatch( 59 | args.crd.code(), args.diag.code(), GetCSRDiagonalImpl{}, args); 60 | } 61 | } // namespace sparse 62 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/indexing.h" 18 | #include "legate_sparse/array/csr/indexing_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRIndexingCSRImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorRO, 1>& A_pos, 30 | const AccessorRO& A_crd, 31 | const AccessorRW& A_vals, 32 | const AccessorRO, 1>& mask_pos, 33 | const AccessorRO& mask_crd, 34 | const AccessorRO& value, 35 | const Rect<1>& rect) 36 | { 37 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 38 | size_t j_pos_start = A_pos[row].lo; 39 | size_t j_pos_end = A_pos[row].hi + 1; 40 | 41 | size_t m_pos_start = mask_pos[row].lo; 42 | size_t m_pos_end = mask_pos[row].hi + 1; 43 | 44 | size_t m_pos = m_pos_start; 45 | size_t j_pos = j_pos_start; 46 | 47 | // When the if condition is satisfied, the (row, col) of A and 48 | // mask match. Ideally, we would expect it to match for all 49 | // elements, even though mask stores only the True elements 50 | // making its sparsity pattern differ from A. 51 | // This would be the case if mask was derived from A. 52 | // However, if mask has entries that are not present in A, 53 | // then the else conditions will be hit. 54 | // Note that we don't update the vals array in those cases 55 | // since updating vals would require changing its size 56 | // apriori and hence the sparsity pattern of A, which is not 57 | // supported in this task. 
58 | 59 | while (m_pos < m_pos_end && j_pos < j_pos_end) { 60 | if (mask_crd[m_pos] == A_crd[j_pos]) { 61 | A_vals[j_pos] = static_cast(value[0]); 62 | j_pos++; 63 | m_pos++; 64 | } else if (mask_crd[m_pos] > A_crd[j_pos]) { 65 | // this element in A is either not found in mask or is False 66 | // in mask and thus not stored. This means the pointer for 67 | // mask (m_pos) would have skipped ahead of the pointer 68 | // for A (j_pos), so A needs to catch-up; increment j_pos 69 | j_pos++; 70 | } else { // mask_crd[m_pos] < A_crd[j_pos] 71 | // In this case, A is ahead and mask is behind in this row 72 | // which means mask has an entry (r,c) that was not in A. 73 | // Increment m_pos and let mask move ahead 74 | m_pos++; 75 | } 76 | // when either one of the pointers reach the end of the row, 77 | // we are done because we only update vals when (row, col) 78 | // of mask and A match exactly, and if one of the pointers 79 | // has reached the end of this row, the vals for this row 80 | // can never be updated, so exit the loop. 81 | } 82 | } 83 | } 84 | }; 85 | 86 | /* static */ void CSRIndexingCSR::cpu_variant(legate::TaskContext context) 87 | { 88 | csr_indexing_csr_template(context); 89 | } 90 | 91 | namespace // unnamed 92 | { 93 | static void __attribute__((constructor)) register_tasks(void) 94 | { 95 | CSRIndexingCSR::register_variants(); 96 | } 97 | } // namespace 98 | 99 | } // namespace sparse 100 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
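// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// CSRIndexingCSR task above assigns one scalar to the entries of A selected by
// a CSR mask, merging the two sorted column lists of each row with two
// pointers and writing only where the coordinates match exactly; A's sparsity
// pattern is never changed. The standalone per-row merge, with prefix-sum pos
// arrays instead of Rect ranges:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline void csr_masked_assign(const std::vector<int64_t>& a_pos,   // rows + 1
                              const std::vector<int64_t>& a_crd,
                              std::vector<double>&        a_vals,
                              const std::vector<int64_t>& m_pos,   // rows + 1
                              const std::vector<int64_t>& m_crd,
                              double value)
{
  const int64_t rows = static_cast<int64_t>(a_pos.size()) - 1;
  for (int64_t row = 0; row < rows; ++row) {
    int64_t j = a_pos[row];
    int64_t m = m_pos[row];
    // Two-pointer merge over the sorted column indices of A and the mask.
    while (j < a_pos[row + 1] && m < m_pos[row + 1]) {
      if (m_crd[m] == a_crd[j]) {
        a_vals[j] = value;  // coordinates match: update the stored value
        ++j; ++m;
      } else if (m_crd[m] > a_crd[j]) {
        ++j;  // A stores an entry the mask does not select; skip it
      } else {
        ++m;  // the mask selects an entry A does not store; nothing to update
      }
    }
  }
}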
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRIndexingCSRArgs { 26 | const legate::PhysicalStore& A_vals; 27 | const legate::PhysicalStore& A_pos; 28 | const legate::PhysicalStore& A_crd; 29 | const legate::PhysicalStore& key_pos; 30 | const legate::PhysicalStore& key_crd; 31 | const legate::PhysicalStore& value; 32 | }; 33 | 34 | class CSRIndexingCSR : public SparseTask { 35 | public: 36 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_INDEXING_CSR}; 37 | 38 | // TODO: The implementation of the below three variants is 39 | // identical and hence needs to be templated (DRY) 40 | 41 | public: 42 | static void cpu_variant(legate::TaskContext context); 43 | 44 | #ifdef LEGATE_USE_OPENMP 45 | static void omp_variant(legate::TaskContext context); 46 | #endif 47 | 48 | #ifdef LEGATE_USE_CUDA 49 | static void gpu_variant(legate::TaskContext context); 50 | #endif 51 | }; 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/indexing.h" 18 | #include "legate_sparse/array/csr/indexing_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRIndexingCSRImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorRO, 1>& A_pos, 30 | const AccessorRO& A_crd, 31 | const AccessorRW& A_vals, 32 | const AccessorRO, 1>& mask_pos, 33 | const AccessorRO& mask_crd, 34 | const AccessorRO& value, 35 | const Rect<1>& rect) 36 | { 37 | 38 | #pragma omp parallel for 39 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 40 | size_t j_pos_start = A_pos[row].lo; 41 | size_t j_pos_end = A_pos[row].hi + 1; 42 | 43 | size_t m_pos_start = mask_pos[row].lo; 44 | size_t m_pos_end = mask_pos[row].hi + 1; 45 | 46 | size_t m_pos = m_pos_start; 47 | size_t j_pos = j_pos_start; 48 | 49 | // When the if condition is satisfied, the (row, col) of A and 50 | // mask match. Ideally, we would expect it to match for all 51 | // elements, even though mask stores only the True elements 52 | // making its sparsity pattern differ from A. 53 | // This would be the case if mask was derived from A. 54 | // However, if mask has entries that are not present in A, 55 | // then the else conditions will be hit. 56 | // Note that we don't update the vals array in those cases 57 | // since updating vals would require changing its size 58 | // apriori and hence the sparsity pattern of A, which is not 59 | // supported in this task.
60 | 61 | while (m_pos < m_pos_end && j_pos < j_pos_end) { 62 | if (mask_crd[m_pos] == A_crd[j_pos]) { 63 | A_vals[j_pos] = static_cast(value[0]); 64 | j_pos++; 65 | m_pos++; 66 | } else if (mask_crd[m_pos] > A_crd[j_pos]) { 67 | // this element in A is either not found in mask or is False 68 | // in mask and thus not stored. This means the pointer for 69 | // mask (m_pos) would have skipped ahead of the pointer 70 | // for A (j_pos), so A needs to catch-up; increment j_pos 71 | j_pos++; 72 | } else { // mask_crd[m_pos] < A_crd[j_pos] 73 | // In this case, A is ahead and mask is behind in this row 74 | // which means mask has an entry (r,c) that was not in A. 75 | // Increment m_pos and let mask move ahead 76 | m_pos++; 77 | } 78 | // when either one of the pointers reach the end of the row, 79 | // we are done because we only update vals when (row, col) 80 | // of mask and A match exactly, and if one of the pointers 81 | // has reached the end of this row, the vals for this row 82 | // can never be updated, so exit the loop. 83 | } 84 | } 85 | } 86 | }; 87 | 88 | /* static */ void CSRIndexingCSR::omp_variant(TaskContext context) 89 | { 90 | csr_indexing_csr_template(context); 91 | } 92 | 93 | } // namespace sparse 94 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/array/csr/indexing.h" 20 | #include "legate_sparse/util/dispatch.h" 21 | 22 | namespace sparse { 23 | 24 | using namespace legate; 25 | 26 | template 27 | struct CSRIndexingCSRImplBody; 28 | 29 | template 30 | struct CSRIndexingCSRImpl { 31 | template 32 | void operator()(const CSRIndexingCSRArgs& args) 33 | { 34 | using INDEX_TY = type_of; 35 | using VAL_TY = type_of; 36 | 37 | auto A_pos = args.A_pos.read_accessor, 1>(); 38 | auto A_crd = args.A_crd.read_accessor(); 39 | auto A_vals = args.A_vals.read_write_accessor(); 40 | 41 | auto key_pos = args.key_pos.read_accessor, 1>(); 42 | auto key_crd = args.key_crd.read_accessor(); 43 | 44 | auto value = args.value.read_accessor(); 45 | 46 | // TODO: Rect is based on A_pos.shape, is that correct? 
47 | CSRIndexingCSRImplBody()( 48 | A_pos, A_crd, A_vals, key_pos, key_crd, value, args.A_pos.shape<1>()); 49 | } 50 | }; 51 | 52 | template 53 | static void csr_indexing_csr_template(TaskContext context) 54 | { 55 | CSRIndexingCSRArgs args{ 56 | context.outputs()[0], 57 | context.inputs()[0], 58 | context.inputs()[1], 59 | context.inputs()[2], 60 | context.inputs()[3], 61 | context.inputs()[4], // value 62 | }; 63 | 64 | index_type_value_type_dispatch( 65 | args.A_crd.code(), args.A_vals.code(), CSRIndexingCSRImpl(), args); 66 | } 67 | 68 | } // namespace sparse 69 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spgemm_csr_csr_csr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct SpGEMMCSRxCSRxCSRNNZArgs { 26 | const legate::PhysicalStore& nnz; 27 | const legate::PhysicalStore& B_pos; 28 | const legate::PhysicalStore& B_crd; 29 | const legate::PhysicalStore& C_pos; 30 | const legate::PhysicalStore& C_crd; 31 | }; 32 | 33 | class SpGEMMCSRxCSRxCSRNNZ : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_NNZ}; 36 | 37 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 38 | legate::VariantOptions{}.with_has_allocations(true); 39 | static constexpr legate::VariantOptions OMP_VARIANT_OPTIONS = 40 | legate::VariantOptions{}.with_has_allocations(true); 41 | 42 | public: 43 | static void cpu_variant(legate::TaskContext ctx); 44 | #ifdef LEGATE_USE_OPENMP 45 | static void omp_variant(legate::TaskContext ctx); 46 | #endif 47 | }; 48 | 49 | struct SpGEMMCSRxCSRxCSRArgs { 50 | const legate::PhysicalStore& A_pos; 51 | const legate::PhysicalStore& A_crd; 52 | const legate::PhysicalStore& A_vals; 53 | const legate::PhysicalStore& B_pos; 54 | const legate::PhysicalStore& B_crd; 55 | const legate::PhysicalStore& B_vals; 56 | const legate::PhysicalStore& C_pos; 57 | const legate::PhysicalStore& C_crd; 58 | const legate::PhysicalStore& C_vals; 59 | }; 60 | 61 | class SpGEMMCSRxCSRxCSR : public SparseTask { 62 | public: 63 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR}; 64 | 65 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 66 | legate::VariantOptions{}.with_has_allocations(true); 67 | static constexpr legate::VariantOptions OMP_VARIANT_OPTIONS = 68 | legate::VariantOptions{}.with_has_allocations(true); 69 | 70 | public: 71 | static void cpu_variant(legate::TaskContext ctx); 72 | #ifdef LEGATE_USE_OPENMP 73 | static void omp_variant(legate::TaskContext ctx); 74 | #endif 75 | }; 76 | 77 | struct SpGEMMCSRxCSRxCSRGPUArgs { 78 | const legate::PhysicalStore& 
A_pos; 79 | const legate::PhysicalStore& A_crd; 80 | const legate::PhysicalStore& A_vals; 81 | const legate::PhysicalStore& B_pos; 82 | const legate::PhysicalStore& B_crd; 83 | const legate::PhysicalStore& B_vals; 84 | const legate::PhysicalStore& C_pos; 85 | const legate::PhysicalStore& C_crd; 86 | const legate::PhysicalStore& C_vals; 87 | const uint64_t A2_dim; 88 | const uint64_t C1_dim; 89 | const uint64_t fast_switch; 90 | std::vector comms; 91 | }; 92 | 93 | // CSRxCSRxCSR SpGEMM for NVIDIA GPUs. Due to limitations with cuSPARSE, 94 | // we take a different approach than on CPUs and OMPs. 95 | class SpGEMMCSRxCSRxCSRGPU : public SparseTask { 96 | public: 97 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_GPU}; 98 | 99 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 100 | legate::VariantOptions{}.with_has_allocations(true); 101 | 102 | public: 103 | #ifdef LEGATE_USE_CUDA 104 | static void gpu_variant(legate::TaskContext ctx); 105 | #endif 106 | }; 107 | 108 | } // namespace sparse 109 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/spmv.h" 18 | #include "legate_sparse/array/csr/spmv_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRSpMVRowSplitImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& y, 30 | const AccessorRO, 1>& A_pos, 31 | const AccessorRO& A_crd, 32 | const AccessorRO& A_vals, 33 | const AccessorRO& x, 34 | const Rect<1>& rect) 35 | { 36 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | VAL_TY sum = 0.0; 38 | for (size_t j_pos = A_pos[i].lo; j_pos < A_pos[i].hi + 1; j_pos++) { 39 | auto j = A_crd[j_pos]; 40 | sum += A_vals[j_pos] * x[j]; 41 | } 42 | y[i] = sum; 43 | } 44 | } 45 | }; 46 | 47 | /*static*/ void CSRSpMVRowSplit::cpu_variant(TaskContext context) 48 | { 49 | csr_spmv_row_split_template(context); 50 | } 51 | 52 | namespace // unnamed 53 | { 54 | static void __attribute__((constructor)) register_tasks(void) 55 | { 56 | CSRSpMVRowSplit::register_variants(); 57 | } 58 | } // namespace 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
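// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the SpGEMM
// header above exposes the same two-phase shape as the other conversions -- an
// NNZ task followed by a fill task. The actual CPU/OMP/GPU kernels are not
// shown in this excerpt; one standard way to realize a CSR x CSR -> CSR
// product is Gustavson's row-wise algorithm with a dense accumulator, sketched
// below purely for illustration. It does not claim to mirror the library's
// kernels, and it emits each row's columns in discovery order (unsorted).
// ----------------------------------------------------------------------------
#include <algorithm>
#include <cstdint>
#include <vector>

struct CSRMat {
  int64_t rows = 0, cols = 0;
  std::vector<int64_t> pos, crd;  // pos has rows + 1 entries
  std::vector<double> vals;
};

inline CSRMat spgemm_csr_csr(const CSRMat& B, const CSRMat& C)
{
  CSRMat A;
  A.rows = B.rows;
  A.cols = C.cols;
  A.pos.assign(A.rows + 1, 0);
  std::vector<int64_t> last_row(A.cols, -1);  // per-column marker, reset lazily
  // Phase 1 (symbolic): count the distinct output columns of each row.
  for (int64_t i = 0; i < B.rows; ++i) {
    int64_t count = 0;
    for (int64_t p = B.pos[i]; p < B.pos[i + 1]; ++p) {
      const int64_t k = B.crd[p];
      for (int64_t q = C.pos[k]; q < C.pos[k + 1]; ++q) {
        if (last_row[C.crd[q]] != i) { last_row[C.crd[q]] = i; ++count; }
      }
    }
    A.pos[i + 1] = A.pos[i] + count;
  }
  A.crd.resize(A.pos[A.rows]);
  A.vals.resize(A.pos[A.rows]);
  // Phase 2 (numeric): accumulate partial products row by row.
  std::vector<double> acc(A.cols, 0.0);
  std::fill(last_row.begin(), last_row.end(), -1);
  for (int64_t i = 0; i < B.rows; ++i) {
    int64_t out = A.pos[i];
    for (int64_t p = B.pos[i]; p < B.pos[i + 1]; ++p) {
      const int64_t k = B.crd[p];
      const double  b = B.vals[p];
      for (int64_t q = C.pos[k]; q < C.pos[k + 1]; ++q) {
        const int64_t j = C.crd[q];
        if (last_row[j] != i) {  // first contribution to column j in this row
          last_row[j] = i;
          A.crd[out++] = j;
          acc[j] = 0.0;
        }
        acc[j] += b * C.vals[q];
      }
    }
    for (int64_t p = A.pos[i]; p < A.pos[i + 1]; ++p) { A.vals[p] = acc[A.crd[p]]; }
  }
  return A;
}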
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRSpMVRowSplitArgs { 26 | const legate::PhysicalStore& y; 27 | const legate::PhysicalStore& A_pos; 28 | const legate::PhysicalStore& A_crd; 29 | const legate::PhysicalStore& A_vals; 30 | const legate::PhysicalStore& x; 31 | }; 32 | 33 | class CSRSpMVRowSplit : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_SPMV_ROW_SPLIT}; 36 | 37 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 38 | legate::VariantOptions{}.with_has_allocations(true); 39 | 40 | public: 41 | static void cpu_variant(legate::TaskContext ctx); 42 | #ifdef LEGATE_USE_OPENMP 43 | static void omp_variant(legate::TaskContext ctx); 44 | #endif 45 | #ifdef LEGATE_USE_CUDA 46 | static void gpu_variant(legate::TaskContext context); 47 | #endif 48 | }; 49 | 50 | } // namespace sparse 51 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/spmv.h" 18 | #include "legate_sparse/array/csr/spmv_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRSpMVRowSplitImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& y, 30 | const AccessorRO, 1>& A_pos, 31 | const AccessorRO& A_crd, 32 | const AccessorRO& A_vals, 33 | const AccessorRO& x, 34 | const Rect<1>& rect) 35 | { 36 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 37 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 38 | VAL_TY sum = 0.0; 39 | for (size_t j_pos = A_pos[i].lo; j_pos < A_pos[i].hi + 1; j_pos++) { 40 | auto j = A_crd[j_pos]; 41 | sum += A_vals[j_pos] * x[j]; 42 | } 43 | y[i] = sum; 44 | } 45 | } 46 | }; 47 | 48 | /*static*/ void CSRSpMVRowSplit::omp_variant(TaskContext context) 49 | { 50 | csr_spmv_row_split_template(context); 51 | } 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/csr/spmv.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct CSRSpMVRowSplitImplBody; 31 | 32 | template 33 | struct CSRSpMVRowSplitImpl { 34 | template 35 | void operator()(CSRSpMVRowSplitArgs& args) const 36 | { 37 | using INDEX_TY = type_of; 38 | using VAL_TY = type_of; 39 | 40 | auto y = args.y.write_accessor(); 41 | auto A_pos = args.A_pos.read_accessor, 1>(); 42 | auto A_crd = args.A_crd.read_accessor(); 43 | auto A_vals = args.A_vals.read_accessor(); 44 | auto x = args.x.read_accessor(); 45 | 46 | assert(args.y.domain().dense()); 47 | if (args.y.domain().empty()) { 48 | return; 49 | } 50 | 51 | CSRSpMVRowSplitImplBody()( 52 | y, A_pos, A_crd, A_vals, x, args.y.shape<1>()); 53 | } 54 | }; 55 | 56 | template 57 | static void csr_spmv_row_split_template(TaskContext context) 58 | { 59 | auto inputs = context.inputs(); 60 | CSRSpMVRowSplitArgs args{context.outputs()[0], inputs[0], inputs[1], inputs[2], inputs[3]}; 61 | 62 | index_type_value_type_dispatch( 63 | args.A_crd.code(), args.y.code(), CSRSpMVRowSplitImpl{}, args); 64 | } 65 | 66 | } // namespace sparse 67 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
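// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): CSRSpMVRowSplit
// above computes y = A * x with the rows of A (and of y) split across point
// tasks; every row is reduced into a private accumulator before being stored,
// so no atomics are needed on y. The same per-row kernel with prefix-sum pos:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<double> csr_spmv(const std::vector<int64_t>& pos,  // rows + 1
                                    const std::vector<int64_t>& crd,  // nnz
                                    const std::vector<double>& vals,  // nnz
                                    const std::vector<double>& x)     // cols
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<double> y(rows, 0.0);
  for (int64_t i = 0; i < rows; ++i) {
    double sum = 0.0;  // per-row accumulator, mirroring the VAL_TY sum above
    for (int64_t p = pos[i]; p < pos[i + 1]; ++p) { sum += vals[p] * x[crd[p]]; }
    y[i] = sum;
  }
  return y;
}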
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct ScaleRect1ImplBody { 26 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 27 | { 28 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 29 | output[i].lo = output[i].lo + scale; 30 | output[i].hi = output[i].hi + scale; 31 | } 32 | } 33 | }; 34 | 35 | /*static*/ void ScaleRect1::cpu_variant(TaskContext context) 36 | { 37 | scale_rect_1_template(context); 38 | } 39 | 40 | namespace // unnamed 41 | { 42 | static void __attribute__((constructor)) register_tasks(void) { ScaleRect1::register_variants(); } 43 | } // namespace 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | __global__ void scale_rect1_kernel(size_t elems, 26 | coord_t offset, 27 | const AccessorRW, 1> out, 28 | int64_t scale) 29 | { 30 | const auto tid = global_tid_1d(); 31 | if (tid >= elems) { 32 | return; 33 | } 34 | const auto idx = tid + offset; 35 | out[idx].lo = out[idx].lo + scale; 36 | out[idx].hi = out[idx].hi + scale; 37 | } 38 | 39 | template <> 40 | struct ScaleRect1ImplBody { 41 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 42 | { 43 | auto elems = rect.volume(); 44 | auto blocks = get_num_blocks_1d(elems); 45 | auto stream = get_cached_stream(); 46 | scale_rect1_kernel<<>>(elems, rect.lo, output, scale); 47 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 48 | } 49 | }; 50 | 51 | /*static*/ void ScaleRect1::gpu_variant(TaskContext context) 52 | { 53 | scale_rect_1_template(context); 54 | } 55 | 56 | } // namespace sparse 57 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ScaleRect1Args { 26 | const legate::PhysicalStore& out; 27 | int64_t scale; 28 | }; 29 | 30 | class ScaleRect1 : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SCALE_RECT_1}; 33 | static void cpu_variant(legate::TaskContext context); 34 | #ifdef LEGATE_USE_OPENMP 35 | static void omp_variant(legate::TaskContext context); 36 | #endif 37 | #ifdef LEGATE_USE_CUDA 38 | static void gpu_variant(legate::TaskContext context); 39 | #endif 40 | }; 41 | 42 | } // namespace sparse 43 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct ScaleRect1ImplBody { 26 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 27 | { 28 | #pragma omp parallel for schedule(static) 29 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 30 | output[i].lo = output[i].lo + scale; 31 | output[i].hi = output[i].hi + scale; 32 | } 33 | } 34 | }; 35 | 36 | /*static*/ void ScaleRect1::omp_variant(TaskContext context) 37 | { 38 | scale_rect_1_template(context); 39 | } 40 | 41 | } // namespace sparse 42 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/util/scale_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct ScaleRect1ImplBody; 29 | 30 | template 31 | struct ScaleRect1Impl { 32 | void operator()(ScaleRect1Args& args) const 33 | { 34 | auto output = args.out.read_write_accessor, 1>(); 35 | if (args.out.domain().empty()) { 36 | return; 37 | } 38 | ScaleRect1ImplBody()(output, args.scale, args.out.shape<1>()); 39 | } 40 | }; 41 | 42 | template 43 | static void scale_rect_1_template(TaskContext context) 44 | { 45 | auto task = context.task_; 46 | auto scale = task->futures[0].get_result(); 47 | ScaleRect1Args args{context.outputs()[0], scale}; 48 | ScaleRect1Impl{}(args); 49 | } 50 | 51 | } // namespace sparse 52 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
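// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): despite its
// name, ScaleRect1 above applies an additive shift -- it adds one scalar
// offset to both bounds of every Rect<1> in a pos array, e.g. to rebase
// locally built ranges against a global offset. The same operation with a
// simple pair-based rect:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <utility>
#include <vector>

using Rect1 = std::pair<int64_t, int64_t>;  // {lo, hi}, both inclusive

inline void shift_rects(std::vector<Rect1>& pos, int64_t offset)
{
  for (auto& r : pos) {
    r.first  += offset;  // lo
    r.second += offset;  // hi
  }
}

// Example: {{0, 1}, {2, 4}} shifted by 10 becomes {{10, 11}, {12, 14}}.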
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct UnZipRect1ImplBody { 26 | void operator()(const AccessorWO& out1, 27 | const AccessorWO& out2, 28 | const AccessorRO, 1>& in, 29 | const Rect<1>& rect) 30 | { 31 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 32 | out1[i] = in[i].lo; 33 | out2[i] = in[i].hi; 34 | } 35 | } 36 | }; 37 | 38 | /*static*/ void UnZipRect1::cpu_variant(TaskContext context) 39 | { 40 | unzip_rect_1_template(context); 41 | } 42 | 43 | namespace // unnamed 44 | { 45 | static void __attribute__((constructor)) register_tasks(void) { UnZipRect1::register_variants(); } 46 | } // namespace 47 | 48 | } // namespace sparse 49 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | __global__ void unzip_rect1_kernel(size_t elems, 26 | coord_t offset, 27 | const AccessorWO lo, 28 | const AccessorWO hi, 29 | const AccessorRO, 1> in) 30 | { 31 | const auto tid = global_tid_1d(); 32 | if (tid >= elems) { 33 | return; 34 | } 35 | const auto idx = tid + offset; 36 | lo[idx] = in[idx].lo; 37 | hi[idx] = in[idx].hi; 38 | } 39 | 40 | template <> 41 | struct UnZipRect1ImplBody { 42 | void operator()(const AccessorWO& out1, 43 | const AccessorWO& out2, 44 | const AccessorRO, 1>& in, 45 | const Rect<1>& rect) 46 | { 47 | auto elems = rect.volume(); 48 | auto blocks = get_num_blocks_1d(elems); 49 | auto stream = get_cached_stream(); 50 | unzip_rect1_kernel<<>>(elems, rect.lo, out1, out2, in); 51 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 52 | } 53 | }; 54 | 55 | /*static*/ void UnZipRect1::gpu_variant(TaskContext context) 56 | { 57 | unzip_rect_1_template(context); 58 | } 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct UnZipRect1Args { 26 | const legate::PhysicalStore& out1; 27 | const legate::PhysicalStore& out2; 28 | const legate::PhysicalStore& in; 29 | }; 30 | 31 | class UnZipRect1 : public SparseTask { 32 | public: 33 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UNZIP_RECT_1}; 34 | static void cpu_variant(legate::TaskContext ctx); 35 | #ifdef LEGATE_USE_OPENMP 36 | static void omp_variant(legate::TaskContext ctx); 37 | #endif 38 | #ifdef LEGATE_USE_CUDA 39 | static void gpu_variant(legate::TaskContext context); 40 | #endif 41 | }; 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct UnZipRect1ImplBody { 26 | void operator()(const AccessorWO& out1, 27 | const AccessorWO& out2, 28 | const AccessorRO, 1>& in, 29 | const Rect<1>& rect) 30 | { 31 | #pragma omp parallel for schedule(static) 32 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | out1[i] = in[i].lo; 34 | out2[i] = in[i].hi; 35 | } 36 | } 37 | }; 38 | 39 | /*static*/ void UnZipRect1::omp_variant(TaskContext context) 40 | { 41 | unzip_rect_1_template(context); 42 | } 43 | 44 | } // namespace sparse 45 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/util/unzip_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct UnZipRect1ImplBody; 29 | 30 | template 31 | struct UnZipRect1Impl { 32 | void operator()(UnZipRect1Args& args) const 33 | { 34 | auto out1 = args.out1.write_accessor(); 35 | auto out2 = args.out2.write_accessor(); 36 | auto in = args.in.read_accessor, 1>(); 37 | if (args.in.domain().empty()) { 38 | return; 39 | } 40 | UnZipRect1ImplBody()(out1, out2, in, args.in.shape<1>()); 41 | } 42 | }; 43 | 44 | template 45 | static void unzip_rect_1_template(TaskContext context) 46 | { 47 | auto outputs = context.outputs(); 48 | UnZipRect1Args args{outputs[0], outputs[1], context.inputs()[0]}; 49 | UnZipRect1Impl{}(args); 50 | } 51 | 52 | } // namespace sparse 53 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ZipToRect1ImplBody { 26 | void operator()(const AccessorWO, 1>& output, 27 | const AccessorRO& lo, 28 | const AccessorRO& hi, 29 | const Rect<1>& rect) 30 | { 31 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 32 | output[i] = Rect<1>{Point<1>{lo[i]}, Point<1>{hi[i] - 1}}; 33 | } 34 | } 35 | }; 36 | 37 | /*static*/ void ZipToRect1::cpu_variant(TaskContext context) 38 | { 39 | zip_to_rect_1_template(context); 40 | } 41 | 42 | namespace // unnamed 43 | { 44 | static void __attribute__((constructor)) register_tasks(void) { ZipToRect1::register_variants(); } 45 | } // namespace 46 | 47 | } // namespace sparse 48 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void zip_rect1_kernel(size_t elems, 27 | coord_t offset, 28 | const AccessorWO, 1> out, 29 | const AccessorRO lo, 30 | const AccessorRO hi) 31 | { 32 | const auto tid = global_tid_1d(); 33 | if (tid >= elems) { 34 | return; 35 | } 36 | const auto idx = tid + offset; 37 | out[idx] = {lo[idx], hi[idx] - 1}; 38 | } 39 | 40 | template 41 | struct ZipToRect1ImplBody { 42 | void operator()(const AccessorWO, 1>& output, 43 | const AccessorRO& lo, 44 | const AccessorRO& hi, 45 | const Rect<1>& rect) 46 | { 47 | auto stream = get_cached_stream(); 48 | auto elems = rect.volume(); 49 | auto blocks = get_num_blocks_1d(elems); 50 | zip_rect1_kernel<<>>(elems, rect.lo, output, lo, hi); 51 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 52 | } 53 | }; 54 | 55 | /*static*/ void ZipToRect1::gpu_variant(TaskContext context) 56 | { 57 | zip_to_rect_1_template(context); 58 | } 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ZipToRect1Args { 26 | const legate::PhysicalStore& out; 27 | const legate::PhysicalStore& lo; 28 | const legate::PhysicalStore& hi; 29 | }; 30 | 31 | class ZipToRect1 : public SparseTask { 32 | public: 33 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_ZIP_TO_RECT_1}; 34 | static void cpu_variant(legate::TaskContext ctx); 35 | #ifdef LEGATE_USE_OPENMP 36 | static void omp_variant(legate::TaskContext ctx); 37 | #endif 38 | #ifdef LEGATE_USE_CUDA 39 | static void gpu_variant(legate::TaskContext ctx); 40 | #endif 41 | }; 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ZipToRect1ImplBody { 26 | void operator()(const AccessorWO, 1>& output, 27 | const AccessorRO& lo, 28 | const AccessorRO& hi, 29 | const Rect<1>& rect) 30 | { 31 | #pragma omp parallel for schedule(static) 32 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | output[i] = Rect<1>{Point<1>{lo[i]}, Point<1>{hi[i] - 1}}; 34 | } 35 | } 36 | }; 37 | 38 | /*static*/ void ZipToRect1::omp_variant(TaskContext context) 39 | { 40 | zip_to_rect_1_template(context); 41 | } 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
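// The following note and worked example are added documentation, not part of the original
// sources; the values shown are hypothetical. ZipToRect1 packs two integer offset arrays into
// the Rect<1> "pos" representation used for CSR rows: each output entry covers the half-open
// range [lo[i], hi[i]) stored as the inclusive rect {lo[i], hi[i] - 1}.
//
//   lo  = [0, 2, 5]                 // index of the first nonzero of each row
//   hi  = [2, 5, 7]                 // one past the last nonzero of each row
//   pos = [{0,1}, {2,4}, {5,6}]     // row i owns crd/vals entries in pos[i]
//
// An empty row (lo[i] == hi[i]) becomes the empty rect {lo[i], lo[i] - 1}, which has zero
// volume, so downstream tasks iterate over no nonzeros for that row.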
20 | #include "legate_sparse/array/util/zip_to_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct ZipToRect1ImplBody; 29 | 30 | template 31 | struct ZipToRect1Impl { 32 | void operator()(ZipToRect1Args& args) const 33 | { 34 | auto output = args.out.write_accessor, 1>(); 35 | auto lo = args.lo.read_accessor(); 36 | auto hi = args.hi.read_accessor(); 37 | if (args.out.domain().empty()) { 38 | return; 39 | } 40 | ZipToRect1ImplBody()(output, lo, hi, args.out.shape<1>()); 41 | } 42 | }; 43 | 44 | template 45 | static void zip_to_rect_1_template(TaskContext context) 46 | { 47 | auto inputs = context.inputs(); 48 | ZipToRect1Args args{context.outputs()[0], inputs[0], inputs[1]}; 49 | if (inputs[0].data().type().code() == legate::Type::Code::INT64) { 50 | ZipToRect1Impl{}(args); 51 | } else { 52 | assert(inputs[0].data().type().code() == legate::Type::Code::UINT64); 53 | ZipToRect1Impl{}(args); 54 | } 55 | } 56 | 57 | } // namespace sparse 58 | -------------------------------------------------------------------------------- /src/legate_sparse/cffi.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2023-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #ifndef __LEGATE_SPARSE_CFFI_H__ 18 | #define __LEGATE_SPARSE_CFFI_H__ 19 | 20 | enum LegateSparseOpCode { 21 | _LEGATE_SPARSE_OP_CODE_BASE = 0, 22 | LEGATE_SPARSE_CSR_TO_DENSE, 23 | LEGATE_SPARSE_DENSE_TO_CSR_NNZ, 24 | LEGATE_SPARSE_DENSE_TO_CSR, 25 | LEGATE_SPARSE_BOUNDS_FROM_PARTITIONED_COORDINATES, 26 | LEGATE_SPARSE_SORTED_COORDS_TO_COUNTS, 27 | LEGATE_SPARSE_EXPAND_POS_TO_COORDINATES, 28 | 29 | // File IO. 30 | LEGATE_SPARSE_READ_MTX_TO_COO, 31 | 32 | // Operations on matrices that aren't quite tensor algebra related. 33 | LEGATE_SPARSE_CSR_DIAGONAL, 34 | 35 | // Indexing a CSR matrix with another CSR matrix 36 | LEGATE_SPARSE_CSR_INDEXING_CSR, 37 | 38 | // Linear algebra operations 39 | LEGATE_SPARSE_CSR_SPMV_ROW_SPLIT, 40 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_NNZ, 41 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR, 42 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_GPU, 43 | 44 | // Dense linear algebra tasks needed for things 45 | // like iterative linear solvers. 46 | LEGATE_SPARSE_AXPBY, 47 | 48 | // nonzero API 49 | LEGATE_SPARSE_NONZERO, 50 | 51 | // Utility tasks. 52 | LEGATE_SPARSE_ZIP_TO_RECT_1, 53 | LEGATE_SPARSE_UNZIP_RECT_1, 54 | LEGATE_SPARSE_SCALE_RECT_1, 55 | LEGATE_SPARSE_FAST_IMAGE_RANGE, 56 | LEGATE_SPARSE_UPCAST_FUTURE_TO_REGION, 57 | 58 | // Utility tasks for loading cuda libraries. 
59 | LEGATE_SPARSE_LOAD_CUDALIBS, 60 | LEGATE_SPARSE_UNLOAD_CUDALIBS, 61 | 62 | LEGATE_SPARSE_LAST_TASK, // must be last 63 | }; 64 | 65 | #endif // __LEGATE_SPARSE_CFFI_H__ 66 | -------------------------------------------------------------------------------- /src/legate_sparse/cudalibs.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/sparse.h" 18 | #include "legate_sparse/sparse_c.h" 19 | #include "legate_sparse/cudalibs.h" 20 | 21 | #include 22 | 23 | namespace sparse { 24 | 25 | CUDALibraries::CUDALibraries() : finalized_(false), cusparse_(nullptr) {} 26 | 27 | CUDALibraries::~CUDALibraries() { finalize(); } 28 | 29 | void CUDALibraries::finalize() 30 | { 31 | if (finalized_) { 32 | return; 33 | } 34 | if (cusparse_ != nullptr) { 35 | finalize_cusparse(); 36 | } 37 | finalized_ = true; 38 | } 39 | 40 | void CUDALibraries::finalize_cusparse() 41 | { 42 | CHECK_CUSPARSE(cusparseDestroy(cusparse_)); 43 | cusparse_ = nullptr; 44 | } 45 | 46 | cusparseHandle_t CUDALibraries::get_cusparse() 47 | { 48 | if (this->cusparse_ == nullptr) { 49 | CHECK_CUSPARSE(cusparseCreate(&this->cusparse_)); 50 | } 51 | return this->cusparse_; 52 | } 53 | 54 | static CUDALibraries& get_cuda_libraries(legate::Processor proc) 55 | { 56 | if (proc.kind() != legate::Processor::TOC_PROC) { 57 | fprintf(stderr, "Illegal request for CUDA libraries for non-GPU processor"); 58 | LEGATE_ABORT("Illegal request for CUDA libraries for non-GPU processor"); 59 | } 60 | 61 | static CUDALibraries cuda_libraries[LEGION_MAX_NUM_PROCS]; 62 | const auto proc_id = proc.id & (LEGION_MAX_NUM_PROCS - 1); 63 | return cuda_libraries[proc_id]; 64 | } 65 | 66 | legate::cuda::StreamView get_cached_stream() 67 | { 68 | return legate::cuda::StreamPool::get_stream_pool().get_stream(); 69 | } 70 | 71 | cusparseHandle_t get_cusparse() 72 | { 73 | const auto proc = legate::Processor::get_executing_processor(); 74 | auto& lib = get_cuda_libraries(proc); 75 | return lib.get_cusparse(); 76 | } 77 | 78 | class LoadCUDALibsTask : public SparseTask { 79 | public: 80 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_LOAD_CUDALIBS}; 81 | 82 | public: 83 | static void gpu_variant(legate::TaskContext context) 84 | { 85 | const auto proc = legate::Processor::get_executing_processor(); 86 | auto& lib = get_cuda_libraries(proc); 87 | lib.get_cusparse(); 88 | } 89 | }; 90 | 91 | class UnloadCUDALibsTask : public SparseTask { 92 | public: 93 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UNLOAD_CUDALIBS}; 94 | 95 | public: 96 | static void gpu_variant(legate::TaskContext context) 97 | { 98 | const auto proc = legate::Processor::get_executing_processor(); 99 | auto& lib = get_cuda_libraries(proc); 100 | lib.finalize(); 101 | } 102 | }; 103 | 104 | static void __attribute__((constructor)) register_tasks(void) 105 
| { 106 | LoadCUDALibsTask::register_variants(); 107 | UnloadCUDALibsTask::register_variants(); 108 | } 109 | 110 | } // namespace sparse 111 | -------------------------------------------------------------------------------- /src/legate_sparse/cudalibs.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | struct CUDALibraries { 24 | public: 25 | CUDALibraries(); 26 | ~CUDALibraries(); 27 | 28 | private: 29 | // Prevent copying and overwriting. 30 | CUDALibraries(const CUDALibraries& rhs) = delete; 31 | CUDALibraries& operator=(const CUDALibraries& rhs) = delete; 32 | 33 | public: 34 | void finalize(); 35 | cusparseHandle_t get_cusparse(); 36 | 37 | private: 38 | void finalize_cusparse(); 39 | 40 | private: 41 | bool finalized_; 42 | cusparseHandle_t cusparse_; 43 | }; 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/io/mtx_to_coo.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | class ReadMTXToCOO : public SparseTask { 26 | public: 27 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_READ_MTX_TO_COO}; 28 | 29 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 30 | legate::VariantOptions{}.with_has_allocations(true); 31 | 32 | public: 33 | static void cpu_variant(legate::TaskContext ctx); 34 | }; 35 | 36 | } // namespace sparse 37 | -------------------------------------------------------------------------------- /src/legate_sparse/linalg/axpby.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | 
20 | namespace sparse {
21 | 
22 | using namespace legate;
23 | 
24 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
25 | struct AXPBYImplBody<VariantKind::CPU, VAL_CODE, IS_ALPHA, NEGATE> {
26 |   using VAL_TY = type_of<VAL_CODE>;
27 | 
28 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
29 |                   const AccessorRO<VAL_TY, 1>& x,
30 |                   const AccessorRO<VAL_TY, 1>& a,
31 |                   const AccessorRO<VAL_TY, 1>& b,
32 |                   const Rect<1>& rect)
33 |   {
34 |     auto val = a[0] / b[0];
35 |     if (NEGATE) {
36 |       val = static_cast<VAL_TY>(-1) * val;
37 |     }
38 |     for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) {
39 |       if (IS_ALPHA) {
40 |         y[i] = val * x[i] + y[i];
41 |       } else {
42 |         y[i] = x[i] + val * y[i];
43 |       }
44 |     }
45 |   }
46 | };
47 | 
48 | /*static*/ void AXPBY::cpu_variant(TaskContext context)
49 | {
50 |   axpby_template<VariantKind::CPU>(context);
51 | }
52 | 
53 | namespace  // unnamed
54 | {
55 | static void __attribute__((constructor)) register_tasks(void) { AXPBY::register_variants(); }
56 | }  // namespace
57 | 
58 | }  // namespace sparse
59 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby.cu:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | #include "legate_sparse/util/cuda_help.h"
20 | 
21 | namespace sparse {
22 | 
23 | using namespace legate;
24 | 
25 | template <typename VAL_TY, bool IS_ALPHA, bool NEGATE>
26 | __global__ void axpby_kernel(size_t elems,
27 |                              coord_t offset,
28 |                              AccessorRW<VAL_TY, 1> y,
29 |                              AccessorRO<VAL_TY, 1> x,
30 |                              AccessorRO<VAL_TY, 1> a,
31 |                              AccessorRO<VAL_TY, 1> b)
32 | {
33 |   const auto idx = global_tid_1d();
34 |   if (idx >= elems) {
35 |     return;
36 |   }
37 |   auto i   = idx + offset;
38 |   auto val = a[0] / b[0];
39 |   if (NEGATE) {
40 |     val = static_cast<VAL_TY>(-1) * val;
41 |   }
42 |   if (IS_ALPHA) {
43 |     y[i] = val * x[i] + y[i];
44 |   } else {
45 |     y[i] = x[i] + val * y[i];
46 |   }
47 | }
48 | 
49 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
50 | struct AXPBYImplBody<VariantKind::GPU, VAL_CODE, IS_ALPHA, NEGATE> {
51 |   using VAL_TY = type_of<VAL_CODE>;
52 | 
53 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
54 |                   const AccessorRO<VAL_TY, 1>& x,
55 |                   const AccessorRO<VAL_TY, 1>& a,
56 |                   const AccessorRO<VAL_TY, 1>& b,
57 |                   const Rect<1>& rect)
58 |   {
59 |     auto elems  = rect.volume();
60 |     auto blocks = get_num_blocks_1d(elems);
61 |     auto stream = get_cached_stream();
62 |     axpby_kernel<VAL_TY, IS_ALPHA, NEGATE>
63 |       <<<blocks, THREADS_PER_BLOCK, 0, stream>>>(elems, rect.lo[0], y, x, a, b);
64 |     LEGATE_SPARSE_CHECK_CUDA_STREAM(stream);
65 |   }
66 | };
67 | 
68 | /*static*/ void AXPBY::gpu_variant(TaskContext context)
69 | {
70 |   axpby_template<VariantKind::GPU>(context);
71 | }
72 | 
73 | }  // namespace sparse
74 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include "legate_sparse/sparse.h"
20 | #include "legate_sparse/sparse_c.h"
21 | #include "legate.h"
22 | 
23 | namespace sparse {
24 | 
25 | struct AXPBYArgs {
26 |   const legate::PhysicalStore& y;
27 |   const legate::PhysicalStore& x;
28 |   const legate::PhysicalStore& a;
29 |   const legate::PhysicalStore& b;
30 |   const bool isalpha;
31 |   const bool negate;
32 | };
33 | 
34 | class AXPBY : public SparseTask<AXPBY> {
35 |  public:
36 |   static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_AXPBY};
37 |   static void cpu_variant(legate::TaskContext ctx);
38 | #ifdef LEGATE_USE_OPENMP
39 |   static void omp_variant(legate::TaskContext ctx);
40 | #endif
41 | #ifdef LEGATE_USE_CUDA
42 |   static void gpu_variant(legate::TaskContext ctx);
43 | #endif
44 | };
45 | 
46 | }  // namespace sparse
47 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby_omp.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | 
20 | namespace sparse {
21 | 
22 | using namespace legate;
23 | 
24 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
25 | struct AXPBYImplBody<VariantKind::OMP, VAL_CODE, IS_ALPHA, NEGATE> {
26 |   using VAL_TY = type_of<VAL_CODE>;
27 | 
28 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
29 |                   const AccessorRO<VAL_TY, 1>& x,
30 |                   const AccessorRO<VAL_TY, 1>& a,
31 |                   const AccessorRO<VAL_TY, 1>& b,
32 |                   const Rect<1>& rect)
33 |   {
34 |     auto val = a[0] / b[0];
35 |     if (NEGATE) {
36 |       val = static_cast<VAL_TY>(-1) * val;
37 |     }
38 | #pragma omp parallel for schedule(static)
39 |     for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) {
40 |       if (IS_ALPHA) {
41 |         y[i] = val * x[i] + y[i];
42 |       } else {
43 |         y[i] = x[i] + val * y[i];
44 |       }
45 |     }
46 |   }
47 | };
48 | 
49 | /*static*/ void AXPBY::omp_variant(TaskContext context)
50 | {
51 |   axpby_template<VariantKind::OMP>(context);
52 | }
53 | 
54 | }  // namespace sparse
55 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby_template.inl:
--------------------------------------------------------------------------------
 1 | /* Copyright 2021-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | // Useful for IDEs.
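// Added documentation with a worked example (the numbers are hypothetical, not from the
// original sources). The AXPBY task updates y in place from a scalar ratio a/b that arrives
// as two single-element stores: with IS_ALPHA the update is y = (a/b) * x + y, otherwise it
// is y = x + (a/b) * y, and NEGATE flips the sign of the ratio. For example, a = [2] and
// b = [4] give val = 0.5, so with IS_ALPHA = true and NEGATE = false each element becomes
// y[i] = 0.5 * x[i] + y[i], while NEGATE = true yields y[i] = -0.5 * x[i] + y[i]. Keeping a
// and b separate lets callers such as the CG driver (see tests/integration/test_cg_axpby.py)
// pass dot-product futures directly instead of blocking on a host-side division.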
20 | #include "legate_sparse/linalg/axpby.h"
21 | #include "legate_sparse/util/dispatch.h"
22 | 
23 | namespace sparse {
24 | 
25 | using namespace legate;
26 | 
27 | template <VariantKind KIND, Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
28 | struct AXPBYImplBody;
29 | 
30 | template <VariantKind KIND>
31 | struct AXPBYImpl {
32 |   template <Type::Code VAL_CODE>
33 |   void operator()(AXPBYArgs& args) const
34 |   {
35 |     using VAL_TY = type_of<VAL_CODE>;
36 |     auto y = args.y.read_write_accessor<VAL_TY, 1>();
37 |     auto x = args.x.read_accessor<VAL_TY, 1>();
38 |     auto a = args.a.read_accessor<VAL_TY, 1>();
39 |     auto b = args.b.read_accessor<VAL_TY, 1>();
40 |     if (args.y.domain().empty()) {
41 |       return;
42 |     }
43 |     if (args.isalpha) {
44 |       if (args.negate) {
45 |         AXPBYImplBody<KIND, VAL_CODE, true, true>()(y, x, a, b, args.y.shape<1>());
46 |       } else {
47 |         AXPBYImplBody<KIND, VAL_CODE, true, false>()(y, x, a, b, args.y.shape<1>());
48 |       }
49 |     } else {
50 |       if (args.negate) {
51 |         AXPBYImplBody<KIND, VAL_CODE, false, true>()(y, x, a, b, args.y.shape<1>());
52 |       } else {
53 |         AXPBYImplBody<KIND, VAL_CODE, false, false>()(y, x, a, b, args.y.shape<1>());
54 |       }
55 |     }
56 |   }
57 | };
58 | 
59 | template <VariantKind KIND>
60 | static void axpby_template(TaskContext context)
61 | {
62 |   AXPBYArgs args{
63 |     context.outputs()[0],
64 |     context.inputs()[0],
65 |     context.inputs()[1],
66 |     context.inputs()[2],
67 |     context.scalars()[0].value<bool>(),
68 |     context.scalars()[1].value<bool>(),
69 |   };
70 |   value_type_dispatch(args.y.code(), AXPBYImpl<KIND>{}, args);
71 | }
72 | 
73 | }  // namespace sparse
74 | 
--------------------------------------------------------------------------------
/src/legate_sparse/mapper/mapper.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include "legate/mapping/mapping.h"
20 | 
21 | namespace sparse {
22 | 
23 | class LegateSparseMapper : public legate::mapping::Mapper {
24 |  public:
25 |   // Virtual mapping functions of LegateMapper that need to be overridden.
26 |   virtual std::vector<legate::mapping::StoreMapping> store_mappings(
27 |     const legate::mapping::Task& task,
28 |     const std::vector<legate::mapping::StoreTarget>& options) override;
29 | 
30 |   auto allocation_pool_size(const legate::mapping::Task& task,
31 |                             legate::mapping::StoreTarget memory_kind)
32 |     -> std::optional<std::size_t> override;
33 | 
34 |   virtual legate::Scalar tunable_value(legate::TunableID tunable_id) override;
35 | };
36 | 
37 | }  // namespace sparse
38 | 
--------------------------------------------------------------------------------
/src/legate_sparse/partition/fast_image_partition.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/partition/fast_image_partition.h" 18 | #include "legate_sparse/partition/fast_image_partition_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | namespace // unnamed 25 | { 26 | static void __attribute__((constructor)) register_tasks(void) 27 | { 28 | FastImageRange::register_variants(); 29 | } 30 | } // namespace 31 | 32 | } // namespace sparse -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/partition/fast_image_partition.h" 18 | #include "legate_sparse/partition/fast_image_partition_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | #include "legate_sparse/util/thrust_allocator.h" 21 | 22 | #include 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct FastImageRangeImplBody { 31 | using INDEX_TY = type_of; 32 | 33 | void operator()(const AccessorWO, 1>& out_pos, 34 | const AccessorRO, 1>& in_pos, 35 | const AccessorRO& in_crd, 36 | const Rect<1>& rowbounds, 37 | const Rect<1>& bounds) 38 | { 39 | ThrustAllocator alloc(Memory::GPU_FB_MEM); 40 | auto stream = get_cached_stream(); 41 | auto thrust_exec_policy = thrust::cuda::par(alloc).on(stream); 42 | 43 | thrust::pair result = thrust::minmax_element( 44 | thrust_exec_policy, in_crd.ptr(bounds.lo[0]), in_crd.ptr(bounds.hi[0]) + 1); 45 | 46 | // out[idx] = {lo[idx], hi[idx] - 1}; 47 | INDEX_TY lo_idx, hi_idx; 48 | cudaMemcpyAsync(&lo_idx, result.first, sizeof(INDEX_TY), cudaMemcpyDefault, stream); 49 | cudaMemcpyAsync(&hi_idx, result.second, sizeof(INDEX_TY), cudaMemcpyDefault, stream); 50 | thrust::fill(thrust_exec_policy, 51 | out_pos.ptr(rowbounds.lo[0]), 52 | out_pos.ptr(rowbounds.hi[0]) + 1, 53 | Rect<1>({lo_idx, hi_idx})); 54 | 55 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 56 | } 57 | }; 58 | 59 | /*static*/ void FastImageRange::gpu_variant(TaskContext context) 60 | { 61 | fast_image_range_template(context); 62 | } 63 | 64 | } // namespace sparse 65 | -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA 
Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct FastImageRangeArgs { 26 | const legate::PhysicalStore output_pos; 27 | const legate::PhysicalStore input_pos; 28 | const legate::PhysicalStore input_crd; 29 | }; 30 | 31 | // only for CSR SpGEMM on GPU right now 32 | class FastImageRange : public SparseTask { 33 | public: 34 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_FAST_IMAGE_RANGE}; 35 | 36 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 37 | legate::VariantOptions{}.with_has_allocations(true); 38 | 39 | public: 40 | #ifdef LEGATE_USE_CUDA 41 | static void gpu_variant(legate::TaskContext context); 42 | #endif 43 | }; 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
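// Added documentation with an illustrative example (values are hypothetical, not from the
// original sources). FastImageRange computes a conservative bounding interval for the
// coordinates owned by one task and broadcasts it to that task's slice of the output pos
// array: the GPU variant takes the min and max of the local crd values (thrust::minmax_element)
// and fills the local rows with that single Rect<1>. For instance, if a task's crd slice is
// [3, 9, 4, 7], every out_pos entry it owns becomes {3, 9}, giving the runtime a cheap
// over-approximation of the image partition instead of an exact image computation. Per the
// comment in fast_image_partition.h, this is currently only used for CSR SpGEMM on GPUs.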
20 | #include "legate_sparse/util/dispatch.h" 21 | #include "legate_sparse/util/typedefs.h" 22 | #include "legate_sparse/partition/fast_image_partition.h" 23 | 24 | namespace sparse { 25 | 26 | using namespace legate; 27 | 28 | template 29 | struct FastImageRangeImplBody; 30 | 31 | template 32 | struct FastImageRangeImpl { 33 | template 34 | void operator()(FastImageRangeArgs& args) const 35 | { 36 | using INDEX_TY = type_of; 37 | 38 | auto output_pos = args.output_pos.write_accessor, 1>(); 39 | auto input_pos = args.input_pos.read_accessor, 1>(); 40 | auto input_crd = args.input_crd.read_accessor(); 41 | assert(args.input_pos.domain().dense()); 42 | assert(args.input_crd.domain().dense()); 43 | if (args.input_crd.domain().empty()) { 44 | return; 45 | } 46 | FastImageRangeImplBody()( 47 | output_pos, input_pos, input_crd, args.input_pos.shape<1>(), args.input_crd.shape<1>()); 48 | } 49 | }; 50 | 51 | template 52 | static void fast_image_range_template(TaskContext context) 53 | { 54 | FastImageRangeArgs args{context.output(0), context.input(0), context.input(1)}; 55 | index_type_dispatch(args.input_crd.code(), FastImageRangeImpl{}, args); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/sparse.h" 18 | 19 | #include "legate_sparse/mapper/mapper.h" 20 | 21 | #include "legate.h" 22 | 23 | using namespace legate; 24 | 25 | namespace sparse { 26 | 27 | static const char* const library_name = "legate.sparse"; 28 | 29 | TaskRegistrar& Sparse::get_registrar() 30 | { 31 | static TaskRegistrar registrar; 32 | return registrar; 33 | } 34 | 35 | void registration_callback() 36 | { 37 | ResourceConfig config; 38 | // TODO (rohany): I want to use the enums here, but I'm not sure the best way 39 | // to keep this in line with the Python import since there seems to be a 40 | // cyclic dependency. 41 | // config.max_tasks = LEGATE_SPARSE_LAST_TASK; 42 | config.max_tasks = 100; 43 | // SJ; Sat 11 Jan 2025 01:16:31 PM PST 44 | // Do we need to set max_projections if we don't use any projection functors? 45 | // TODO (rohany): We're dynamically generating projections... How does cupynumeric handle this? 
46 | // config.max_projections = 1000; 47 | 48 | auto options = VariantOptions{}.with_has_allocations(false); 49 | auto ctx = Runtime::get_runtime()->create_library( 50 | library_name, 51 | config, 52 | std::make_unique(), 53 | {{VariantCode::CPU, options}, {VariantCode::GPU, options}, {VariantCode::OMP, options}}); 54 | 55 | Sparse::get_registrar().register_all_tasks(ctx); 56 | } 57 | 58 | } // namespace sparse 59 | 60 | extern "C" { 61 | 62 | void legate_sparse_perform_registration(void) { sparse::registration_callback(); } 63 | } 64 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate.h" 20 | 21 | namespace sparse { 22 | 23 | enum class VariantKind : int { 24 | CPU = 0, 25 | OMP = 1, 26 | GPU = 2, 27 | }; 28 | 29 | struct Sparse { 30 | static legate::TaskRegistrar& get_registrar(); 31 | }; 32 | 33 | template 34 | struct SparseTask : public legate::LegateTask { 35 | using Registrar = Sparse; 36 | }; 37 | 38 | } // namespace sparse 39 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse_c.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #ifndef __SPARSE_C_H 18 | #define __SPARSE_C_H 19 | 20 | #include "cffi.h" 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | void legate_sparse_perform_registration(); 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif // __SPARSE_C_H 33 | -------------------------------------------------------------------------------- /src/legate_sparse/util/legate_utils.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate/utilities/typedefs.h" 20 | #include "realm/logging.h" 21 | #include 22 | 23 | namespace sparse { 24 | 25 | // Create 1D extents from lower and upper bounds 26 | template 27 | legate::Rect<1> create_1d_extents(const T& lo, const Q& hi) 28 | { 29 | return legate::Rect<1>{legate::Point<1>{lo}, legate::Point<1>{hi}}; 30 | } 31 | 32 | inline Realm::Logger& get_logger() 33 | { 34 | static Realm::Logger logger("legate-sparse"); 35 | return logger; 36 | } 37 | 38 | // Remove the path and use only the filename 39 | #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) 40 | 41 | // Macros for buffer allocation logging 42 | #if ENABLE_BUFFER_LOGGING 43 | #define CREATE_BUFFER(T, size, mem, desc) \ 44 | [&]() { \ 45 | auto buf = legate::create_buffer(size, mem); \ 46 | get_logger().print() << "Buffer allocation at " << __FILENAME__ << ":" << __LINE__ \ 47 | << " - Size: " << size << " Type: " << #T << " Description: " << desc; \ 48 | return buf; \ 49 | }() 50 | #else 51 | #define CREATE_BUFFER(T, size, mem, desc) legate::create_buffer(size, mem) 52 | #endif 53 | 54 | #if ENABLE_BUFFER_LOGGING 55 | #define LOG_BUFFER(T, size, desc) \ 56 | get_logger().print() << "Buffer allocation at " << __FILENAME__ << ":" << __LINE__ \ 57 | << " - Size: " << size << " Type: " << #T << " Description: " << desc 58 | #else 59 | #define LOG_BUFFER(T, size, desc) 60 | #endif 61 | 62 | } // namespace sparse 63 | -------------------------------------------------------------------------------- /src/legate_sparse/util/logger.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/legate_sparse/util/omp_help.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | namespace sparse { 24 | 25 | // Simple STL vector-based thread local storage for OpenMP threads to avoid false sharing 26 | template 27 | struct ThreadLocalStorage { 28 | private: 29 | static constexpr size_t CACHE_LINE_SIZE = 64; 30 | 31 | public: 32 | ThreadLocalStorage(size_t num_threads) 33 | : storage_(CACHE_LINE_SIZE * num_threads), num_threads_(num_threads) 34 | { 35 | } 36 | ~ThreadLocalStorage() {} 37 | 38 | public: 39 | VAL& operator[](size_t idx) 40 | { 41 | return *reinterpret_cast(storage_.data() + CACHE_LINE_SIZE * idx); 42 | } 43 | 44 | private: 45 | std::vector storage_; 46 | size_t num_threads_; 47 | }; 48 | 49 | } // namespace sparse 50 | -------------------------------------------------------------------------------- /src/legate_sparse/util/thrust_allocator.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate.h" 20 | #include "legate_sparse/util/legate_utils.h" 21 | 22 | namespace sparse { 23 | 24 | using namespace legate; 25 | 26 | class ThrustAllocator : public legate::ScopedAllocator { 27 | public: 28 | using value_type = char; 29 | 30 | ThrustAllocator(Memory::Kind kind) : legate::ScopedAllocator(kind) {} 31 | 32 | char* allocate(size_t num_bytes) 33 | { 34 | LOG_BUFFER(char, num_bytes, "ThrustAllocator::allocate"); 35 | return static_cast(ScopedAllocator::allocate(num_bytes)); 36 | } 37 | 38 | void deallocate(char* ptr, size_t n) { ScopedAllocator::deallocate(ptr); } 39 | }; 40 | 41 | } // namespace sparse 42 | -------------------------------------------------------------------------------- /src/legate_sparse/util/typedefs.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | namespace sparse { 18 | 19 | // We use uint64_t's to accumulate the non-zeros per row in 20 | // different sparse matrix computations. 
21 | using nnz_ty = uint64_t; 22 | 23 | } // namespace sparse -------------------------------------------------------------------------------- /src/legate_sparse/util/upcast_future.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/util/upcast_future.h" 18 | 19 | namespace sparse { 20 | 21 | using namespace legate; 22 | 23 | template 24 | void upcast_impl(legate::TaskContext ctx) 25 | { 26 | auto& in_fut = ctx.inputs()[0]; 27 | const T* src; 28 | T* dst; 29 | switch (in_fut.dim()) { 30 | case 0: { 31 | // Futures can be 0-dimensional. legate doesn't appear to complain 32 | // if we make a 1-D accessor of a 0-D "store". 33 | dst = ctx.outputs()[0].write_accessor().ptr(0); 34 | src = ctx.inputs()[0].read_accessor().ptr(0); 35 | break; 36 | } 37 | case 1: { 38 | dst = ctx.outputs()[0].write_accessor().ptr(0); 39 | src = ctx.inputs()[0].read_accessor().ptr(0); 40 | break; 41 | } 42 | case 2: { 43 | dst = ctx.outputs()[0].write_accessor().ptr({0, 0}); 44 | src = ctx.inputs()[0].read_accessor().ptr({0, 0}); 45 | break; 46 | } 47 | case 3: { 48 | dst = ctx.outputs()[0].write_accessor().ptr({0, 0, 0}); 49 | src = ctx.inputs()[0].read_accessor().ptr({0, 0, 0}); 50 | break; 51 | } 52 | } 53 | memcpy(dst, src, sizeof(T)); 54 | } 55 | 56 | /*static*/ void UpcastFutureToRegion::cpu_variant(TaskContext ctx) 57 | { 58 | assert(ctx.is_single_task()); 59 | auto future_size = ctx.scalars()[0].value(); 60 | switch (future_size) { 61 | case 1: { 62 | upcast_impl(ctx); 63 | break; 64 | } 65 | case 2: { 66 | upcast_impl(ctx); 67 | break; 68 | } 69 | case 4: { 70 | upcast_impl(ctx); 71 | break; 72 | } 73 | case 8: { 74 | upcast_impl(ctx); 75 | break; 76 | } 77 | default: { 78 | assert(false); 79 | } 80 | } 81 | } 82 | 83 | namespace // unnamed 84 | { 85 | static void __attribute__((constructor)) register_tasks(void) 86 | { 87 | UpcastFutureToRegion::register_variants(); 88 | } 89 | } // namespace 90 | 91 | } // namespace sparse 92 | -------------------------------------------------------------------------------- /src/legate_sparse/util/upcast_future.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | class UpcastFutureToRegion : public SparseTask<UpcastFutureToRegion> { 26 | public: 27 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UPCAST_FUTURE_TO_REGION}; 28 | static void cpu_variant(legate::TaskContext ctx); 29 | 30 | private: 31 | template <typename T> 32 | static void cpu_variant_impl(legate::TaskContext ctx); 33 | }; 34 | 35 | } // namespace sparse 36 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021-2024 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from __future__ import annotations 18 | 19 | import sys 20 | 21 | from legate.tester import CustomTest, FeatureType 22 | from legate.tester.config import Config 23 | from legate.tester.project import Project 24 | from legate.tester.test_plan import TestPlan 25 | from legate.tester.test_system import TestSystem 26 | from legate.util.types import EnvDict 27 | 28 | 29 | class ProjectLegateSparse(Project): 30 | def skipped_examples(self) -> set[str]: 31 | return set() 32 | 33 | def custom_files(self) -> list[CustomTest]: 34 | return [] 35 | 36 | def stage_env(self, feature: FeatureType) -> EnvDict: 37 | return {} 38 | 39 | 40 | if __name__ == "__main__": 41 | config = Config(sys.argv, project=ProjectLegateSparse()) 42 | 43 | system = TestSystem(dry_run=config.dry_run) 44 | 45 | plan = TestPlan(config, system) 46 | 47 | ret = plan.execute() 48 | sys.exit(ret) 49 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pytest 3 | from scipy import sparse as scipy_sparse 4 | from utils.sample import simple_system_gen 5 | 6 | import legate_sparse as sparse 7 | 8 | 9 | @pytest.fixture 10 | def create_mask(): 11 | """ 12 | Create a boolean mask matrix with a random sparsity pattern 13 | """ 14 | 15 | def _create_mask(rows, density=0.3): 16 | cols = rows 17 | nnz = int(rows * cols * density) 18 | 19 | # SciPy 20 | row_idx = numpy.random.randint(0, rows, size=nnz) 21 | col_idx = numpy.random.randint(0, cols, size=nnz) 22 | data = numpy.ones(nnz, dtype=bool) 23 | A_scipy = scipy_sparse.csr_array((data, (row_idx, col_idx)), shape=(rows, cols)) 24 | 25 | # Sparse 26 | A_sparse = sparse.csr_array(A_scipy.todense()) 27 | 28 | # Verify matrices are equivalent 29 | A_scipy_dense = numpy.asarray(A_scipy.todense()) 30 | A_sparse_dense = numpy.asarray(A_sparse.todense()) 31 | assert numpy.all( 32 | numpy.allclose(A_scipy_dense, A_sparse_dense, rtol=1e-5, atol=1e-6) 33 | ) 34 | 35 | return A_scipy, A_sparse 36 | 37 | return _create_mask 38 | 39 | 40 | @pytest.fixture 41 | def create_matrix():
42 | """ 43 | Create matrices in SciPy and Legate Sparse that are equivalent 44 | """ 45 | 46 | def _create_matrix(N, tol=0.5): 47 | _, A_scipy, _ = simple_system_gen(N, N, scipy_sparse.csr_array, tol=tol) 48 | A_sparse = sparse.csr_array(A_scipy) 49 | 50 | # Verify matrices are equivalent 51 | A_scipy_dense = numpy.asarray(A_scipy.todense()) 52 | A_sparse_dense = numpy.asarray(A_sparse.todense()) 53 | assert numpy.all( 54 | numpy.allclose(A_scipy_dense, A_sparse_dense, rtol=1e-5, atol=1e-6) 55 | ) 56 | 57 | return A_scipy, A_sparse 58 | 59 | return _create_matrix 60 | -------------------------------------------------------------------------------- /tests/integration/test_cg_axpby.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import pytest 17 | 18 | import legate_sparse.linalg as sparse_linalg 19 | 20 | 21 | @pytest.mark.parametrize("y", [[2.0, 3.0]]) 22 | @pytest.mark.parametrize("x", [[0.0, 1.0]]) 23 | @pytest.mark.parametrize("a", [[2.0]]) 24 | @pytest.mark.parametrize("b", [[3.0]]) 25 | @pytest.mark.parametrize("isalpha", [True, False]) 26 | @pytest.mark.parametrize("negate", [True, False]) 27 | def test_cg_linalg(y, x, a, b, isalpha, negate): 28 | scalar = a[0] / b[0] 29 | if negate: 30 | scalar = -scalar 31 | alpha = scalar if isalpha else 1.0 32 | beta = 1.0 if isalpha else scalar 33 | expected_y = alpha * np.asarray(x) + beta * np.asarray(y) 34 | 35 | y = np.array(y) 36 | x = np.array(x) 37 | a = np.array(a) 38 | b = np.array(b) 39 | 40 | sparse_linalg.cg_axpby(y, x, a, b, isalpha=isalpha, negate=negate) 41 | 42 | assert np.allclose(expected_y, y) 43 | 44 | 45 | if __name__ == "__main__": 46 | import sys 47 | 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_cg_solve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import sample_dense, sample_dense_vector 18 | 19 | import legate_sparse.linalg as linalg 20 | from legate_sparse import csr_array 21 | 22 | 23 | def test_cg_solve(): 24 | N, D = 1000, 1000 25 | seed = 471014 26 | A = sample_dense(N, D, 0.1, seed) 27 | A = 0.5 * (A + A.T) 28 | A = A + N * np.eye(N) 29 | # Assert that A is indeed positive definite. 30 | assert np.all(np.linalg.eigvals(A) > 0) 31 | A = csr_array(A) 32 | x = sample_dense_vector(D, 0.1, seed) 33 | y = A @ x 34 | x_pred, iters = linalg.cg(A, y, tol=1e-8) 35 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 36 | 37 | 38 | def test_cg_solve_with_callback(): 39 | N, D = 1000, 1000 40 | seed = 471014 41 | A = sample_dense(N, D, 0.1, seed) 42 | A = 0.5 * (A + A.T) 43 | A = A + N * np.eye(N) 44 | # Assert that A is indeed positive definite. 45 | assert np.all(np.linalg.eigvals(A) > 0) 46 | A = csr_array(A) 47 | x = sample_dense_vector(D, 0.1, seed) 48 | y = A @ x 49 | residuals = [] 50 | 51 | def callback(x): 52 | # Test that nothing goes wrong if we do some arbitrary computation in 53 | # the callback on x. 54 | residuals.append(y - A @ x) 55 | 56 | x_pred, iters = linalg.cg(A, y, tol=1e-8, callback=callback) 57 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 58 | assert len(residuals) > 0 59 | 60 | 61 | # def test_cg_solve_with_identity_preconditioner(): 62 | # N, D = 1000, 1000 63 | # seed = 471014 64 | # A = sample_dense(N, D, 0.1, seed) 65 | # A = 0.5 * (A + A.T) 66 | # A = A + N * np.eye(N) 67 | # # Assert that A is indeed positive definite. 68 | # assert np.all(np.linalg.eigvals(A) > 0) 69 | # A = csr_array(A) 70 | # x = sample_dense_vector(D, 0.1, seed) 71 | # y = A @ x 72 | # assert np.allclose((A @ x), y) 73 | # x_pred, iters = linalg.cg(A, y, M=eye(A.shape[0]), tol=1e-8) 74 | # assert np.allclose((A @ x_pred), y) 75 | 76 | 77 | def test_cg_solve_with_linear_operator(): 78 | N, D = 1000, 1000 79 | seed = 471014 80 | A = sample_dense(N, D, 0.1, seed) 81 | A = 0.5 * (A + A.T) 82 | A = A + N * np.eye(N) 83 | # Assert that A is indeed positive definite. 84 | assert np.all(np.linalg.eigvals(A) > 0) 85 | A = csr_array(A) 86 | x = sample_dense_vector(D, 0.1, seed) 87 | y = A @ x 88 | 89 | def matvec(x): 90 | return A @ x 91 | 92 | x_pred, iters = linalg.cg( 93 | linalg.LinearOperator(A.shape, matvec=matvec), y, tol=1e-8 94 | ) 95 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 96 | 97 | def matvec(x, out=None): 98 | return A.dot(x, out=out) 99 | 100 | x_pred, iters = linalg.cg( 101 | linalg.LinearOperator(A.shape, matvec=matvec), y, tol=1e-8 102 | ) 103 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 104 | 105 | 106 | if __name__ == "__main__": 107 | import sys 108 | 109 | ret = pytest.main(sys.argv) 110 | sys.exit(ret) 111 | -------------------------------------------------------------------------------- /tests/integration/test_comparison.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import simple_system_gen 18 | 19 | import legate_sparse as sparse 20 | 21 | # TODO: Enable "eq" after #209 is fixed 22 | COMPARISON_OPS = [ 23 | ("gt", lambda x, y: x > y), 24 | ("lt", lambda x, y: x < y), 25 | ("ge", lambda x, y: x >= y), 26 | ("le", lambda x, y: x <= y), 27 | # ("eq", lambda x, y: x == y), 28 | ("ne", lambda x, y: x != y), 29 | ] 30 | 31 | 32 | @pytest.mark.parametrize("N", [8, 13]) 33 | @pytest.mark.parametrize("threshold", [0.3, 0.5]) 34 | @pytest.mark.parametrize("op_name, op_func", COMPARISON_OPS) 35 | def test_comparison_operation(N, threshold, op_name, op_func): 36 | """Test element-wise comparison operations on non-zero entries of the matrix 37 | 38 | Parameters 39 | ---------- 40 | N : int 41 | Size of the test matrix 42 | threshold : float 43 | Value to compare against 44 | op_name : str 45 | Name of the comparison operation 46 | op_func : callable 47 | The comparison function to test 48 | """ 49 | A_dense, A_sparse, _ = simple_system_gen(N, N, sparse.csr_array, tol=0.7) 50 | 51 | sparse_result = op_func(A_sparse, threshold) 52 | dense_result = op_func(A_dense[A_dense != 0], threshold) 53 | 54 | assert sparse_result.astype(int).sum() == dense_result.astype(int).sum() 55 | 56 | 57 | @pytest.mark.parametrize("op_name, op_func", COMPARISON_OPS) 58 | def test_comparison_error_cases(op_name, op_func): 59 | """Test error cases for comparison operations. 60 | 61 | Parameters 62 | ---------- 63 | op_name : str 64 | Name of the comparison operation 65 | op_func : callable 66 | The comparison function to test 67 | """ 68 | N = 8 69 | _, A_sparse, _ = simple_system_gen(N, N, sparse.csr_array, tol=0.7) 70 | 71 | # Test comparison with non-scalar values 72 | invalid_comparisons = [ 73 | np.array([1, 2, 3]), # 1D array 74 | np.array([[1, 2], [3, 4]]), # 2D array 75 | "string", # string 76 | [1, 2, 3], # list 77 | ] 78 | 79 | for invalid_value in invalid_comparisons: 80 | with pytest.raises(AssertionError): 81 | op_func(A_sparse, invalid_value) 82 | 83 | 84 | if __name__ == "__main__": 85 | import sys 86 | 87 | sys.exit(pytest.main(sys.argv)) 88 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_coo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import numpy 19 | import pytest 20 | from utils.sample import simple_system_gen 21 | 22 | import legate_sparse as sparse 23 | 24 | 25 | @pytest.mark.parametrize("N", [7, 13]) 26 | @pytest.mark.parametrize("M", [5, 29]) 27 | def test_csr_from_coo(N, M): 28 | shape = (N, M) 29 | 30 | np.random.seed(0) 31 | 32 | # This approach can generate duplicate nnz entries: 33 | # nnz = N*M // 2 34 | # row_ind = np.random.random_integers(0, high=(N-1), size=nnz) 35 | # col_ind = np.random.random_integers(0, high=(M-1), size=nnz) 36 | # vals = np.random.rand(nnz) 37 | 38 | # so we just extract the sparsity pattern from a dense matrix 39 | A_dense_orig, _, _ = simple_system_gen(N, M, sparse.csr_array) 40 | nnzs = np.argwhere(A_dense_orig > 0.0) 41 | vals = A_dense_orig.ravel() 42 | vals = vals[vals > 0.0] 43 | 44 | row_ind, col_ind = nnzs[:, 0], nnzs[:, 1] 45 | 46 | # we want to test on unsorted inputs 47 | perm = np.array(numpy.random.permutation(numpy.arange(row_ind.shape[0]))) 48 | row_ind = row_ind[perm] 49 | col_ind = col_ind[perm] 50 | 51 | A = sparse.csr_array((vals, (row_ind, col_ind)), shape=shape) 52 | 53 | A_dense = np.zeros(shape=shape) 54 | for r, c, v in zip(row_ind, col_ind, vals): 55 | A_dense[r, c] = v 56 | 57 | assert np.all(np.isclose(A_dense, A.todense())) 58 | 59 | 60 | if __name__ == "__main__": 61 | sys.exit(pytest.main(sys.argv)) 62 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_csr.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate.core import get_legate_runtime 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | def test_csr_from_csr_fixed(): 25 | """ 26 | 2 0 0 0 1 0 27 | 5 8 0 0 0 2 28 | 0 0 3 4 0 0 29 | 0 6 0 1 0 0 30 | 9 0 0 0 4 0 31 | 7 0 0 0 2 1 32 | """ 33 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 34 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 35 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 36 | matrix_shape = (6, 6) 37 | 38 | A = sparse.csr_array( # noqa: F841 39 | (csr_vals, col_indices, row_offsets), shape=matrix_shape 40 | ) 41 | 42 | get_legate_runtime().issue_execution_fence(block=True) 43 | 44 | 45 | @pytest.mark.parametrize("N", [7, 13]) 46 | @pytest.mark.parametrize("M", [5, 29]) 47 | def test_csr_from_csr_gen(N, M): 48 | nnz_per_row = np.random.randint(M, size=N) 49 | row_offsets = np.append([0], np.cumsum(nnz_per_row)) 50 | nnz = row_offsets[-1] 51 | col_indices = np.random.randint(M, size=nnz) 52 | csr_vals = np.random.rand(nnz) 53 | matrix_shape = (N, M) 54 | 55 | A = sparse.csr_array( # noqa: F841 56 | (csr_vals, col_indices, row_offsets), shape=matrix_shape 57 | ) 58 | 59 | 60 | @pytest.mark.parametrize("N", [7, 13]) 61 | @pytest.mark.parametrize("M", [5, 29]) 62 | def test_csr_from_empty(N, M): 63 | A = sparse.csr_array((N, M), dtype=np.float64) # noqa: F841 64 | 65 | 66 | if __name__ == "__main__": 67 | sys.exit(pytest.main(sys.argv)) 68 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_dense.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate.core import get_legate_runtime 20 | from utils.sample import simple_system_gen 21 | 22 | import legate_sparse as sparse 23 | 24 | 25 | @pytest.mark.parametrize("N", [7, 13]) 26 | @pytest.mark.parametrize("M", [5, 29]) 27 | def test_csr_from_dense(N, M): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, M, sparse.csr_array) 30 | 31 | get_legate_runtime().issue_execution_fence(block=True) 32 | 33 | 34 | if __name__ == "__main__": 35 | sys.exit(pytest.main(sys.argv)) 36 | -------------------------------------------------------------------------------- /tests/integration/test_csr_to_dense.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | def test_csr_to_dense(): 24 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 25 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 26 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 27 | matrix_shape = (6, 6) 28 | 29 | A = sparse.csr_array((csr_vals, col_indices, row_offsets), shape=matrix_shape) 30 | 31 | B = A.todense() 32 | expected_B = np.array( 33 | [ 34 | [2, 0, 0, 0, 1, 0], 35 | [5, 8, 0, 0, 0, 2], 36 | [0, 0, 3, 4, 0, 0], 37 | [0, 6, 0, 1, 0, 0], 38 | [9, 0, 0, 0, 4, 0], 39 | [7, 0, 0, 0, 2, 1], 40 | ], 41 | dtype=np.float64, 42 | ) 43 | 44 | assert (B == expected_B).all() 45 | 46 | 47 | if __name__ == "__main__": 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_csr_transpose.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from utils.sample import simple_system_gen 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | @pytest.mark.parametrize("N", [5, 29]) 25 | @pytest.mark.parametrize("M", [7, 13]) 26 | @pytest.mark.parametrize("iscopy", [True, False]) 27 | def test_csr_transpose(N, M, iscopy): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, M, sparse.csr_array) 30 | 31 | assert np.all(np.isclose(A_dense, A.T.transpose(copy=iscopy).todense())) 32 | 33 | 34 | if __name__ == "__main__": 35 | sys.exit(pytest.main(sys.argv)) 36 | -------------------------------------------------------------------------------- /tests/integration/test_diagonal.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from utils.sample import simple_system_gen 20 | 21 | from legate_sparse import csr_array 22 | 23 | 24 | @pytest.mark.parametrize("N", [7, 13]) 25 | @pytest.mark.parametrize("with_zeros", [True, False]) 26 | def test_csr_diagonal(N, with_zeros): 27 | M = N 28 | np.random.seed(0) 29 | A_dense, _, _ = simple_system_gen(N, M, None, tol=0.2) 30 | 31 | if not with_zeros: 32 | A_dense += np.eye(N, M) 33 | 34 | A = csr_array(A_dense) 35 | dense_diag = np.diagonal(A_dense) 36 | csr_diag = A.diagonal() 37 | 38 | assert np.all(np.isclose(dense_diag, csr_diag)) 39 | 40 | 41 | if __name__ == "__main__": 42 | sys.exit(pytest.main(sys.argv)) 43 | -------------------------------------------------------------------------------- /tests/integration/test_diags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | import scipy.sparse as sp 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | @pytest.mark.parametrize("N", [12, 34]) 25 | @pytest.mark.parametrize("diagonals", [3, 5]) 26 | @pytest.mark.parametrize("dtype", (np.float32, np.float64, np.complex64, np.complex128)) 27 | @pytest.mark.parametrize("fmt", ["csr", "dia"]) 28 | def test_diags(N, diagonals, dtype, fmt): 29 | A = sparse.diags( 30 | [1] * diagonals, 31 | [x - (diagonals // 2) for x in range(diagonals)], 32 | shape=(N, N), 33 | format=fmt, 34 | dtype=dtype, 35 | ) 36 | 37 | if fmt == "dia": 38 | A = A.tocsr() 39 | 40 | B = sp.diags( 41 | [1] * diagonals, 42 | [x - (diagonals // 2) for x in range(diagonals)], 43 | shape=(N, N), 44 | format=fmt, 45 | dtype=dtype, 46 | ) 47 | 48 | assert np.array_equal(A.todense(), B.todense()) 49 | 50 | 51 | if __name__ == "__main__": 52 | sys.exit(pytest.main(sys.argv)) 53 | -------------------------------------------------------------------------------- /tests/integration/test_gmres_solve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import sample_dense, sample_dense_vector 18 | 19 | import legate_sparse.linalg as linalg 20 | from legate_sparse import csr_array 21 | 22 | 23 | def test_gmres_solve(): 24 | N, D = 1000, 1000 25 | seed = 471014 26 | A = sample_dense(N, D, 0.1, seed) 27 | A = 0.5 * (A + A.T) 28 | A = A + N * np.eye(N) 29 | A = csr_array(A) 30 | x = sample_dense_vector(D, 0.1, seed) 31 | 32 | y = A @ x 33 | assert np.allclose((A @ x), y) 34 | 35 | x_pred, iters = linalg.gmres(A, y, atol=1e-5, tol=1e-5, maxiter=300) 36 | assert np.allclose((A @ x_pred), y, atol=1e-8) 37 | 38 | 39 | if __name__ == "__main__": 40 | import sys 41 | 42 | sys.exit(pytest.main(sys.argv)) 43 | -------------------------------------------------------------------------------- /tests/integration/test_io.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from pathlib import Path 16 | 17 | import cupynumeric as np 18 | import pytest 19 | import scipy.io as sci_io 20 | 21 | import legate_sparse.io as legate_io 22 | 23 | TEST_DIR = Path(__file__).parent.parent 24 | 25 | 26 | @pytest.fixture 27 | def test_mtx_files(): 28 | mtx_files = [ 29 | "test.mtx", 30 | "GlossGT.mtx", 31 | "Ragusa18.mtx", 32 | "cage4.mtx", 33 | "karate.mtx", 34 | ] 35 | return [str(TEST_DIR / "testdata" / mtx_file) for mtx_file in mtx_files] 36 | 37 | 38 | def test_mmread(test_mtx_files): 39 | for mtx_file in test_mtx_files: 40 | arr = legate_io.mmread(mtx_file) 41 | s = sci_io.mmread(mtx_file) 42 | assert np.array_equal(arr.todense(), s.todense()) 43 | 44 | 45 | if __name__ == "__main__": 46 | import sys 47 | 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_manual_sorting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import numpy 17 | import pytest 18 | from legate_sparse.utils import sort_by_rows_then_cols 19 | 20 | 21 | def test_manual_sorting(): 22 | N = 5 23 | for _ in range(N): 24 | rows = np.random.randint(0, 100, 100) 25 | cols = np.random.randint(0, 100, 100) 26 | assert numpy.allclose( 27 | sort_by_rows_then_cols(rows, cols), numpy.lexsort((cols, rows)) 28 | ) 29 | 30 | 31 | if __name__ == "__main__": 32 | import sys 33 | 34 | sys.exit(pytest.main(sys.argv)) 35 | -------------------------------------------------------------------------------- /tests/integration/test_nonzero.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import numpy 17 | import pytest 18 | from utils.sample import simple_system_gen 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | @pytest.mark.parametrize("N", [8, 13]) 24 | def test_nonzero(N): 25 | """ 26 | This test checks that the nonzero method returns the correct indices for a sparse matrix. 27 | """ 28 | np.random.seed(0) 29 | A_dense, _, _ = simple_system_gen(N, N, None, tol=0.2) 30 | 31 | r_numpy, c_numpy = numpy.nonzero(A_dense) 32 | 33 | A = sparse.csr_array(A_dense) 34 | r_scipy, c_scipy = A.nonzero() 35 | 36 | assert np.all(r_numpy == r_scipy) 37 | assert np.all(c_numpy == c_scipy) 38 | 39 | 40 | if __name__ == "__main__": 41 | import sys 42 | 43 | sys.exit(pytest.main(sys.argv)) 44 | -------------------------------------------------------------------------------- /tests/integration/test_spgemm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate_sparse.runtime import runtime 20 | from utils.banded_matrix import banded_matrix 21 | from utils.sample import simple_system_gen 22 | 23 | import legate_sparse as sparse 24 | 25 | 26 | @pytest.mark.parametrize("N", [5, 29]) 27 | def test_csr_spgemm(N): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, N, sparse.csr_array) 30 | 31 | B = A.copy() 32 | 33 | C = A @ B 34 | 35 | assert np.all(np.isclose(C.todense(), A_dense @ A_dense)) 36 | 37 | 38 | @pytest.mark.parametrize("N", [5, 29]) 39 | @pytest.mark.parametrize("unsupported_dtype", ["int", "bool"]) 40 | def test_csr_spgemm_unsupported_dtype(N, unsupported_dtype): 41 | np.random.seed(0) 42 | 43 | nnz_per_row = 3 44 | A = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 45 | B = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 46 | 47 | if runtime.num_gpus > 0: 48 | expected_exp = NotImplementedError 49 | with pytest.raises(expected_exp): 50 | C = A @ B # noqa: F841 51 | 52 | 53 | if __name__ == "__main__": 54 | sys.exit(pytest.main(sys.argv)) 55 | -------------------------------------------------------------------------------- /tests/integration/test_spmv.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate_sparse.runtime import runtime 20 | from utils.banded_matrix import banded_matrix 21 | from utils.sample import simple_system_gen 22 | 23 | import legate_sparse as sparse 24 | 25 | 26 | @pytest.mark.parametrize("N", [5, 29]) 27 | @pytest.mark.parametrize("M", [7, 17]) 28 | @pytest.mark.parametrize("inline", [True, False]) 29 | def test_csr_spmv(N, M, inline): 30 | np.random.seed(0) 31 | A_dense, A, x = simple_system_gen(N, M, sparse.csr_array) 32 | 33 | if inline: 34 | y = np.ndarray((N,)) 35 | A.dot(x, out=y) 36 | else: 37 | y = A @ x 38 | 39 | assert np.all(np.isclose(y, A_dense @ x)) 40 | 41 | 42 | @pytest.mark.parametrize("N", [5, 29]) 43 | @pytest.mark.parametrize("nnz_per_row", [3, 9]) 44 | @pytest.mark.parametrize("unsupported_dtype", ["int", "bool"]) 45 | def test_csr_spmv_unsupported_dtype(N, nnz_per_row, unsupported_dtype): 46 | np.random.seed(0) 47 | 48 | A = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 49 | x = np.ndarray((N,)) 50 | 51 | if runtime.num_gpus > 0: 52 | expected_exp = NotImplementedError 53 | with pytest.raises(expected_exp): 54 | y = A.dot(x) # noqa: F841 55 | 56 | 57 | if __name__ == "__main__": 58 | sys.exit(pytest.main(sys.argv)) 59 | -------------------------------------------------------------------------------- /tests/integration/test_unary_operation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | def test_unary_operation(): 24 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 25 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 26 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 27 | matrix_shape = (6, 6) 28 | 29 | A = sparse.csr_array((csr_vals, col_indices, row_offsets), shape=matrix_shape) 30 | 31 | B = A * 2 32 | Bvalues = np.asarray(B.vals) 33 | expected_Bvalues = np.array( 34 | [4, 2, 10, 16, 4, 6, 8, 12, 2, 18, 8, 14, 4, 2], dtype=np.float64 35 | ) 36 | assert (Bvalues == expected_Bvalues).all() 37 | 38 | C = A.multiply(3) 39 | Cvalues = np.asarray(C.vals) 40 | expected_Cvalues = np.array( 41 | [6, 3, 15, 24, 6, 9, 12, 18, 3, 27, 12, 21, 6, 3], dtype=np.float64 42 | ) 43 | assert (Cvalues == expected_Cvalues).all() 44 | 45 | D = A.conj().conj() 46 | assert np.all(np.isclose(A.todense(), D.todense())) 47 | 48 | 49 | if __name__ == "__main__": 50 | sys.exit(pytest.main(sys.argv)) 51 | -------------------------------------------------------------------------------- /tests/integration/utils/banded_matrix.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | 17 | import legate_sparse as sparse 18 | 19 | 20 | def banded_matrix( 21 | N: int, 22 | nnz_per_row: int, 23 | from_diags: bool = True, 24 | init_with_ones: bool = True, 25 | verbose: bool = False, 26 | ): 27 | """ 28 | Parameters 29 | ---------- 30 | N: int 31 | Size of the NxN sparse matrix 32 | nnz_per_row: int 33 | Number of non-zero elements per row (odd number) 34 | from_diags: bool 35 | use sparse.diags to generate the banded matrix (default = True) 36 | init_with_ones: bool 37 | Initialize the matrix with ones instead of arange 38 | 39 | Returns 40 | ------- 41 | csr_array: 42 | Return a sparse matrix 43 | """ 44 | 45 | if from_diags: 46 | return sparse.diags( 47 | np.array([1] * nnz_per_row), 48 | np.array([x - (nnz_per_row // 2) for x in range(nnz_per_row)]), 49 | shape=(N, N), 50 | format="csr", 51 | dtype=np.float64, 52 | ) 53 | else: 54 | assert N > nnz_per_row 55 | assert nnz_per_row % 2 == 1 56 | half_nnz = nnz_per_row // 2 57 | 58 | pred_nrows = nnz_per_row - half_nnz 59 | post_nrows = pred_nrows 60 | main_rows = N - pred_nrows - post_nrows 61 | 62 | pred = np.arange(nnz_per_row - half_nnz, nnz_per_row + 1) 63 | post = np.flip(pred) 64 | nnz_arr = np.concatenate((pred, np.ones(main_rows) * nnz_per_row, post)) 65 | 66 | if sparse.__name__ == "legate_sparse": 67 | row_offsets = np.zeros(N + 1).astype(sparse.coord_ty) 68 | else: 69 | row_offsets = np.zeros(N + 1).astype(int) 70 | 71 | row_offsets[1 : N + 1] = np.cumsum(nnz_arr) 72 | nnz = row_offsets[-1] 73 | 74 | col_indices = np.tile( 75 | np.arange(-half_nnz, nnz_per_row - half_nnz), (N,) 76 | ) + np.repeat(np.arange(N), nnz_per_row) 77 | 78 | if init_with_ones: 79 | data = np.ones(N * nnz_per_row).astype(np.float64) 80 | else: 81 | data = np.arange(N * nnz_per_row).astype(np.float64) / N 82 | 83 | mask = col_indices >= 0 84 | mask &= col_indices < N 85 | 86 | col_indices = col_indices[mask] 87 | data = data[mask] 88 | assert data.shape[0] == nnz 89 | assert col_indices.shape[0] == nnz 90 | 91 | if verbose: 92 | np.set_printoptions(linewidth=1000) 93 | print(f"data : {data}") 94 | print(f"col_indices: {col_indices}") 95 | print(f"row_offsets: {row_offsets}") 96 | 97 | return sparse.csr_array( 98 | (data, col_indices.astype(np.int64), row_offsets.astype(np.int64)), 99 | shape=(N, N), 100 | copy=False, 101 | ) 102 | -------------------------------------------------------------------------------- /tests/integration/utils/sample.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import numpy 17 | import scipy.sparse as scpy 18 | import scipy.stats as stats 19 | 20 | 21 | class Normal(stats.rv_continuous): 22 | def _rvs(self, *args, size=None, random_state=None): 23 | return random_state.standard_normal(size) 24 | 25 | 26 | def sample(N: int, D: int, density: float, seed: int): 27 | NormalType = Normal(seed=seed) 28 | SeededNormal = NormalType() 29 | return scpy.random( 30 | N, 31 | D, 32 | density=density, 33 | format="csr", 34 | dtype=numpy.float64, 35 | random_state=seed, 36 | data_rvs=SeededNormal.rvs, 37 | ) 38 | 39 | 40 | def sample_dense(N: int, D: int, density: float, seed: int): 41 | return numpy.asarray(sample(N, D, density, seed).todense()) 42 | 43 | 44 | def sample_dense_vector(N: int, density: float, seed: int): 45 | return sample_dense(N, 1, density, seed).squeeze() 46 | 47 | 48 | def simple_system_gen(N, M, cls, tol=0.5): 49 | a_dense = np.random.rand(N, M) 50 | x = np.random.rand(M) 51 | a_dense = np.where(a_dense < tol, a_dense, 0) 52 | 53 | a_sparse = None if cls is None else cls(a_dense) 54 | 55 | return a_dense, a_sparse, x 56 | -------------------------------------------------------------------------------- /tests/testdata/GlossGT.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate pattern general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Pajek/GlossGT 5 | % name: Pajek/GlossGT 6 | % [Pajek network: graph and digraph glossary] 7 | % id: 1501 8 | % date: 2001 9 | % author: W. Cherowitzo 10 | % ed: V. Batagelj 11 | % fields: name title A id kind notes aux date author ed 12 | % aux: nodename coord 13 | % kind: directed graph 14 | %------------------------------------------------------------------------------- 15 | % notes: 16 | % ------------------------------------------------------------------------------ 17 | % Pajek network converted to sparse adjacency matrix for inclusion in UF sparse 18 | % matrix collection, Tim Davis. For Pajek datasets, See V. Batagelj & A. Mrvar, 19 | % http://vlado.fmf.uni-lj.si/pub/networks/data/. 20 | % ------------------------------------------------------------------------------ 21 | % Bill Cherowitzo: Graph and Digraph Glossary 22 | % http://www-math.cudenver.edu/~wcherowi/courses/m4408/glossary.html 23 | % Pajek's network: Barbara Zemlji"c, 2. nov 2003 24 | % The original problem had 3D xyz coordinates, but all values of z were equal 25 | % to 0, and have been removed. This graph has 2D coordinates. 
26 | %------------------------------------------------------------------------------- 27 | 72 72 122 28 | 3 4 29 | 15 4 30 | 16 4 31 | 2 6 32 | 7 6 33 | 26 6 34 | 28 6 35 | 37 6 36 | 41 6 37 | 46 6 38 | 66 6 39 | 53 9 40 | 48 12 41 | 14 16 42 | 19 17 43 | 29 17 44 | 11 19 45 | 17 19 46 | 69 19 47 | 1 20 48 | 29 20 49 | 31 20 50 | 69 20 51 | 15 22 52 | 39 22 53 | 52 22 54 | 57 22 55 | 7 26 56 | 41 26 57 | 49 26 58 | 64 26 59 | 66 26 60 | 67 26 61 | 25 27 62 | 2 28 63 | 4 28 64 | 6 28 65 | 8 28 66 | 18 28 67 | 33 28 68 | 38 28 69 | 41 28 70 | 55 28 71 | 56 28 72 | 61 28 73 | 1 30 74 | 8 30 75 | 13 30 76 | 18 30 77 | 26 30 78 | 28 30 79 | 29 30 80 | 33 30 81 | 36 30 82 | 37 30 83 | 41 30 84 | 56 30 85 | 62 30 86 | 63 30 87 | 69 30 88 | 71 30 89 | 15 31 90 | 40 32 91 | 13 37 92 | 21 38 93 | 36 38 94 | 66 38 95 | 52 39 96 | 46 40 97 | 18 41 98 | 10 42 99 | 21 42 100 | 54 43 101 | 59 43 102 | 36 44 103 | 71 45 104 | 10 46 105 | 42 46 106 | 60 50 107 | 20 51 108 | 31 51 109 | 32 51 110 | 35 52 111 | 39 52 112 | 32 58 113 | 40 58 114 | 42 58 115 | 46 58 116 | 48 58 117 | 63 62 118 | 14 65 119 | 17 65 120 | 62 65 121 | 51 68 122 | 8 69 123 | 29 69 124 | 58 69 125 | 63 69 126 | 4 71 127 | 7 71 128 | 8 71 129 | 18 71 130 | 22 71 131 | 25 71 132 | 26 71 133 | 37 71 134 | 38 71 135 | 39 71 136 | 41 71 137 | 44 71 138 | 45 71 139 | 47 71 140 | 52 71 141 | 57 71 142 | 62 71 143 | 19 72 144 | 25 72 145 | 27 72 146 | 31 72 147 | 33 72 148 | 64 72 149 | 68 72 150 | -------------------------------------------------------------------------------- /tests/testdata/Ragusa18.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Pajek/Ragusa18 5 | % name: Pajek/Ragusa18 6 | % [Pajek network: Ragusa set] 7 | % id: 1516 8 | % date: 2006 9 | % author: V. Batagelj 10 | % ed: V. Batagelj 11 | % fields: name title A id kind notes aux date author ed 12 | % aux: nodename coord 13 | % kind: directed weighted graph 14 | %------------------------------------------------------------------------------- 15 | % notes: 16 | % ------------------------------------------------------------------------------ 17 | % Pajek network converted to sparse adjacency matrix for inclusion in UF sparse 18 | % matrix collection, Tim Davis. For Pajek datasets, See V. Batagelj & A. Mrvar, 19 | % http://vlado.fmf.uni-lj.si/pub/networks/data/. 
20 | % ------------------------------------------------------------------------------ 21 | %------------------------------------------------------------------------------- 22 | 23 23 64 23 | 11 1 1 24 | 4 2 1 25 | 7 2 1 26 | 8 2 1 27 | 9 2 1 28 | 10 2 1 29 | 11 2 2 30 | 15 2 1 31 | 17 2 1 32 | 21 2 2 33 | 23 2 1 34 | 8 3 2 35 | 11 3 1 36 | 16 3 2 37 | 17 3 1 38 | 21 3 1 39 | 7 6 1 40 | 13 6 1 41 | 2 7 1 42 | 6 7 1 43 | 8 7 1 44 | 9 8 1 45 | 13 9 1 46 | 19 9 1 47 | 23 9 2 48 | 9 10 1 49 | 2 11 2 50 | 11 11 2 51 | 12 11 1 52 | 16 11 1 53 | 17 11 1 54 | 21 11 1 55 | 11 12 2 56 | 18 12 1 57 | 2 13 2 58 | 5 13 1 59 | 6 13 1 60 | 7 13 1 61 | 6 14 1 62 | 9 14 1 63 | 8 16 4 64 | 12 16 1 65 | 16 16 1 66 | 6 19 1 67 | 9 19 1 68 | 21 20 1 69 | 1 21 1 70 | 2 21 1 71 | 3 21 2 72 | 6 21 1 73 | 7 21 1 74 | 11 21 2 75 | 12 21 3 76 | 16 21 1 77 | 17 21 1 78 | 21 21 1 79 | 22 21 1 80 | 23 21 1 81 | 11 22 1 82 | 4 23 1 83 | 8 23 1 84 | 13 23 1 85 | 18 23 1 86 | 21 23 1 87 | -------------------------------------------------------------------------------- /tests/testdata/cage4.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/vanHeukelum/cage4 5 | % name: vanHeukelum/cage4 6 | % [DNA electrophoresis, 4 monomers in polymer. A. van Heukelum, Utrecht U.] 7 | % id: 905 8 | % date: 2003 9 | % author: A. van Heukelum 10 | % ed: T. Davis 11 | % fields: title A name id date author ed kind 12 | % kind: directed weighted graph 13 | %------------------------------------------------------------------------------- 14 | 9 9 49 15 | 1 1 .75 16 | 2 1 .075027667114587 17 | 4 1 .0916389995520797 18 | 5 1 .0375138335572935 19 | 8 1 .0458194997760398 20 | 1 2 .137458499328119 21 | 2 2 .687569167786467 22 | 3 2 .0916389995520797 23 | 5 2 .0375138335572935 24 | 6 2 .0458194997760398 25 | 2 3 .112541500671881 26 | 3 3 .666666666666667 27 | 4 3 .13745849932812 28 | 6 3 .0458194997760398 29 | 7 3 .0375138335572935 30 | 1 4 .112541500671881 31 | 3 4 .075027667114587 32 | 4 4 .729097498880199 33 | 7 4 .0375138335572935 34 | 8 4 .0458194997760398 35 | 1 5 .137458499328119 36 | 2 5 .075027667114587 37 | 5 5 .537513833557293 38 | 6 5 .075027667114587 39 | 7 5 .0916389995520797 40 | 9 5 .0833333333333333 41 | 2 6 .112541500671881 42 | 3 6 .0916389995520797 43 | 5 6 .13745849932812 44 | 6 6 .445874834005214 45 | 8 6 .13745849932812 46 | 9 6 .075027667114587 47 | 3 7 .075027667114587 48 | 4 7 .13745849932812 49 | 5 7 .112541500671881 50 | 7 7 .470791832661453 51 | 8 7 .112541500671881 52 | 9 7 .0916389995520797 53 | 1 8 .112541500671881 54 | 4 8 .0916389995520797 55 | 6 8 .075027667114587 56 | 7 8 .0916389995520797 57 | 8 8 .54581949977604 58 | 9 8 .0833333333333333 59 | 5 9 .25 60 | 6 9 .150055334229174 61 | 7 9 .183277999104159 62 | 8 9 .25 63 | 9 9 .166666666666667 64 | -------------------------------------------------------------------------------- /tests/testdata/karate.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate pattern symmetric 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Newman/karate 5 | % name: Newman/karate 6 | % [Karate club, from Wayne Zachary, 
1977] 7 | % id: 2399 8 | % date: 1977 9 | % author: W. Zachary 10 | % ed: M. Newman 11 | % fields: name title A id date author kind notes ed 12 | % kind: undirected graph 13 | %------------------------------------------------------------------------------- 14 | % notes: 15 | % Network collection from M. Newman 16 | % http://www-personal.umich.edu/~mejn/netdata/ 17 | % 18 | % The graph "karate" contains the network of friendships between the 34 19 | % members of a karate club at a US university, as described by Wayne Zachary 20 | % in 1977. If you use these data in your work, please cite W. W. Zachary, An 21 | % information flow model for conflict and fission in small groups, Journal of 22 | % Anthropological Research 33, 452-473 (1977). 23 | %------------------------------------------------------------------------------- 24 | 34 34 78 25 | 2 1 26 | 3 1 27 | 4 1 28 | 5 1 29 | 6 1 30 | 7 1 31 | 8 1 32 | 9 1 33 | 11 1 34 | 12 1 35 | 13 1 36 | 14 1 37 | 18 1 38 | 20 1 39 | 22 1 40 | 32 1 41 | 3 2 42 | 4 2 43 | 8 2 44 | 14 2 45 | 18 2 46 | 20 2 47 | 22 2 48 | 31 2 49 | 4 3 50 | 8 3 51 | 9 3 52 | 10 3 53 | 14 3 54 | 28 3 55 | 29 3 56 | 33 3 57 | 8 4 58 | 13 4 59 | 14 4 60 | 7 5 61 | 11 5 62 | 7 6 63 | 11 6 64 | 17 6 65 | 17 7 66 | 31 9 67 | 33 9 68 | 34 9 69 | 34 10 70 | 34 14 71 | 33 15 72 | 34 15 73 | 33 16 74 | 34 16 75 | 33 19 76 | 34 19 77 | 34 20 78 | 33 21 79 | 34 21 80 | 33 23 81 | 34 23 82 | 26 24 83 | 28 24 84 | 30 24 85 | 33 24 86 | 34 24 87 | 26 25 88 | 28 25 89 | 32 25 90 | 32 26 91 | 30 27 92 | 34 27 93 | 34 28 94 | 32 29 95 | 34 29 96 | 33 30 97 | 34 30 98 | 33 31 99 | 34 31 100 | 33 32 101 | 34 32 102 | 34 33 103 | -------------------------------------------------------------------------------- /tests/testdata/test.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real symmetric 2 | 5 5 3 3 | 1 1 2 4 | 1 4 3 5 | 2 5 4 6 | --------------------------------------------------------------------------------
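Putting the pieces above together: the integration tests exercise the public API (csr_array construction, SpMV via @, constructors such as sparse.diags, the iterative solvers in legate_sparse.linalg, and MatrixMarket I/O via legate_sparse.io.mmread). The sketch below strings those same calls into a standalone script. It is illustrative only: the diagonally dominant tridiagonal matrix and the relative path to tests/testdata/test.mtx (assumed to be run from the repository root, typically under the legate launcher) are choices made for this example, not something prescribed by the repository.

import cupynumeric as np

import legate_sparse as sparse
import legate_sparse.io as legate_io
import legate_sparse.linalg as linalg

# Load the small symmetric MatrixMarket file shipped with the tests
# (5x5, with entries (1,1)=2, (1,4)=3, (2,5)=4 and their symmetric counterparts).
M = legate_io.mmread("tests/testdata/test.mtx")
print(M.todense())

# Build a symmetric, diagonally dominant tridiagonal matrix in CSR format,
# the same way test_diags.py and utils/banded_matrix.py construct banded matrices.
N = 1000
A = sparse.diags([-1.0, 4.0, -1.0], [-1, 0, 1], shape=(N, N), format="csr", dtype=np.float64)

# SpMV and a conjugate-gradient solve, mirroring test_spmv.py and test_cg_solve.py.
x_true = np.random.rand(N)
b = A @ x_true
x, iters = linalg.cg(A, b, tol=1e-8)
assert np.allclose(A @ x, b, rtol=1e-8, atol=0.0)
print(f"CG converged in {iters} iterations")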