├── .clang-format ├── .flake8 ├── .pre-commit-config.yaml ├── .style.yapf ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cmake ├── Modules │ ├── cpm_helpers.cmake │ ├── cuda_arch_helpers.cmake │ └── set_cpu_arch_flags.cmake ├── generate_install_info_py.cmake ├── thirdparty │ ├── get_legate.cmake │ └── get_nccl.cmake └── versions.json ├── conda └── conda-build │ ├── build.sh │ ├── conda_build_config.yaml │ └── meta.yaml ├── examples ├── common.py ├── gmg.py ├── matrix_power.py ├── pde.py ├── spgemm_microbenchmark.py └── spmv_microbenchmark.py ├── install.py ├── legate_sparse ├── __init__.py ├── _version.py ├── base.py ├── config.py ├── coverage.py ├── csr.py ├── dia.py ├── gallery.py ├── install_info.py.in ├── io.py ├── linalg.py ├── module.py ├── runtime.py ├── settings.py ├── types.py └── utils.py ├── legate_sparse_cpp.cmake ├── legate_sparse_python.cmake ├── scripts ├── memlog_analysis.py ├── memlog_cli.py └── memlog_parser.py ├── setup.py ├── src └── legate_sparse │ ├── array │ ├── conv │ │ ├── csr_to_dense.cc │ │ ├── csr_to_dense.cu │ │ ├── csr_to_dense.h │ │ ├── csr_to_dense_omp.cc │ │ ├── csr_to_dense_template.inl │ │ ├── dense_to_csr.cc │ │ ├── dense_to_csr.cu │ │ ├── dense_to_csr.h │ │ ├── dense_to_csr_omp.cc │ │ ├── dense_to_csr_template.inl │ │ ├── pos_to_coordinates.cc │ │ ├── pos_to_coordinates.cu │ │ ├── pos_to_coordinates.h │ │ ├── pos_to_coordinates_omp.cc │ │ └── pos_to_coordinates_template.inl │ ├── csr │ │ ├── get_diagonal.cc │ │ ├── get_diagonal.cu │ │ ├── get_diagonal.h │ │ ├── get_diagonal_omp.cc │ │ ├── get_diagonal_template.inl │ │ ├── indexing.cc │ │ ├── indexing.cu │ │ ├── indexing.h │ │ ├── indexing_omp.cc │ │ ├── indexing_template.inl │ │ ├── spgemm_csr_csr_csr.cc │ │ ├── spgemm_csr_csr_csr.cu │ │ ├── spgemm_csr_csr_csr.h │ │ ├── spgemm_csr_csr_csr_omp.cc │ │ ├── spgemm_csr_csr_csr_template.inl │ │ ├── spmv.cc │ │ ├── spmv.cu │ │ ├── spmv.h │ │ ├── spmv_omp.cc │ │ └── spmv_template.inl │ └── util │ │ ├── scale_rect.cc │ │ ├── scale_rect.cu │ │ ├── scale_rect.h │ │ ├── scale_rect_omp.cc │ │ ├── scale_rect_template.inl │ │ ├── unzip_rect.cc │ │ ├── unzip_rect.cu │ │ ├── unzip_rect.h │ │ ├── unzip_rect_omp.cc │ │ ├── unzip_rect_template.inl │ │ ├── zip_to_rect.cc │ │ ├── zip_to_rect.cu │ │ ├── zip_to_rect.h │ │ ├── zip_to_rect_omp.cc │ │ └── zip_to_rect_template.inl │ ├── cffi.h │ ├── cudalibs.cu │ ├── cudalibs.h │ ├── io │ ├── mtx_to_coo.cc │ └── mtx_to_coo.h │ ├── linalg │ ├── axpby.cc │ ├── axpby.cu │ ├── axpby.h │ ├── axpby_omp.cc │ └── axpby_template.inl │ ├── mapper │ ├── mapper.cc │ └── mapper.h │ ├── partition │ ├── fast_image_partition.cc │ ├── fast_image_partition.cu │ ├── fast_image_partition.h │ └── fast_image_partition_template.inl │ ├── sparse.cc │ ├── sparse.h │ ├── sparse_c.h │ └── util │ ├── cuda_help.h │ ├── cusparse_utils.h │ ├── dispatch.h │ ├── legate_utils.h │ ├── logger.h │ ├── omp_help.h │ ├── thrust_allocator.h │ ├── typedefs.h │ ├── upcast_future.cc │ └── upcast_future.h ├── test.py └── tests ├── integration ├── conftest.py ├── test_cg_axpby.py ├── test_cg_solve.py ├── test_comparison.py ├── test_csr_from_coo.py ├── test_csr_from_csr.py ├── test_csr_from_dense.py ├── test_csr_to_dense.py ├── test_csr_transpose.py ├── test_diagonal.py ├── test_diags.py ├── test_gmres_solve.py ├── test_indexing.py ├── test_io.py ├── test_manual_sorting.py ├── test_nonzero.py ├── test_spgemm.py ├── test_spmv.py ├── test_unary_operation.py └── utils │ ├── banded_matrix.py │ └── sample.py └── testdata ├── GlossGT.mtx ├── Ragusa18.mtx ├── 
cage4.mtx ├── karate.mtx └── test.mtx /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # 'foo' is too complex (N) 4 | C901, 5 | # continuation line missing indentation or outdented 6 | E122, 7 | E203, E501, 8 | F403, F821, W503 9 | max-line-length = 80 10 | max-complexity = 18 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-mypy 3 | rev: 'v1.5.1' 4 | hooks: 5 | - id: mypy 6 | language: system 7 | pass_filenames: false 8 | args: ['legate_sparse'] 9 | - repo: https://github.com/psf/black 10 | rev: 23.9.1 11 | hooks: 12 | - id: black 13 | - repo: https://github.com/PyCQA/isort 14 | rev: 5.12.0 15 | hooks: 16 | - id: isort 17 | args: ["--profile", "black"] 18 | - repo: https://github.com/PyCQA/flake8 19 | rev: 6.1.0 20 | hooks: 21 | - id: flake8 22 | args: [--config=.flake8] 23 | - repo: https://github.com/pre-commit/mirrors-clang-format 24 | rev: 'v16.0.6' # Use the sha / tag you want to point at 25 | hooks: 26 | - id: clang-format 27 | files: \.(cu|cuh|h|cc|inl)$ 28 | types_or: [] 29 | 30 | default_language_version: 31 | python: python3 32 | -------------------------------------------------------------------------------- /cmake/Modules/cpm_helpers.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(get_cpm_git_args _out_var) 18 | 19 | set(oneValueArgs TAG BRANCH REPOSITORY) 20 | cmake_parse_arguments(GIT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 21 | 22 | set(repo_tag "") 23 | set(gh_tag_prefix "") 24 | # Default to specifying `GIT_REPOSITORY` and `GIT_TAG` 25 | set(cpm_git_args GIT_REPOSITORY ${GIT_REPOSITORY}) 26 | 27 | if(GIT_BRANCH) 28 | set(gh_tag_prefix "heads") 29 | set(repo_tag "${GIT_BRANCH}") 30 | list(APPEND cpm_git_args GIT_TAG ${GIT_BRANCH}) 31 | elseif(GIT_TAG) 32 | set(gh_tag_prefix "tags") 33 | set(repo_tag "${GIT_TAG}") 34 | list(APPEND cpm_git_args GIT_TAG ${GIT_TAG}) 35 | endif() 36 | 37 | # Remove `.git` suffix from repo URL 38 | if(GIT_REPOSITORY MATCHES "^(.*)(\.git)$") 39 | set(GIT_REPOSITORY "${CMAKE_MATCH_1}") 40 | endif() 41 | if(GIT_REPOSITORY MATCHES "github\.com") 42 | # If retrieving from github use `.zip` URL to download faster 43 | set(cpm_git_args URL "${GIT_REPOSITORY}/archive/${repo_tag}.zip") 44 | elseif(GIT_REPOSITORY MATCHES "gitlab\.com") 45 | # GitLab archive URIs replace slashes with dashes 46 | string(REPLACE "/" "-" archive_tag "${repo_tag}") 47 | string(LENGTH "${GIT_REPOSITORY}" repo_name_len) 48 | string(FIND "${GIT_REPOSITORY}" "/" repo_name_idx REVERSE) 49 | math(EXPR repo_name_len "${repo_name_len} - ${repo_name_idx}") 50 | string(SUBSTRING "${GIT_REPOSITORY}" ${repo_name_idx} ${repo_name_len} repo_name) 51 | # If retrieving from gitlab use `.zip` URL to download faster 52 | set(cpm_git_args URL "${GIT_REPOSITORY}/-/archive/${repo_tag}/${repo_name}-${archive_tag}.zip") 53 | endif() 54 | 55 | set(${_out_var} ${cpm_git_args} PARENT_SCOPE) 56 | 57 | endfunction() 58 | -------------------------------------------------------------------------------- /cmake/Modules/cuda_arch_helpers.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2024 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | #============================================================================= 16 | 17 | function(set_cuda_arch_from_names) 18 | set(cuda_archs "") 19 | # translate legacy arch names into numbers 20 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "fermi") 21 | list(APPEND cuda_archs 20) 22 | endif() 23 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "kepler") 24 | list(APPEND cuda_archs 30) 25 | endif() 26 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "k20") 27 | list(APPEND cuda_archs 35) 28 | endif() 29 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "k80") 30 | list(APPEND cuda_archs 37) 31 | endif() 32 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "maxwell") 33 | list(APPEND cuda_archs 52) 34 | endif() 35 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "pascal") 36 | list(APPEND cuda_archs 60) 37 | endif() 38 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "volta") 39 | list(APPEND cuda_archs 70) 40 | endif() 41 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "turing") 42 | list(APPEND cuda_archs 75) 43 | endif() 44 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "ampere") 45 | list(APPEND cuda_archs 80) 46 | endif() 47 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "ada") 48 | list(APPEND cuda_archs 89) 49 | endif() 50 | if(CMAKE_CUDA_ARCHITECTURES MATCHES "hopper") 51 | list(APPEND cuda_archs 90) 52 | endif() 53 | 54 | if(cuda_archs) 55 | list(LENGTH cuda_archs num_archs) 56 | if(num_archs GREATER 1) 57 | # A CMake architecture list entry of "80" means to build both compute and sm. 58 | # What we want is for the newest arch only to build that way, while the rest 59 | # build only for sm. 60 | list(POP_BACK cuda_archs latest_arch) 61 | list(TRANSFORM cuda_archs APPEND "-real") 62 | list(APPEND cuda_archs ${latest_arch}) 63 | else() 64 | list(TRANSFORM cuda_archs APPEND "-real") 65 | endif() 66 | set(CMAKE_CUDA_ARCHITECTURES ${cuda_archs} PARENT_SCOPE) 67 | endif() 68 | endfunction() 69 | 70 | function(add_cuda_architecture_defines defs) 71 | message(VERBOSE "legate: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") 72 | 73 | set(_defs ${${defs}}) 74 | 75 | macro(add_def_if_arch_enabled arch def) 76 | if("${arch}" IN_LIST CMAKE_CUDA_ARCHITECTURES OR 77 | ("${arch}-real" IN_LIST CMAKE_CUDA_ARCHITECTURES) OR 78 | ("${arch}-virtual" IN_LIST CMAKE_CUDA_ARCHITECTURES)) 79 | list(APPEND _defs ${def}) 80 | endif() 81 | endmacro() 82 | 83 | add_def_if_arch_enabled("20" "FERMI_ARCH") 84 | add_def_if_arch_enabled("30" "KEPLER_ARCH") 85 | add_def_if_arch_enabled("35" "K20_ARCH") 86 | add_def_if_arch_enabled("37" "K80_ARCH") 87 | add_def_if_arch_enabled("52" "MAXWELL_ARCH") 88 | add_def_if_arch_enabled("60" "PASCAL_ARCH") 89 | add_def_if_arch_enabled("70" "VOLTA_ARCH") 90 | add_def_if_arch_enabled("75" "TURING_ARCH") 91 | add_def_if_arch_enabled("80" "AMPERE_ARCH") 92 | add_def_if_arch_enabled("89" "ADA_ARCH") 93 | add_def_if_arch_enabled("90" "HOPPER_ARCH") 94 | 95 | set(${defs} ${_defs} PARENT_SCOPE) 96 | endfunction() 97 | -------------------------------------------------------------------------------- /cmake/Modules/set_cpu_arch_flags.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | #------------------------------------------------------------------------------# 18 | # Architecture 19 | #------------------------------------------------------------------------------# 20 | if(BUILD_MARCH AND BUILD_MCPU) 21 | message(FATAL_ERROR "BUILD_MARCH and BUILD_MCPU are incompatible") 22 | endif() 23 | 24 | function(set_cpu_arch_flags out_var) 25 | # Try -march first. On platforms that don't support it, GCC will issue a hard 26 | # error, so we'll know not to use it. Default is "native", but explicitly 27 | # setting BUILD_MARCH="" disables use of the flag 28 | if(BUILD_MARCH) 29 | set(INTERNAL_BUILD_MARCH ${BUILD_MARCH}) 30 | elseif(NOT DEFINED BUILD_MARCH) 31 | set(INTERNAL_BUILD_MARCH "native") 32 | endif() 33 | 34 | set(flags "") 35 | 36 | include(CheckCXXCompilerFlag) 37 | if(INTERNAL_BUILD_MARCH) 38 | check_cxx_compiler_flag("-march=${INTERNAL_BUILD_MARCH}" COMPILER_SUPPORTS_MARCH) 39 | if(COMPILER_SUPPORTS_MARCH) 40 | list(APPEND flags "-march=${INTERNAL_BUILD_MARCH}") 41 | elseif(BUILD_MARCH) 42 | message(FATAL_ERROR "The flag -march=${INTERNAL_BUILD_MARCH} is not supported by the compiler") 43 | else() 44 | unset(INTERNAL_BUILD_MARCH) 45 | endif() 46 | endif() 47 | 48 | # Try -mcpu. We do this second because it is deprecated on x86, but 49 | # GCC won't issue a hard error, so we can't tell if it worked or not. 
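  # Only fall back to -mcpu when BUILD_MARCH was not explicitly requested and
  # the default -march=native probe above did not succeed.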
50 | if (NOT INTERNAL_BUILD_MARCH AND NOT DEFINED BUILD_MARCH) 51 | if(BUILD_MCPU) 52 | set(INTERNAL_BUILD_MCPU ${BUILD_MCPU}) 53 | else() 54 | set(INTERNAL_BUILD_MCPU "native") 55 | endif() 56 | 57 | check_cxx_compiler_flag("-mcpu=${INTERNAL_BUILD_MCPU}" COMPILER_SUPPORTS_MCPU) 58 | if(COMPILER_SUPPORTS_MCPU) 59 | list(APPEND flags "-mcpu=${INTERNAL_BUILD_MCPU}") 60 | elseif(BUILD_MCPU) 61 | message(FATAL_ERROR "The flag -mcpu=${INTERNAL_BUILD_MCPU} is not supported by the compiler") 62 | else() 63 | unset(INTERNAL_BUILD_MCPU) 64 | endif() 65 | endif() 66 | 67 | # Add flags for Power architectures 68 | check_cxx_compiler_flag("-maltivec -Werror" COMPILER_SUPPORTS_MALTIVEC) 69 | if(COMPILER_SUPPORTS_MALTIVEC) 70 | list(APPEND flags "-maltivec") 71 | endif() 72 | check_cxx_compiler_flag("-mabi=altivec -Werror" COMPILER_SUPPORTS_MABI_ALTIVEC) 73 | if(COMPILER_SUPPORTS_MABI_ALTIVEC) 74 | list(APPEND flags "-mabi=altivec") 75 | endif() 76 | check_cxx_compiler_flag("-mvsx -Werror" COMPILER_SUPPORTS_MVSX) 77 | if(COMPILER_SUPPORTS_MVSX) 78 | list(APPEND flags "-mvsx") 79 | endif() 80 | 81 | set(${out_var} "${flags}" PARENT_SCOPE) 82 | endfunction() 83 | 84 | set_cpu_arch_flags(arch_flags) 85 | -------------------------------------------------------------------------------- /cmake/generate_install_info_py.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022-2024 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | execute_process( 18 | COMMAND ${CMAKE_C_COMPILER} 19 | -E -DLEGATE_USE_PYTHON_CFFI 20 | -I "${CMAKE_CURRENT_LIST_DIR}/../src/legate_sparse" 21 | -P "${CMAKE_CURRENT_LIST_DIR}/../src/legate_sparse/sparse_c.h" 22 | ECHO_ERROR_VARIABLE 23 | OUTPUT_VARIABLE header 24 | COMMAND_ERROR_IS_FATAL ANY 25 | ) 26 | 27 | set(libpath "") 28 | configure_file( 29 | "${CMAKE_CURRENT_LIST_DIR}/../legate_sparse/install_info.py.in" 30 | "${CMAKE_CURRENT_LIST_DIR}/../legate_sparse/install_info.py" 31 | @ONLY) 32 | -------------------------------------------------------------------------------- /cmake/thirdparty/get_nccl.cmake: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright 2022 NVIDIA Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | #============================================================================= 16 | 17 | function(find_or_configure_nccl) 18 | 19 | if(TARGET NCCL::NCCL) 20 | return() 21 | endif() 22 | 23 | rapids_find_generate_module(NCCL 24 | HEADER_NAMES nccl.h 25 | LIBRARY_NAMES nccl 26 | ) 27 | 28 | # Currently NCCL has no CMake build-system so we require 29 | # it built and installed on the machine already 30 | rapids_find_package(NCCL REQUIRED) 31 | 32 | endfunction() 33 | 34 | find_or_configure_nccl() 35 | -------------------------------------------------------------------------------- /cmake/versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages" : { 3 | "legate" : { 4 | "repo": "legate.internal", 5 | "org": "nv-legate", 6 | "version": "25.03.02", 7 | "git_url" : "git@github.com:nv-legate/legate.internal.git", 8 | "git_shallow": false, 9 | "always_download": false, 10 | "git_tag" : "75dc0a92bbd2dfb79b6b680a0f37cbd0370d0181", 11 | "anaconda_label": "main" 12 | }, 13 | "cupynumeric" : { 14 | "repo": "cupynumeric.internal", 15 | "org": "nv-legate", 16 | "version": "25.03.02", 17 | "git_url" : "git@github.com:nv-legate/cupynumeric.internal", 18 | "git_shallow": false, 19 | "always_download": false, 20 | "git_tag" : "1fa45603c560068508c3be2e0df45aec62359019", 21 | "anaconda_label": "experimental" 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /conda/conda-build/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo -e "\n\n--------------------- CONDA/CONDA-BUILD/BUILD.SH -----------------------\n" 4 | 5 | set -xeo pipefail; 6 | 7 | # If run through CI, BUILD_MARCH is set externally. If it is not set, try to set it. 8 | ARCH=$(uname -m) 9 | if [[ -z "${BUILD_MARCH}" ]]; then 10 | if [[ "${ARCH}" = "aarch64" ]]; then 11 | # Use the gcc march value used by aarch64 Ubuntu. 12 | BUILD_MARCH=armv8-a 13 | else 14 | # Use uname -m otherwise 15 | BUILD_MARCH=$(uname -m | tr '_' '-') 16 | fi 17 | fi 18 | 19 | # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to 20 | # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH 21 | CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" 22 | 23 | # Add our options to conda's CMAKE_ARGS 24 | CMAKE_ARGS+=" 25 | --log-level=VERBOSE 26 | -DBUILD_SHARED_LIBS=ON 27 | -DBUILD_MARCH=${BUILD_MARCH} 28 | -DCMAKE_BUILD_TYPE=Release 29 | -DCMAKE_VERBOSE_MAKEFILE=ON 30 | -DCMAKE_BUILD_PARALLEL_LEVEL=${JOBS:-$(nproc --ignore=1)}" 31 | if [ -z "$CPU_ONLY" ]; then 32 | CMAKE_ARGS+="-DCMAKE_CUDA_ARCHITECTURES=all-major" 33 | fi 34 | 35 | export CMAKE_GENERATOR=Ninja 36 | export CUDAHOSTCXX=${CXX} 37 | export OPENSSL_DIR="$PREFIX" 38 | 39 | echo "Environment" 40 | env 41 | 42 | echo "Build starting on $(date)" 43 | CUDAFLAGS="-isystem ${PREFIX}/include -L${PREFIX}/lib" 44 | export CUDAFLAGS 45 | 46 | SKBUILD_BUILD_OPTIONS=-j$CPU_COUNT \ 47 | $PYTHON -m pip install \ 48 | --root / \ 49 | --no-deps \ 50 | --prefix "$PREFIX" \ 51 | --no-build-isolation \ 52 | --upgrade \ 53 | --cache-dir "$PIP_CACHE_DIR" \ 54 | --disable-pip-version-check \ 55 | . 
-vv 56 | 57 | echo "Build ending on $(date)" 58 | -------------------------------------------------------------------------------- /conda/conda-build/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | gpu_enabled: 2 | - true 3 | - false 4 | 5 | upload_build: 6 | - false 7 | 8 | python: 9 | - 3.10 10 | - 3.11 11 | - 3.12 12 | 13 | numpy_version: 14 | - ">=1.22,<2" 15 | 16 | cmake_version: 17 | - ">=3.20.1,!=3.23.0" 18 | -------------------------------------------------------------------------------- /legate_sparse/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | 16 | Not sure what is supposed to go in here... 17 | 18 | """ 19 | 20 | import scipy.sparse as _sp # type: ignore 21 | 22 | from .coverage import clone_module # noqa: F401 23 | from .csr import csr_array, csr_matrix # noqa: F401 24 | from .module import * # noqa: F401 25 | 26 | clone_module(_sp, globals()) 27 | 28 | del clone_module 29 | del _sp 30 | -------------------------------------------------------------------------------- /legate_sparse/coverage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | from __future__ import annotations 16 | 17 | from functools import wraps 18 | from types import FunctionType, MethodDescriptorType, MethodType, ModuleType 19 | from typing import Any, Container, Mapping, Optional, cast 20 | 21 | from legate.core import track_provenance 22 | from typing_extensions import Protocol 23 | 24 | MOD_INTERNAL = {"__dir__", "__getattr__"} 25 | 26 | 27 | def filter_namespace( 28 | ns: Mapping[str, Any], 29 | *, 30 | omit_names: Optional[Container[str]] = None, 31 | omit_types: tuple[type, ...] = (), 32 | ) -> dict[str, Any]: 33 | omit_names = omit_names or set() 34 | return { 35 | attr: value 36 | for attr, value in ns.items() 37 | if attr not in omit_names and not isinstance(value, omit_types) 38 | } 39 | 40 | 41 | def should_wrap(obj: object) -> bool: 42 | return isinstance(obj, (FunctionType, MethodType, MethodDescriptorType)) 43 | 44 | 45 | class AnyCallable(Protocol): 46 | def __call__(self, *args: Any, **kwargs: Any) -> Any: 47 | ... 
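# The helpers below wrap functions and methods whose names also appear in the
# origin scipy.sparse module or class, so that each call is reported through
# Legate's provenance tracking (track_provenance).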
48 | 
49 | 
50 | def wrap(func: AnyCallable) -> Any:
51 |     @wraps(func)
52 |     @track_provenance(nested=True)
53 |     def wrapper(*args: Any, **kwargs: Any) -> Any:
54 |         return func(*args, **kwargs)
55 | 
56 |     return wrapper
57 | 
58 | 
59 | def clone_module(origin_module: ModuleType, new_globals: dict[str, Any]) -> None:
60 |     """Copy attributes from one module to another, excluding submodules
61 | 
62 |     Function types are wrapped with a decorator to report API calls. All
63 |     other values are copied as-is.
64 | 
65 |     Parameters
66 |     ----------
67 |     origin_module : ModuleType
68 |         Existing module to clone attributes from
69 | 
70 |     new_globals : dict
71 |         a globals() dict for the new module to clone into
72 | 
73 |     Returns
74 |     -------
75 |     None
76 | 
77 |     """
78 |     for attr, value in new_globals.items():
79 |         # Only need to wrap things that are in the origin module to begin with
80 |         if attr not in origin_module.__dict__:
81 |             continue
82 |         if isinstance(value, FunctionType):
83 |             wrapped = wrap(cast(AnyCallable, value))
84 |             new_globals[attr] = wrapped
85 | 
86 | 
87 | def clone_scipy_arr_kind(origin_class: type) -> Any:
88 |     """Copy attributes from an origin class to the input class.
89 | 
90 |     Method types are wrapped with a decorator to report API calls. All
91 |     other values are copied as-is.
92 | 
93 |     """
94 | 
95 |     def body(cls: type):
96 |         for attr, value in cls.__dict__.items():
97 |             # Only need to wrap things that are in the origin class to begin
98 |             # with
99 |             if not hasattr(origin_class, attr):
100 |                 continue
101 |             if should_wrap(value):
102 |                 wrapped = wrap(value)
103 |                 setattr(cls, attr, wrapped)
104 | 
105 |         return cls
106 | 
107 |     return body
108 | 
--------------------------------------------------------------------------------
/legate_sparse/install_info.py.in:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 | #
9 | # See the LICENSE file for details.
10 | # 11 | 12 | # IMPORTANT: 13 | # * install_info.py is a generated file and should not be modified by hand 14 | 15 | def get_libpath(): 16 | import os, sys, platform 17 | join = os.path.join 18 | exists = os.path.exists 19 | dirname = os.path.dirname 20 | cn_path = dirname(dirname(__file__)) 21 | so_ext = { 22 | "": "", 23 | "Java": ".jar", 24 | "Linux": ".so", 25 | "Darwin": ".dylib", 26 | "Windows": ".dll" 27 | }[platform.system()] 28 | 29 | def find_liblegate_sparse(libdir): 30 | if exists(join(libdir, f"liblegate_sparse{so_ext}")): 31 | return libdir 32 | return None 33 | 34 | return ( 35 | find_liblegate_sparse(join(cn_path, "build", "lib")) or 36 | find_liblegate_sparse(join(dirname(dirname(dirname(cn_path))), "lib")) or 37 | find_liblegate_sparse(join(dirname(dirname(sys.executable)), "lib")) or 38 | "" 39 | ) 40 | 41 | 42 | libpath: str = get_libpath() 43 | header: str = """@header@""" 44 | -------------------------------------------------------------------------------- /legate_sparse/io.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy as np 16 | from legate.core import track_provenance, types 17 | 18 | from .config import SparseOpCode 19 | from .csr import csr_array 20 | from .runtime import runtime 21 | from .types import coord_ty, float64, nnz_ty 22 | from .utils import store_to_cupynumeric_array 23 | 24 | 25 | @track_provenance(runtime.sparse_library) 26 | def mmread(source): 27 | # TODO (rohany): We'll assume for now that all of the nodes in the system 28 | # can access the file passed in, so we don't need to worry about where this 29 | # task gets mapped to. 30 | rows = runtime.create_store(coord_ty, ndim=1) 31 | cols = runtime.create_store(coord_ty, ndim=1) 32 | vals = runtime.create_store(float64, ndim=1) 33 | m = runtime.create_store(coord_ty, optimize_scalar=True, shape=(1,)) 34 | n = runtime.create_store(coord_ty, optimize_scalar=True, shape=(1,)) 35 | nnz = runtime.create_store(nnz_ty, optimize_scalar=True, shape=(1,)) 36 | task = runtime.create_auto_task(SparseOpCode.READ_MTX_TO_COO) 37 | task.add_output(m) 38 | task.add_output(n) 39 | task.add_output(nnz) 40 | task.add_output(rows) 41 | task.add_output(cols) 42 | task.add_output(vals) 43 | task.add_scalar_arg(source, types.string_type) 44 | task.execute() 45 | 46 | m = int(np.asarray(m.get_physical_store().get_inline_allocation())[0]) 47 | n = int(np.asarray(n.get_physical_store().get_inline_allocation())[0]) 48 | nnz = int(np.asarray(nnz.get_physical_store().get_inline_allocation())[0]) 49 | # Slice down each store from the resulting size into the actual size. 
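    # (The output stores above are created unbound, so the I/O task may leave
    # them larger than the number of entries actually read; keep only the
    # first `nnz` elements of each.)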
50 | sl = slice(0, nnz) 51 | rows = store_to_cupynumeric_array(rows.slice(0, sl)) 52 | cols = store_to_cupynumeric_array(cols.slice(0, sl)) 53 | vals = store_to_cupynumeric_array(vals.slice(0, sl)) 54 | return csr_array((vals, (rows, cols)), shape=(m, n)) 55 | -------------------------------------------------------------------------------- /legate_sparse/module.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Portions of this file are also subject to the following license: 16 | # 17 | # Copyright (c) 2001-2002 Enthought, Inc. 2003-2022, SciPy Developers. 18 | # All rights reserved. 19 | # 20 | # Redistribution and use in source and binary forms, with or without 21 | # modification, are permitted provided that the following conditions 22 | # are met: 23 | # 24 | # 1. Redistributions of source code must retain the above copyright 25 | # notice, this list of conditions and the following disclaimer. 26 | # 27 | # 2. Redistributions in binary form must reproduce the above 28 | # copyright notice, this list of conditions and the following 29 | # disclaimer in the documentation and/or other materials provided 30 | # with the distribution. 31 | # 32 | # 3. Neither the name of the copyright holder nor the names of its 33 | # contributors may be used to endorse or promote products derived 34 | # from this software without specific prior written permission. 35 | # 36 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 40 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | 48 | 49 | from .csr import csr_array # noqa: F401 50 | from .dia import dia_array # noqa: F401 51 | from .gallery import diags # noqa: F401 52 | from .io import mmread # noqa: F401 53 | 54 | # expose default types 55 | from .types import coord_ty, nnz_ty # noqa: F401 56 | 57 | 58 | # is_sparse_matrix returns whether or not an object is a legate 59 | # sparse created sparse matrix. 60 | def is_sparse_matrix(o): 61 | return any((isinstance(o, csr_array),)) 62 | 63 | 64 | issparse = is_sparse_matrix 65 | isspmatrix = is_sparse_matrix 66 | 67 | 68 | # Variants for each particular format type. 
69 | def isspmatrix_csr(o): 70 | return isinstance(o, csr_array) 71 | -------------------------------------------------------------------------------- /legate_sparse/runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from __future__ import annotations 15 | 16 | from typing import TYPE_CHECKING 17 | 18 | import numpy as np 19 | from legate.core import ( 20 | AutoTask, 21 | LogicalStore, 22 | ManualTask, 23 | Shape, 24 | TaskTarget, 25 | get_legate_runtime, 26 | get_machine, 27 | types, 28 | ) 29 | 30 | from .config import SparseOpCode, _library 31 | 32 | if TYPE_CHECKING: 33 | from typing import Optional, Union 34 | 35 | import numpy.typing as npt 36 | 37 | TO_CORE_DTYPES = { 38 | np.dtype(np.bool_): types.bool_, 39 | np.dtype(np.int8): types.int8, 40 | np.dtype(np.int16): types.int16, 41 | np.dtype(np.int32): types.int32, 42 | np.dtype(np.int64): types.int64, 43 | np.dtype(np.uint8): types.uint8, 44 | np.dtype(np.uint16): types.uint16, 45 | np.dtype(np.uint32): types.uint32, 46 | np.dtype(np.uint64): types.uint64, 47 | np.dtype(np.float16): types.float16, 48 | np.dtype(np.float32): types.float32, 49 | np.dtype(np.float64): types.float64, 50 | np.dtype(np.complex64): types.complex64, 51 | np.dtype(np.complex128): types.complex128, 52 | } 53 | 54 | 55 | # TODO (marsaev): rename to SparseRuntime to avoid confusion? 56 | class Runtime: 57 | def __init__(self, sparse_library): 58 | self.sparse_library = sparse_library 59 | self.legate_runtime = get_legate_runtime() 60 | self.legate_machine = get_machine() 61 | 62 | # Load all the necessary CUDA libraries if we have GPUs. 63 | if self.num_gpus > 0: 64 | # TODO (rohany): Also handle destroying the cuda libraries when the 65 | # runtime is torn down. 
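            # Launch one LOAD_CUDALIBS task per GPU (a manual launch over
            # Shape((num_gpus,))) so that every device initializes its CUDA
            # libraries (e.g. cuSPARSE) up front, then fence so initialization
            # completes before any compute tasks are issued.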
66 |             task = self.legate_runtime.create_manual_task(
67 |                 self.sparse_library,
68 |                 SparseOpCode.LOAD_CUDALIBS,
69 |                 launch_shape=Shape((self.num_gpus,)),
70 |             )
71 |             task.execute()
72 |             self.legate_runtime.issue_execution_fence(block=True)
73 | 
74 |     @property
75 |     def num_procs(self):
76 |         return self.legate_machine.count(self.legate_machine.preferred_target)
77 | 
78 |     @property
79 |     def num_gpus(self):
80 |         return self.legate_machine.count(TaskTarget.GPU)
81 | 
82 |     def create_store(
83 |         self,
84 |         ty: Union[npt.DTypeLike],
85 |         shape: Optional[Union[tuple[int, ...], Shape]] = None,
86 |         optimize_scalar: bool = False,
87 |         ndim: Optional[int] = None,
88 |     ) -> LogicalStore:
89 |         core_ty = TO_CORE_DTYPES[ty] if isinstance(ty, np.dtype) else ty
90 |         return self.legate_runtime.create_store(
91 |             core_ty, shape=shape, optimize_scalar=optimize_scalar, ndim=ndim
92 |         )
93 | 
94 |     # only OpCode
95 |     def create_auto_task(self, OpCode) -> AutoTask:
96 |         return self.legate_runtime.create_auto_task(self.sparse_library, OpCode)
97 | 
98 |     # OpCode and launch domains
99 |     def create_manual_task(self, OpCode, *args) -> ManualTask:
100 |         return self.legate_runtime.create_manual_task(
101 |             self.sparse_library, OpCode, *args
102 |         )
103 | 
104 | 
105 | # TODO (marsaev): rename to sparse_runtime to avoid confusion?
106 | runtime = Runtime(_library)
--------------------------------------------------------------------------------
/legate_sparse/settings.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023-2024 NVIDIA Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | from __future__ import annotations
16 | 
17 | from legate.util.settings import PrioritizedSetting, Settings, convert_bool
18 | 
19 | __all__ = ("settings",)
20 | 
21 | 
22 | class SparseRuntimeSettings(Settings):
23 |     fast_spgemm: PrioritizedSetting[bool] = PrioritizedSetting(
24 |         "fast-spgemm",
25 |         "LEGATE_SPARSE_FAST_SPGEMM",
26 |         default=False,
27 |         convert=convert_bool,
28 |         help="""
29 |         Switch to the faster CUSPARSE_SPGEMM_ALG1, which, however, uses
30 |         significantly more FB memory. It is used by default with cuSPARSE < 12.1,
31 |         since memory-restricted SpGEMM was only introduced in 12.1.
32 |         """,
33 |     )
34 | 
35 | 
36 | settings = SparseRuntimeSettings()
--------------------------------------------------------------------------------
/legate_sparse/types.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2024 NVIDIA Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy 16 | 17 | # Define some common types. Hopefully as we make more 18 | # progress in generalizing the compute kernels, we can 19 | # remove this code. 20 | coord_ty = numpy.dtype(numpy.int64) 21 | nnz_ty = numpy.dtype(numpy.uint64) 22 | float64 = numpy.dtype(numpy.float64) 23 | int32 = numpy.dtype(numpy.int32) 24 | int64 = numpy.dtype(numpy.int64) 25 | uint64 = numpy.dtype(numpy.uint64) 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2024 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from setuptools import find_packages 19 | from skbuild import setup 20 | 21 | # TODO: build yields "cant find legate module".... 22 | """ 23 | import legate.install_info as lg_install_info 24 | import os 25 | from pathlib import Path 26 | 27 | legate_dir = Path(lg_install_info.libpath).parent.as_posix() 28 | 29 | cmake_flags = [ 30 | f"-Dlegate_ROOT:STRING={legate_dir}", 31 | ] 32 | 33 | env_cmake_args = os.environ.get("CMAKE_ARGS") 34 | if env_cmake_args is not None: 35 | cmake_flags.append(env_cmake_args) 36 | os.environ["CMAKE_ARGS"] = " ".join(cmake_flags) 37 | """ 38 | 39 | setup( 40 | name="legate-sparse", 41 | version="25.03.00", 42 | description="An Aspiring Drop-In Replacement for SciPy Sparse module at Scale", 43 | author="NVIDIA Corporation", 44 | license="Apache 2.0", 45 | classifiers=[ 46 | "Intended Audience :: Developers", 47 | "Topic :: Database", 48 | "Topic :: Scientific/Engineering", 49 | "License :: OSI Approved :: Apache Software License", 50 | "Programming Language :: Python", 51 | "Programming Language :: Python :: 3.10", 52 | "Programming Language :: Python :: 3.11", 53 | ], 54 | packages=find_packages( 55 | where=".", 56 | include=["legate_sparse*"], 57 | ), 58 | include_package_data=True, 59 | zip_safe=False, 60 | ) 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRToDenseImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& A_vals, 30 | const AccessorRO, 1>& B_pos, 31 | const AccessorRO& B_crd, 32 | const AccessorRO& B_vals, 33 | const Rect<2>& rect) 34 | { 35 | // Initialize the output array. 36 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | for (INDEX_TY j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 38 | A_vals[{i, j}] = 0.0; 39 | } 40 | } 41 | // Do the conversion. 42 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 43 | for (size_t jB = B_pos[i].lo; jB < B_pos[i].hi + 1; jB++) { 44 | INDEX_TY j = B_crd[jB]; 45 | A_vals[{i, j}] = B_vals[jB]; 46 | } 47 | } 48 | } 49 | }; 50 | 51 | /*static*/ void CSRToDense::cpu_variant(TaskContext context) 52 | { 53 | csr_to_dense_template(context); 54 | } 55 | 56 | namespace // unnamed 57 | { 58 | static void __attribute__((constructor)) register_tasks(void) { CSRToDense::register_variants(); } 59 | 60 | } // namespace 61 | 62 | } // namespace sparse 63 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | #include "legate_sparse/util/cusparse_utils.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void CSRtoDenseKernel(size_t rows, 27 | Rect<2> bounds, 28 | AccessorWO A_vals, 29 | AccessorRO, 1> B_pos, 30 | AccessorRO B_crd, 31 | AccessorRO B_vals) 32 | { 33 | const auto idx = global_tid_1d(); 34 | if (idx >= rows) { 35 | return; 36 | } 37 | INDEX_TY i = idx + bounds.lo[0]; 38 | // Initialize the row with all zeros. 39 | for (INDEX_TY j = bounds.lo[1]; j < bounds.hi[1] + 1; j++) { 40 | A_vals[{i, j}] = 0.0; 41 | } 42 | // Copy the non-zero values into place. 
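  // Each thread owns exactly one row of the output, so these writes cannot
  // race with writes from other threads.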
43 | for (INDEX_TY j_pos = B_pos[i].lo; j_pos < B_pos[i].hi + 1; j_pos++) { 44 | INDEX_TY j = B_crd[j_pos]; 45 | A_vals[{i, j}] = B_vals[j_pos]; 46 | } 47 | } 48 | 49 | template <> 50 | struct CSRToDenseImpl { 51 | template 52 | void operator()(CSRToDenseArgs& args) const 53 | { 54 | using INDEX_TY = type_of; 55 | using VAL_TY = type_of; 56 | 57 | auto& A_vals = args.A_vals; 58 | auto& B_pos = args.B_pos; 59 | auto& B_crd = args.B_crd; 60 | auto& B_vals = args.B_vals; 61 | 62 | // Break out early if the iteration space partition is empty. 63 | if (B_pos.domain().empty()) { 64 | return; 65 | } 66 | 67 | auto stream = get_cached_stream(); 68 | 69 | auto B_domain = B_pos.domain(); 70 | auto rows = B_domain.hi()[0] - B_domain.lo()[0] + 1; 71 | auto blocks = get_num_blocks_1d(rows); 72 | CSRtoDenseKernel<<>>(rows, 73 | A_vals.shape<2>(), 74 | A_vals.write_accessor(), 75 | B_pos.read_accessor, 1>(), 76 | B_crd.read_accessor(), 77 | B_vals.read_accessor()); 78 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 79 | } 80 | }; 81 | 82 | /*static*/ void CSRToDense::gpu_variant(TaskContext context) 83 | { 84 | csr_to_dense_template(context); 85 | } 86 | 87 | } // namespace sparse 88 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRToDenseArgs { 26 | const legate::PhysicalStore& A_vals; 27 | const legate::PhysicalStore& B_pos; 28 | const legate::PhysicalStore& B_crd; 29 | const legate::PhysicalStore& B_vals; 30 | }; 31 | 32 | class CSRToDense : public SparseTask { 33 | public: 34 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_TO_DENSE}; 35 | 36 | public: 37 | static void cpu_variant(legate::TaskContext ctx); 38 | #ifdef LEGATE_USE_OPENMP 39 | static void omp_variant(legate::TaskContext ctx); 40 | #endif 41 | #ifdef LEGATE_USE_CUDA 42 | static void gpu_variant(legate::TaskContext context); 43 | #endif 44 | }; 45 | 46 | } // namespace sparse 47 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/csr_to_dense.h" 18 | #include "legate_sparse/array/conv/csr_to_dense_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRToDenseImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& A_vals, 30 | const AccessorRO, 1>& B_pos, 31 | const AccessorRO& B_crd, 32 | const AccessorRO& B_vals, 33 | const Rect<2>& rect) 34 | { 35 | // Initialize the output array. 36 | #pragma omp parallel for schedule(static) collapse(2) 37 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 38 | for (INDEX_TY j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 39 | A_vals[{i, j}] = 0.0; 40 | } 41 | } 42 | // Do the conversion. 43 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 44 | for (INDEX_TY i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 45 | for (size_t jB = B_pos[i].lo; jB < B_pos[i].hi + 1; jB++) { 46 | INDEX_TY j = B_crd[jB]; 47 | A_vals[{i, j}] = B_vals[jB]; 48 | } 49 | } 50 | } 51 | }; 52 | 53 | /*static*/ void CSRToDense::omp_variant(TaskContext context) 54 | { 55 | csr_to_dense_template(context); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/csr_to_dense_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
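// This header holds the type-dispatch plumbing shared by the CPU, OpenMP, and
// CUDA variants of the CSR-to-dense conversion; each variant supplies its own
// specialization of the implementation.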
20 | #include "legate_sparse/array/conv/csr_to_dense.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct CSRToDenseImplBody; 31 | 32 | template 33 | struct CSRToDenseImpl { 34 | template 35 | void operator()(CSRToDenseArgs& args) const 36 | { 37 | using INDEX_TY = type_of; 38 | using VAL_TY = type_of; 39 | 40 | auto A_vals = args.A_vals.write_accessor(); 41 | auto B_pos = args.B_pos.read_accessor, 1>(); 42 | auto B_crd = args.B_crd.read_accessor(); 43 | auto B_vals = args.B_vals.read_accessor(); 44 | 45 | if (args.A_vals.domain().empty()) { 46 | return; 47 | } 48 | CSRToDenseImplBody()( 49 | A_vals, B_pos, B_crd, B_vals, args.A_vals.shape<2>()); 50 | } 51 | }; 52 | 53 | template 54 | static void csr_to_dense_template(TaskContext context) 55 | { 56 | auto outputs = context.outputs(); 57 | // We have to promote the pos region for the auto-parallelizer to kick in, 58 | // so remove the transformation before proceeding. 59 | // if (inputs[0].transformed()) { inputs[0].remove_transform(); } 60 | 61 | CSRToDenseArgs args{outputs[0], context.inputs()[0], context.inputs()[1], context.inputs()[2]}; 62 | 63 | index_type_value_type_dispatch( 64 | args.B_crd.code(), args.A_vals.code(), CSRToDenseImpl{}, args); 65 | } 66 | 67 | } // namespace sparse 68 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/dense_to_csr.h" 18 | #include "legate_sparse/array/conv/dense_to_csr_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct DenseToCSRNNZImplBody { 26 | using VAL_TY = type_of; 27 | 28 | void operator()(const AccessorWO& nnz, 29 | const AccessorRO& B_vals, 30 | const Rect<2>& rect) 31 | { 32 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | size_t row_nnz = 0; 34 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 35 | if (B_vals[{i, j}] != static_cast(0.0)) { 36 | row_nnz++; 37 | } 38 | } 39 | nnz[{i, 0}] = row_nnz; 40 | } 41 | } 42 | }; 43 | 44 | template 45 | struct DenseToCSRImplBody { 46 | using INDEX_TY = type_of; 47 | using VAL_TY = type_of; 48 | 49 | void operator()(const AccessorRO, 2>& A_pos, 50 | const AccessorWO& A_crd, 51 | const AccessorWO& A_vals, 52 | const AccessorRO& B_vals, 53 | const Rect<2>& rect) 54 | { 55 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 56 | coord_t nnz_pos = A_pos[{i, 0}].lo; 57 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 58 | if (B_vals[{i, j}] != static_cast(0.0)) { 59 | A_crd[nnz_pos] = static_cast(j); 60 | A_vals[nnz_pos] = B_vals[{i, j}]; 61 | nnz_pos++; 62 | } 63 | } 64 | } 65 | } 66 | }; 67 | 68 | /*static*/ void DenseToCSRNNZ::cpu_variant(TaskContext context) 69 | { 70 | dense_to_csr_nnz_template(context); 71 | } 72 | 73 | /*static*/ void DenseToCSR::cpu_variant(TaskContext context) 74 | { 75 | dense_to_csr_template(context); 76 | } 77 | 78 | namespace // unnamed 79 | { 80 | static void __attribute__((constructor)) register_tasks(void) 81 | { 82 | DenseToCSRNNZ::register_variants(); 83 | DenseToCSR::register_variants(); 84 | } 85 | 86 | } // namespace 87 | 88 | } // namespace sparse 89 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct DenseToCSRNNZArgs { 26 | const legate::PhysicalStore& nnz; 27 | const legate::PhysicalStore& B_vals; 28 | }; 29 | 30 | class DenseToCSRNNZ : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_DENSE_TO_CSR_NNZ}; 33 | static void cpu_variant(legate::TaskContext ctx); 34 | #ifdef LEGATE_USE_OPENMP 35 | static void omp_variant(legate::TaskContext ctx); 36 | #endif 37 | #ifdef LEGATE_USE_CUDA 38 | static void gpu_variant(legate::TaskContext context); 39 | #endif 40 | }; 41 | 42 | struct DenseToCSRArgs { 43 | const legate::PhysicalStore& A_pos; 44 | const legate::PhysicalStore& A_crd; 45 | const legate::PhysicalStore& A_vals; 46 | const legate::PhysicalStore& B_vals; 47 | }; 48 | 49 | class DenseToCSR : public SparseTask { 50 | public: 51 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_DENSE_TO_CSR}; 52 | static void cpu_variant(legate::TaskContext ctx); 53 | #ifdef LEGATE_USE_OPENMP 54 | static void omp_variant(legate::TaskContext ctx); 55 | #endif 56 | #ifdef LEGATE_USE_CUDA 57 | static void gpu_variant(legate::TaskContext context); 58 | #endif 59 | }; 60 | 61 | } // namespace sparse 62 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/dense_to_csr.h" 18 | #include "legate_sparse/array/conv/dense_to_csr_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct DenseToCSRNNZImplBody { 26 | using VAL_TY = type_of; 27 | 28 | void operator()(const AccessorWO& nnz, 29 | const AccessorRO& B_vals, 30 | const Rect<2>& rect) 31 | { 32 | #pragma omp parallel for schedule(static) 33 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 34 | size_t row_nnz = 0; 35 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 36 | if (B_vals[{i, j}] != static_cast(0.0)) { 37 | row_nnz++; 38 | } 39 | } 40 | nnz[{i, 0}] = row_nnz; 41 | } 42 | } 43 | }; 44 | 45 | template 46 | struct DenseToCSRImplBody { 47 | using INDEX_TY = type_of; 48 | using VAL_TY = type_of; 49 | 50 | void operator()(const AccessorRO, 2>& A_pos, 51 | const AccessorWO& A_crd, 52 | const AccessorWO& A_vals, 53 | const AccessorRO& B_vals, 54 | const Rect<2>& rect) 55 | { 56 | #pragma omp parallel for schedule(static) 57 | for (auto i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 58 | coord_t nnz_pos = A_pos[{i, 0}].lo; 59 | for (auto j = rect.lo[1]; j < rect.hi[1] + 1; j++) { 60 | if (B_vals[{i, j}] != static_cast(0.0)) { 61 | A_crd[nnz_pos] = static_cast(j); 62 | A_vals[nnz_pos] = B_vals[{i, j}]; 63 | nnz_pos++; 64 | } 65 | } 66 | } 67 | } 68 | }; 69 | 70 | /*static*/ void DenseToCSRNNZ::omp_variant(TaskContext context) 71 | { 72 | dense_to_csr_nnz_template(context); 73 | } 74 | 75 | /*static*/ void DenseToCSR::omp_variant(TaskContext context) 76 | { 77 | dense_to_csr_template(context); 78 | } 79 | 80 | } // namespace sparse 81 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/dense_to_csr_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
20 | #include "legate_sparse/array/conv/dense_to_csr.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | #include "legate_sparse/util/typedefs.h" 23 | 24 | #include 25 | 26 | namespace sparse { 27 | 28 | using namespace legate; 29 | 30 | template 31 | struct DenseToCSRNNZImplBody; 32 | 33 | template 34 | struct DenseToCSRNNZImpl { 35 | template 36 | void operator()(DenseToCSRNNZArgs& args) const 37 | { 38 | using VAL_TY = type_of; 39 | 40 | auto nnz = args.nnz.write_accessor(); 41 | auto B_vals = args.B_vals.read_accessor(); 42 | 43 | if (args.nnz.domain().empty()) { 44 | return; 45 | } 46 | DenseToCSRNNZImplBody()(nnz, B_vals, args.B_vals.shape<2>()); 47 | } 48 | }; 49 | 50 | template 51 | struct DenseToCSRImplBody; 52 | 53 | template 54 | struct DenseToCSRImpl { 55 | template 56 | void operator()(DenseToCSRArgs& args) const 57 | { 58 | using INDEX_TY = type_of; 59 | using VAL_TY = type_of; 60 | 61 | auto A_pos = args.A_pos.read_accessor, 2>(); 62 | auto A_crd = args.A_crd.write_accessor(); 63 | auto A_vals = args.A_vals.write_accessor(); 64 | auto B_vals = args.B_vals.read_accessor(); 65 | 66 | if (args.A_pos.domain().empty()) { 67 | return; 68 | } 69 | DenseToCSRImplBody()( 70 | A_pos, A_crd, A_vals, B_vals, args.B_vals.shape<2>()); 71 | } 72 | }; 73 | 74 | template 75 | static void dense_to_csr_nnz_template(TaskContext context) 76 | { 77 | DenseToCSRNNZArgs args{ 78 | context.output(0), // nnz_per_row 79 | context.input(0) // B_vals 80 | }; 81 | value_type_dispatch(args.B_vals.code(), DenseToCSRNNZImpl{}, args); 82 | } 83 | 84 | template 85 | static void dense_to_csr_template(TaskContext context) 86 | { 87 | DenseToCSRArgs args{ 88 | context.input(0), // A_pos (promoted) 89 | context.output(0), // A_crd 90 | context.output(1), // A_vals 91 | context.input(1) // B_vals 92 | }; 93 | 94 | index_type_value_type_dispatch( 95 | args.A_crd.code(), args.A_vals.code(), DenseToCSRImpl{}, args); 96 | } 97 | 98 | } // namespace sparse 99 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ExpandPosToCoordinatesImplBody { 26 | using INDEX_TY = type_of; 27 | 28 | void operator()(const AccessorRO, 1>& pos, 29 | const AccessorWO& row_indices, 30 | const Rect<1>& rect) 31 | { 32 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 33 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 34 | row_indices[j_pos] = row; 35 | } 36 | } 37 | } 38 | }; 39 | 40 | /*static*/ void ExpandPosToCoordinates::cpu_variant(TaskContext context) 41 | { 42 | pos_to_coordinates_template(context); 43 | } 44 | 45 | namespace // unnamed 46 | { 47 | static void __attribute__((constructor)) register_tasks(void) 48 | { 49 | ExpandPosToCoordinates::register_variants(); 50 | } 51 | } // namespace 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | #include "legate_sparse/util/cusparse_utils.h" 21 | #include 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | __global__ void fill_row_indices(size_t rows, 29 | size_t offset, 30 | AccessorRO, 1> pos, 31 | AccessorWO row_indices) 32 | { 33 | const auto idx = global_tid_1d(); 34 | 35 | if (idx >= rows) { 36 | return; 37 | } 38 | 39 | size_t row = offset + idx; 40 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 41 | row_indices[j_pos] = row; 42 | } 43 | } 44 | 45 | template 46 | struct ExpandPosToCoordinatesImplBody { 47 | using INDEX_TY = type_of; 48 | 49 | void operator()(const AccessorRO, 1>& pos, 50 | const AccessorWO& row_indices, 51 | const Rect<1>& rect) 52 | { 53 | auto stream = get_cached_stream(); 54 | auto blocks = get_num_blocks_1d(rect.volume()); 55 | size_t rows = rect.volume(); 56 | 57 | fill_row_indices<<>>(rows, rect.lo[0], pos, row_indices); 58 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 59 | } 60 | }; 61 | 62 | /*static*/ void ExpandPosToCoordinates::gpu_variant(TaskContext context) 63 | { 64 | pos_to_coordinates_template(context); 65 | } 66 | 67 | } // namespace sparse 68 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ExpandPosToCoordinatesArgs { 26 | const legate::PhysicalStore row_indices; 27 | const legate::PhysicalStore pos; 28 | }; 29 | 30 | class ExpandPosToCoordinates : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_EXPAND_POS_TO_COORDINATES}; 33 | 34 | public: 35 | static void cpu_variant(legate::TaskContext ctx); 36 | #ifdef LEGATE_USE_OPENMP 37 | static void omp_variant(legate::TaskContext ctx); 38 | #endif 39 | #ifdef LEGATE_USE_CUDA 40 | static void gpu_variant(legate::TaskContext ctx); 41 | #endif 42 | }; 43 | 44 | } // namespace sparse 45 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
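// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// ExpandPosToCoordinates task above expands a pos array of per-row ranges into
// an explicit row-index array (the row component of a COO layout). The
// standalone version below uses a prefix-sum pos vector (entries of row r live
// in pos[r]..pos[r+1]) rather than the Rect<1> ranges used by the task.
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<int64_t> expand_pos_to_rows(const std::vector<int64_t>& pos)
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<int64_t> row_indices(pos.empty() ? 0 : pos.back());
  for (int64_t row = 0; row < rows; ++row) {
    // Every stored entry whose position falls in this row's range gets the row id.
    for (int64_t p = pos[row]; p < pos[row + 1]; ++p) { row_indices[p] = row; }
  }
  return row_indices;
}

// Example: pos = {0, 2, 2, 5} (3 rows, nnz = 5) yields row_indices = {0, 0, 2, 2, 2}.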
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 18 | #include "legate_sparse/array/conv/pos_to_coordinates_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ExpandPosToCoordinatesImplBody { 26 | using INDEX_TY = type_of; 27 | 28 | void operator()(const AccessorRO, 1>& pos, 29 | const AccessorWO& row_indices, 30 | const Rect<1>& rect) 31 | { 32 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 33 | for (auto row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 34 | for (size_t j_pos = pos[row].lo; j_pos < pos[row].hi + 1; j_pos++) { 35 | row_indices[j_pos] = row; 36 | } 37 | } 38 | } 39 | }; 40 | 41 | /*static*/ void ExpandPosToCoordinates::omp_variant(TaskContext context) 42 | { 43 | pos_to_coordinates_template(context); 44 | } 45 | 46 | } // namespace sparse 47 | -------------------------------------------------------------------------------- /src/legate_sparse/array/conv/pos_to_coordinates_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
20 | #include "legate_sparse/array/conv/pos_to_coordinates.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | using namespace legate; 25 | 26 | template 27 | struct ExpandPosToCoordinatesImplBody; 28 | 29 | template 30 | struct ExpandPosToCoordinatesImpl { 31 | template 32 | void operator()(ExpandPosToCoordinatesArgs& args) const 33 | { 34 | using INDEX_TY = type_of; 35 | 36 | auto pos = args.pos.read_accessor, 1>(); 37 | auto row_indices = args.row_indices.write_accessor(); 38 | auto pos_domain = args.pos.domain(); 39 | auto row_indices_domain = args.row_indices.domain(); 40 | 41 | if (pos_domain.empty() || row_indices_domain.empty()) { 42 | return; 43 | } 44 | ExpandPosToCoordinatesImplBody()(pos, row_indices, args.pos.shape<1>()); 45 | } 46 | }; 47 | 48 | template 49 | static void pos_to_coordinates_template(TaskContext context) 50 | { 51 | ExpandPosToCoordinatesArgs args{ 52 | context.outputs()[0], 53 | context.inputs()[0], 54 | }; 55 | index_type_dispatch(args.row_indices.code(), ExpandPosToCoordinatesImpl(), args); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct GetCSRDiagonalImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& diag, 30 | const AccessorRO, 1>& pos, 31 | const AccessorRO& crd, 32 | const AccessorRO& vals, 33 | const Rect<1>& rect) 34 | { 35 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 36 | diag[i] = 0.0; 37 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 38 | if (crd[j_pos] == i) { 39 | diag[i] = vals[j_pos]; 40 | } 41 | } 42 | } 43 | } 44 | }; 45 | 46 | /*static*/ void GetCSRDiagonal::cpu_variant(TaskContext context) 47 | { 48 | get_csr_diagonal_template(context); 49 | } 50 | 51 | namespace // unnamed 52 | { 53 | static void __attribute__((constructor)) register_tasks(void) 54 | { 55 | GetCSRDiagonal::register_variants(); 56 | } 57 | } // namespace 58 | 59 | } // namespace sparse 60 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void compute_diag_kernel(size_t rows, 27 | int64_t offset, 28 | AccessorWO diag, 29 | AccessorRO, 1> pos, 30 | AccessorRO crd, 31 | AccessorRO vals) 32 | { 33 | const auto idx = global_tid_1d(); 34 | if (idx >= rows) { 35 | return; 36 | } 37 | auto i = idx + offset; 38 | diag[i] = 0.0; 39 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 40 | if (crd[j_pos] == i) { 41 | diag[i] = vals[j_pos]; 42 | } 43 | } 44 | } 45 | 46 | template 47 | struct GetCSRDiagonalImplBody { 48 | using INDEX_TY = type_of; 49 | using VAL_TY = type_of; 50 | 51 | void operator()(const AccessorWO& diag, 52 | const AccessorRO, 1>& pos, 53 | const AccessorRO& crd, 54 | const AccessorRO& vals, 55 | const Rect<1>& rect) 56 | { 57 | auto stream = get_cached_stream(); 58 | auto blocks = get_num_blocks_1d(rect.volume()); 59 | compute_diag_kernel 60 | <<>>(rect.volume(), rect.lo[0], diag, pos, crd, vals); 61 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 62 | } 63 | }; 64 | 65 | /*static*/ void GetCSRDiagonal::gpu_variant(TaskContext context) 66 | { 67 | get_csr_diagonal_template(context); 68 | } 69 | 70 | } // namespace sparse 71 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | #include "legate/mapping/store.h" 24 | 25 | namespace sparse { 26 | struct GetCSRDiagonalArgs { 27 | const legate::PhysicalStore& diag; 28 | const legate::PhysicalStore& pos; 29 | const legate::PhysicalStore& crd; 30 | const legate::PhysicalStore& vals; 31 | }; 32 | 33 | class GetCSRDiagonal : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_DIAGONAL}; 36 | // TODO (rohany): We could rewrite this having each implementation just make 37 | // a call to thrust::transform, but the implementations are simple enough 38 | // anyway. 
39 | static void cpu_variant(legate::TaskContext ctx); 40 | #ifdef LEGATE_USE_OPENMP 41 | static void omp_variant(legate::TaskContext ctx); 42 | #endif 43 | #ifdef LEGATE_USE_CUDA 44 | static void gpu_variant(legate::TaskContext context); 45 | #endif 46 | }; 47 | 48 | } // namespace sparse 49 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/get_diagonal.h" 18 | #include "legate_sparse/array/csr/get_diagonal_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct GetCSRDiagonalImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& diag, 30 | const AccessorRO, 1>& pos, 31 | const AccessorRO& crd, 32 | const AccessorRO& vals, 33 | const Rect<1>& rect) 34 | { 35 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 36 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | diag[i] = 0.0; 38 | for (size_t j_pos = pos[i].lo; j_pos < pos[i].hi + 1; j_pos++) { 39 | if (crd[j_pos] == i) { 40 | diag[i] = vals[j_pos]; 41 | } 42 | } 43 | } 44 | } 45 | }; 46 | 47 | /*static*/ void GetCSRDiagonal::omp_variant(TaskContext context) 48 | { 49 | get_csr_diagonal_template(context); 50 | } 51 | 52 | } // namespace sparse 53 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/get_diagonal_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
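// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// GetCSRDiagonal variants above walk each row's stored entries and keep the
// value whose column index equals the row index, leaving zero when no diagonal
// entry is stored. The same logic with prefix-sum pos / crd / vals vectors:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<double> csr_diagonal(const std::vector<int64_t>& pos,   // rows + 1
                                        const std::vector<int64_t>& crd,   // nnz
                                        const std::vector<double>& vals)   // nnz
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<double> diag(rows, 0.0);  // rows without a stored diagonal stay 0
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t p = pos[i]; p < pos[i + 1]; ++p) {
      if (crd[p] == i) { diag[i] = vals[p]; }
    }
  }
  return diag;
}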
20 | #include "legate_sparse/array/csr/get_diagonal.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct GetCSRDiagonalImplBody; 29 | 30 | template 31 | struct GetCSRDiagonalImpl { 32 | template 33 | void operator()(GetCSRDiagonalArgs& args) const 34 | { 35 | using INDEX_TY = type_of; 36 | using VAL_TY = type_of; 37 | 38 | auto diag = args.diag.write_accessor(); 39 | auto pos = args.pos.read_accessor, 1>(); 40 | auto crd = args.crd.read_accessor(); 41 | auto vals = args.vals.read_accessor(); 42 | 43 | assert(args.diag.domain().dense()); 44 | if (args.diag.domain().empty()) { 45 | return; 46 | } 47 | 48 | GetCSRDiagonalImplBody()( 49 | diag, pos, crd, vals, args.diag.shape<1>()); 50 | } 51 | }; 52 | 53 | template 54 | static void get_csr_diagonal_template(TaskContext context) 55 | { 56 | auto inputs = context.inputs(); 57 | GetCSRDiagonalArgs args{context.outputs()[0], inputs[0], inputs[1], inputs[2]}; 58 | index_type_value_type_dispatch( 59 | args.crd.code(), args.diag.code(), GetCSRDiagonalImpl{}, args); 60 | } 61 | } // namespace sparse 62 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/indexing.h" 18 | #include "legate_sparse/array/csr/indexing_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRIndexingCSRImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorRO, 1>& A_pos, 30 | const AccessorRO& A_crd, 31 | const AccessorRW& A_vals, 32 | const AccessorRO, 1>& mask_pos, 33 | const AccessorRO& mask_crd, 34 | const AccessorRO& value, 35 | const Rect<1>& rect) 36 | { 37 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 38 | size_t j_pos_start = A_pos[row].lo; 39 | size_t j_pos_end = A_pos[row].hi + 1; 40 | 41 | size_t m_pos_start = mask_pos[row].lo; 42 | size_t m_pos_end = mask_pos[row].hi + 1; 43 | 44 | size_t m_pos = m_pos_start; 45 | size_t j_pos = j_pos_start; 46 | 47 | // When the if condition is satisfied, the (row, col) of A and 48 | // mask match. Ideally, we would expect it to match for all 49 | // elements, even though mask stores only the True elements 50 | // making its sparsity pattern differ from A. 51 | // This would be the case if mask was derived from A. 52 | // However, if mask has entries that are not present in A, 53 | // then the else conditions will be hit. 54 | // Note that we don't update the vals array in those cases 55 | // since updating vals would require changing its size 56 | // apriori and hence the sparsity pattern of A, which is not 57 | // supported in this task. 
58 | 59 | while (m_pos < m_pos_end && j_pos < j_pos_end) { 60 | if (mask_crd[m_pos] == A_crd[j_pos]) { 61 | A_vals[j_pos] = static_cast(value[0]); 62 | j_pos++; 63 | m_pos++; 64 | } else if (mask_crd[m_pos] > A_crd[j_pos]) { 65 | // this element in A is either not found in mask or is False 66 | // in mask and thus not stored. This means the pointer for 67 | // mask (m_pos) would have skipped ahead of the pointer 68 | // for A (j_pos), so A needs to catch-up; increment j_pos 69 | j_pos++; 70 | } else { // mask_crd[m_pos] < A_crd[j_pos] 71 | // In this case, A is ahead and mask is behind in this row 72 | // which means mask has an entry (r,c) that was not in A. 73 | // Increment m_pos and let mask move ahead 74 | m_pos++; 75 | } 76 | // when either one of the pointers reach the end of the row, 77 | // we are done because we only update vals when (row, col) 78 | // of mask and A match exactly, and if one of the pointers 79 | // has reached the end of this row, the vals for this row 80 | // can never be updated, so exit the loop. 81 | } 82 | } 83 | } 84 | }; 85 | 86 | /* static */ void CSRIndexingCSR::cpu_variant(legate::TaskContext context) 87 | { 88 | csr_indexing_csr_template(context); 89 | } 90 | 91 | namespace // unnamed 92 | { 93 | static void __attribute__((constructor)) register_tasks(void) 94 | { 95 | CSRIndexingCSR::register_variants(); 96 | } 97 | } // namespace 98 | 99 | } // namespace sparse 100 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
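// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the
// CSRIndexingCSR task above assigns one scalar to the entries of A selected by
// a CSR mask, merging the two sorted column lists of each row with two
// pointers and writing only where the coordinates match exactly; A's sparsity
// pattern is never changed. The standalone per-row merge, with prefix-sum pos
// arrays instead of Rect ranges:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline void csr_masked_assign(const std::vector<int64_t>& a_pos,   // rows + 1
                              const std::vector<int64_t>& a_crd,
                              std::vector<double>&        a_vals,
                              const std::vector<int64_t>& m_pos,   // rows + 1
                              const std::vector<int64_t>& m_crd,
                              double value)
{
  const int64_t rows = static_cast<int64_t>(a_pos.size()) - 1;
  for (int64_t row = 0; row < rows; ++row) {
    int64_t j = a_pos[row];
    int64_t m = m_pos[row];
    // Two-pointer merge over the sorted column indices of A and the mask.
    while (j < a_pos[row + 1] && m < m_pos[row + 1]) {
      if (m_crd[m] == a_crd[j]) {
        a_vals[j] = value;  // coordinates match: update the stored value
        ++j; ++m;
      } else if (m_crd[m] > a_crd[j]) {
        ++j;  // A stores an entry the mask does not select; skip it
      } else {
        ++m;  // the mask selects an entry A does not store; nothing to update
      }
    }
  }
}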
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRIndexingCSRArgs { 26 | const legate::PhysicalStore& A_vals; 27 | const legate::PhysicalStore& A_pos; 28 | const legate::PhysicalStore& A_crd; 29 | const legate::PhysicalStore& key_pos; 30 | const legate::PhysicalStore& key_crd; 31 | const legate::PhysicalStore& value; 32 | }; 33 | 34 | class CSRIndexingCSR : public SparseTask { 35 | public: 36 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_INDEXING_CSR}; 37 | 38 | // TODO: The implementation of the below three variants is 39 | // identical and hence needs to be templated (DRY) 40 | 41 | public: 42 | static void cpu_variant(legate::TaskContext context); 43 | 44 | #ifdef LEGATE_USE_OPENMP 45 | static void omp_variant(legate::TaskContext context); 46 | #endif 47 | 48 | #ifdef LEGATE_USE_CUDA 49 | static void gpu_variant(legate::TaskContext context); 50 | #endif 51 | }; 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/indexing.h" 18 | #include "legate_sparse/array/csr/indexing_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRIndexingCSRImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorRO, 1>& A_pos, 30 | const AccessorRO& A_crd, 31 | const AccessorRW& A_vals, 32 | const AccessorRO, 1>& mask_pos, 33 | const AccessorRO& mask_crd, 34 | const AccessorRO& value, 35 | const Rect<1>& rect) 36 | { 37 | 38 | #pragma omp parallel for 39 | for (size_t row = rect.lo[0]; row < rect.hi[0] + 1; row++) { 40 | size_t j_pos_start = A_pos[row].lo; 41 | size_t j_pos_end = A_pos[row].hi + 1; 42 | 43 | size_t m_pos_start = mask_pos[row].lo; 44 | size_t m_pos_end = mask_pos[row].hi + 1; 45 | 46 | size_t m_pos = m_pos_start; 47 | size_t j_pos = j_pos_start; 48 | 49 | // When the if condition is satisfied, the (row, col) of A and 50 | // mask match. Ideally, we would expect it to match for all 51 | // elements, even though mask stores only the True elements 52 | // making its sparsity pattern differ from A. 53 | // This would be the case if mask was derived from A. 54 | // However, if mask has entries that are not present in A, 55 | // then the else conditions will be hit. 56 | // Note that we don't update the vals array in those cases 57 | // since updating vals would require changing its size 58 | // apriori and hence the sparsity pattern of A, which is not 59 | // supported in this task.
60 | 61 | while (m_pos < m_pos_end && j_pos < j_pos_end) { 62 | if (mask_crd[m_pos] == A_crd[j_pos]) { 63 | A_vals[j_pos] = static_cast(value[0]); 64 | j_pos++; 65 | m_pos++; 66 | } else if (mask_crd[m_pos] > A_crd[j_pos]) { 67 | // this element in A is either not found in mask or is False 68 | // in mask and thus not stored. This means the pointer for 69 | // mask (m_pos) would have skipped ahead of the pointer 70 | // for A (j_pos), so A needs to catch-up; increment j_pos 71 | j_pos++; 72 | } else { // mask_crd[m_pos] < A_crd[j_pos] 73 | // In this case, A is ahead and mask is behind in this row 74 | // which means mask has an entry (r,c) that was not in A. 75 | // Increment m_pos and let mask move ahead 76 | m_pos++; 77 | } 78 | // when either one of the pointers reach the end of the row, 79 | // we are done because we only update vals when (row, col) 80 | // of mask and A match exactly, and if one of the pointers 81 | // has reached the end of this row, the vals for this row 82 | // can never be updated, so exit the loop. 83 | } 84 | } 85 | } 86 | }; 87 | 88 | /* static */ void CSRIndexingCSR::omp_variant(TaskContext context) 89 | { 90 | csr_indexing_csr_template(context); 91 | } 92 | 93 | } // namespace sparse 94 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/indexing_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/array/csr/indexing.h" 20 | #include "legate_sparse/util/dispatch.h" 21 | 22 | namespace sparse { 23 | 24 | using namespace legate; 25 | 26 | template 27 | struct CSRIndexingCSRImplBody; 28 | 29 | template 30 | struct CSRIndexingCSRImpl { 31 | template 32 | void operator()(const CSRIndexingCSRArgs& args) 33 | { 34 | using INDEX_TY = type_of; 35 | using VAL_TY = type_of; 36 | 37 | auto A_pos = args.A_pos.read_accessor, 1>(); 38 | auto A_crd = args.A_crd.read_accessor(); 39 | auto A_vals = args.A_vals.read_write_accessor(); 40 | 41 | auto key_pos = args.key_pos.read_accessor, 1>(); 42 | auto key_crd = args.key_crd.read_accessor(); 43 | 44 | auto value = args.value.read_accessor(); 45 | 46 | // TODO: Rect is based on A_pos.shape, is that correct? 
47 | CSRIndexingCSRImplBody()( 48 | A_pos, A_crd, A_vals, key_pos, key_crd, value, args.A_pos.shape<1>()); 49 | } 50 | }; 51 | 52 | template 53 | static void csr_indexing_csr_template(TaskContext context) 54 | { 55 | CSRIndexingCSRArgs args{ 56 | context.outputs()[0], 57 | context.inputs()[0], 58 | context.inputs()[1], 59 | context.inputs()[2], 60 | context.inputs()[3], 61 | context.inputs()[4], // value 62 | }; 63 | 64 | index_type_value_type_dispatch( 65 | args.A_crd.code(), args.A_vals.code(), CSRIndexingCSRImpl(), args); 66 | } 67 | 68 | } // namespace sparse 69 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spgemm_csr_csr_csr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct SpGEMMCSRxCSRxCSRNNZArgs { 26 | const legate::PhysicalStore& nnz; 27 | const legate::PhysicalStore& B_pos; 28 | const legate::PhysicalStore& B_crd; 29 | const legate::PhysicalStore& C_pos; 30 | const legate::PhysicalStore& C_crd; 31 | }; 32 | 33 | class SpGEMMCSRxCSRxCSRNNZ : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_NNZ}; 36 | 37 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 38 | legate::VariantOptions{}.with_has_allocations(true); 39 | static constexpr legate::VariantOptions OMP_VARIANT_OPTIONS = 40 | legate::VariantOptions{}.with_has_allocations(true); 41 | 42 | public: 43 | static void cpu_variant(legate::TaskContext ctx); 44 | #ifdef LEGATE_USE_OPENMP 45 | static void omp_variant(legate::TaskContext ctx); 46 | #endif 47 | }; 48 | 49 | struct SpGEMMCSRxCSRxCSRArgs { 50 | const legate::PhysicalStore& A_pos; 51 | const legate::PhysicalStore& A_crd; 52 | const legate::PhysicalStore& A_vals; 53 | const legate::PhysicalStore& B_pos; 54 | const legate::PhysicalStore& B_crd; 55 | const legate::PhysicalStore& B_vals; 56 | const legate::PhysicalStore& C_pos; 57 | const legate::PhysicalStore& C_crd; 58 | const legate::PhysicalStore& C_vals; 59 | }; 60 | 61 | class SpGEMMCSRxCSRxCSR : public SparseTask { 62 | public: 63 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR}; 64 | 65 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 66 | legate::VariantOptions{}.with_has_allocations(true); 67 | static constexpr legate::VariantOptions OMP_VARIANT_OPTIONS = 68 | legate::VariantOptions{}.with_has_allocations(true); 69 | 70 | public: 71 | static void cpu_variant(legate::TaskContext ctx); 72 | #ifdef LEGATE_USE_OPENMP 73 | static void omp_variant(legate::TaskContext ctx); 74 | #endif 75 | }; 76 | 77 | struct SpGEMMCSRxCSRxCSRGPUArgs { 78 | const legate::PhysicalStore& 
A_pos; 79 | const legate::PhysicalStore& A_crd; 80 | const legate::PhysicalStore& A_vals; 81 | const legate::PhysicalStore& B_pos; 82 | const legate::PhysicalStore& B_crd; 83 | const legate::PhysicalStore& B_vals; 84 | const legate::PhysicalStore& C_pos; 85 | const legate::PhysicalStore& C_crd; 86 | const legate::PhysicalStore& C_vals; 87 | const uint64_t A2_dim; 88 | const uint64_t C1_dim; 89 | const uint64_t fast_switch; 90 | std::vector comms; 91 | }; 92 | 93 | // CSRxCSRxCSR SpGEMM for NVIDIA GPUs. Due to limitations with cuSPARSE, 94 | // we take a different approach than on CPUs and OMPs. 95 | class SpGEMMCSRxCSRxCSRGPU : public SparseTask { 96 | public: 97 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_GPU}; 98 | 99 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 100 | legate::VariantOptions{}.with_has_allocations(true); 101 | 102 | public: 103 | #ifdef LEGATE_USE_CUDA 104 | static void gpu_variant(legate::TaskContext ctx); 105 | #endif 106 | }; 107 | 108 | } // namespace sparse 109 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/spmv.h" 18 | #include "legate_sparse/array/csr/spmv_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRSpMVRowSplitImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& y, 30 | const AccessorRO, 1>& A_pos, 31 | const AccessorRO& A_crd, 32 | const AccessorRO& A_vals, 33 | const AccessorRO& x, 34 | const Rect<1>& rect) 35 | { 36 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 37 | VAL_TY sum = 0.0; 38 | for (size_t j_pos = A_pos[i].lo; j_pos < A_pos[i].hi + 1; j_pos++) { 39 | auto j = A_crd[j_pos]; 40 | sum += A_vals[j_pos] * x[j]; 41 | } 42 | y[i] = sum; 43 | } 44 | } 45 | }; 46 | 47 | /*static*/ void CSRSpMVRowSplit::cpu_variant(TaskContext context) 48 | { 49 | csr_spmv_row_split_template(context); 50 | } 51 | 52 | namespace // unnamed 53 | { 54 | static void __attribute__((constructor)) register_tasks(void) 55 | { 56 | CSRSpMVRowSplit::register_variants(); 57 | } 58 | } // namespace 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
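// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): the SpGEMM
// header above exposes the same two-phase shape as the other conversions -- an
// NNZ task followed by a fill task. The actual CPU/OMP/GPU kernels are not
// shown in this excerpt; one standard way to realize a CSR x CSR -> CSR
// product is Gustavson's row-wise algorithm with a dense accumulator, sketched
// below purely for illustration. It does not claim to mirror the library's
// kernels, and it emits each row's columns in discovery order (unsorted).
// ----------------------------------------------------------------------------
#include <algorithm>
#include <cstdint>
#include <vector>

struct CSRMat {
  int64_t rows = 0, cols = 0;
  std::vector<int64_t> pos, crd;  // pos has rows + 1 entries
  std::vector<double> vals;
};

inline CSRMat spgemm_csr_csr(const CSRMat& B, const CSRMat& C)
{
  CSRMat A;
  A.rows = B.rows;
  A.cols = C.cols;
  A.pos.assign(A.rows + 1, 0);
  std::vector<int64_t> last_row(A.cols, -1);  // per-column marker, reset lazily
  // Phase 1 (symbolic): count the distinct output columns of each row.
  for (int64_t i = 0; i < B.rows; ++i) {
    int64_t count = 0;
    for (int64_t p = B.pos[i]; p < B.pos[i + 1]; ++p) {
      const int64_t k = B.crd[p];
      for (int64_t q = C.pos[k]; q < C.pos[k + 1]; ++q) {
        if (last_row[C.crd[q]] != i) { last_row[C.crd[q]] = i; ++count; }
      }
    }
    A.pos[i + 1] = A.pos[i] + count;
  }
  A.crd.resize(A.pos[A.rows]);
  A.vals.resize(A.pos[A.rows]);
  // Phase 2 (numeric): accumulate partial products row by row.
  std::vector<double> acc(A.cols, 0.0);
  std::fill(last_row.begin(), last_row.end(), -1);
  for (int64_t i = 0; i < B.rows; ++i) {
    int64_t out = A.pos[i];
    for (int64_t p = B.pos[i]; p < B.pos[i + 1]; ++p) {
      const int64_t k = B.crd[p];
      const double  b = B.vals[p];
      for (int64_t q = C.pos[k]; q < C.pos[k + 1]; ++q) {
        const int64_t j = C.crd[q];
        if (last_row[j] != i) {  // first contribution to column j in this row
          last_row[j] = i;
          A.crd[out++] = j;
          acc[j] = 0.0;
        }
        acc[j] += b * C.vals[q];
      }
    }
    for (int64_t p = A.pos[i]; p < A.pos[i + 1]; ++p) { A.vals[p] = acc[A.crd[p]]; }
  }
  return A;
}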
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct CSRSpMVRowSplitArgs { 26 | const legate::PhysicalStore& y; 27 | const legate::PhysicalStore& A_pos; 28 | const legate::PhysicalStore& A_crd; 29 | const legate::PhysicalStore& A_vals; 30 | const legate::PhysicalStore& x; 31 | }; 32 | 33 | class CSRSpMVRowSplit : public SparseTask { 34 | public: 35 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_CSR_SPMV_ROW_SPLIT}; 36 | 37 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 38 | legate::VariantOptions{}.with_has_allocations(true); 39 | 40 | public: 41 | static void cpu_variant(legate::TaskContext ctx); 42 | #ifdef LEGATE_USE_OPENMP 43 | static void omp_variant(legate::TaskContext ctx); 44 | #endif 45 | #ifdef LEGATE_USE_CUDA 46 | static void gpu_variant(legate::TaskContext context); 47 | #endif 48 | }; 49 | 50 | } // namespace sparse 51 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/csr/spmv.h" 18 | #include "legate_sparse/array/csr/spmv_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct CSRSpMVRowSplitImplBody { 26 | using INDEX_TY = type_of; 27 | using VAL_TY = type_of; 28 | 29 | void operator()(const AccessorWO& y, 30 | const AccessorRO, 1>& A_pos, 31 | const AccessorRO& A_crd, 32 | const AccessorRO& A_vals, 33 | const AccessorRO& x, 34 | const Rect<1>& rect) 35 | { 36 | #pragma omp parallel for schedule(monotonic : dynamic, 128) 37 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 38 | VAL_TY sum = 0.0; 39 | for (size_t j_pos = A_pos[i].lo; j_pos < A_pos[i].hi + 1; j_pos++) { 40 | auto j = A_crd[j_pos]; 41 | sum += A_vals[j_pos] * x[j]; 42 | } 43 | y[i] = sum; 44 | } 45 | } 46 | }; 47 | 48 | /*static*/ void CSRSpMVRowSplit::omp_variant(TaskContext context) 49 | { 50 | csr_spmv_row_split_template(context); 51 | } 52 | 53 | } // namespace sparse 54 | -------------------------------------------------------------------------------- /src/legate_sparse/array/csr/spmv_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/csr/spmv.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct CSRSpMVRowSplitImplBody; 31 | 32 | template 33 | struct CSRSpMVRowSplitImpl { 34 | template 35 | void operator()(CSRSpMVRowSplitArgs& args) const 36 | { 37 | using INDEX_TY = type_of; 38 | using VAL_TY = type_of; 39 | 40 | auto y = args.y.write_accessor(); 41 | auto A_pos = args.A_pos.read_accessor, 1>(); 42 | auto A_crd = args.A_crd.read_accessor(); 43 | auto A_vals = args.A_vals.read_accessor(); 44 | auto x = args.x.read_accessor(); 45 | 46 | assert(args.y.domain().dense()); 47 | if (args.y.domain().empty()) { 48 | return; 49 | } 50 | 51 | CSRSpMVRowSplitImplBody()( 52 | y, A_pos, A_crd, A_vals, x, args.y.shape<1>()); 53 | } 54 | }; 55 | 56 | template 57 | static void csr_spmv_row_split_template(TaskContext context) 58 | { 59 | auto inputs = context.inputs(); 60 | CSRSpMVRowSplitArgs args{context.outputs()[0], inputs[0], inputs[1], inputs[2], inputs[3]}; 61 | 62 | index_type_value_type_dispatch( 63 | args.A_crd.code(), args.y.code(), CSRSpMVRowSplitImpl{}, args); 64 | } 65 | 66 | } // namespace sparse 67 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
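// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): CSRSpMVRowSplit
// above computes y = A * x with the rows of A (and of y) split across point
// tasks; every row is reduced into a private accumulator before being stored,
// so no atomics are needed on y. The same per-row kernel with prefix-sum pos:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <vector>

inline std::vector<double> csr_spmv(const std::vector<int64_t>& pos,  // rows + 1
                                    const std::vector<int64_t>& crd,  // nnz
                                    const std::vector<double>& vals,  // nnz
                                    const std::vector<double>& x)     // cols
{
  const int64_t rows = static_cast<int64_t>(pos.size()) - 1;
  std::vector<double> y(rows, 0.0);
  for (int64_t i = 0; i < rows; ++i) {
    double sum = 0.0;  // per-row accumulator, mirroring the VAL_TY sum above
    for (int64_t p = pos[i]; p < pos[i + 1]; ++p) { sum += vals[p] * x[crd[p]]; }
    y[i] = sum;
  }
  return y;
}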
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct ScaleRect1ImplBody { 26 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 27 | { 28 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 29 | output[i].lo = output[i].lo + scale; 30 | output[i].hi = output[i].hi + scale; 31 | } 32 | } 33 | }; 34 | 35 | /*static*/ void ScaleRect1::cpu_variant(TaskContext context) 36 | { 37 | scale_rect_1_template(context); 38 | } 39 | 40 | namespace // unnamed 41 | { 42 | static void __attribute__((constructor)) register_tasks(void) { ScaleRect1::register_variants(); } 43 | } // namespace 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | __global__ void scale_rect1_kernel(size_t elems, 26 | coord_t offset, 27 | const AccessorRW, 1> out, 28 | int64_t scale) 29 | { 30 | const auto tid = global_tid_1d(); 31 | if (tid >= elems) { 32 | return; 33 | } 34 | const auto idx = tid + offset; 35 | out[idx].lo = out[idx].lo + scale; 36 | out[idx].hi = out[idx].hi + scale; 37 | } 38 | 39 | template <> 40 | struct ScaleRect1ImplBody { 41 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 42 | { 43 | auto elems = rect.volume(); 44 | auto blocks = get_num_blocks_1d(elems); 45 | auto stream = get_cached_stream(); 46 | scale_rect1_kernel<<>>(elems, rect.lo, output, scale); 47 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 48 | } 49 | }; 50 | 51 | /*static*/ void ScaleRect1::gpu_variant(TaskContext context) 52 | { 53 | scale_rect_1_template(context); 54 | } 55 | 56 | } // namespace sparse 57 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ScaleRect1Args { 26 | const legate::PhysicalStore& out; 27 | int64_t scale; 28 | }; 29 | 30 | class ScaleRect1 : public SparseTask { 31 | public: 32 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_SCALE_RECT_1}; 33 | static void cpu_variant(legate::TaskContext context); 34 | #ifdef LEGATE_USE_OPENMP 35 | static void omp_variant(legate::TaskContext context); 36 | #endif 37 | #ifdef LEGATE_USE_CUDA 38 | static void gpu_variant(legate::TaskContext context); 39 | #endif 40 | }; 41 | 42 | } // namespace sparse 43 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/scale_rect.h" 18 | #include "legate_sparse/array/util/scale_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct ScaleRect1ImplBody { 26 | void operator()(const AccessorRW, 1>& output, const int64_t scale, const Rect<1>& rect) 27 | { 28 | #pragma omp parallel for schedule(static) 29 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 30 | output[i].lo = output[i].lo + scale; 31 | output[i].hi = output[i].hi + scale; 32 | } 33 | } 34 | }; 35 | 36 | /*static*/ void ScaleRect1::omp_variant(TaskContext context) 37 | { 38 | scale_rect_1_template(context); 39 | } 40 | 41 | } // namespace sparse 42 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/scale_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/util/scale_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct ScaleRect1ImplBody; 29 | 30 | template 31 | struct ScaleRect1Impl { 32 | void operator()(ScaleRect1Args& args) const 33 | { 34 | auto output = args.out.read_write_accessor, 1>(); 35 | if (args.out.domain().empty()) { 36 | return; 37 | } 38 | ScaleRect1ImplBody()(output, args.scale, args.out.shape<1>()); 39 | } 40 | }; 41 | 42 | template 43 | static void scale_rect_1_template(TaskContext context) 44 | { 45 | auto task = context.task_; 46 | auto scale = task->futures[0].get_result(); 47 | ScaleRect1Args args{context.outputs()[0], scale}; 48 | ScaleRect1Impl{}(args); 49 | } 50 | 51 | } // namespace sparse 52 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
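// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the legate_sparse sources): despite its
// name, ScaleRect1 above applies an additive shift -- it adds one scalar
// offset to both bounds of every Rect<1> in a pos array, e.g. to rebase
// locally built ranges against a global offset. The same operation with a
// simple pair-based rect:
// ----------------------------------------------------------------------------
#include <cstdint>
#include <utility>
#include <vector>

using Rect1 = std::pair<int64_t, int64_t>;  // {lo, hi}, both inclusive

inline void shift_rects(std::vector<Rect1>& pos, int64_t offset)
{
  for (auto& r : pos) {
    r.first  += offset;  // lo
    r.second += offset;  // hi
  }
}

// Example: {{0, 1}, {2, 4}} shifted by 10 becomes {{10, 11}, {12, 14}}.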
14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct UnZipRect1ImplBody { 26 | void operator()(const AccessorWO& out1, 27 | const AccessorWO& out2, 28 | const AccessorRO, 1>& in, 29 | const Rect<1>& rect) 30 | { 31 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 32 | out1[i] = in[i].lo; 33 | out2[i] = in[i].hi; 34 | } 35 | } 36 | }; 37 | 38 | /*static*/ void UnZipRect1::cpu_variant(TaskContext context) 39 | { 40 | unzip_rect_1_template(context); 41 | } 42 | 43 | namespace // unnamed 44 | { 45 | static void __attribute__((constructor)) register_tasks(void) { UnZipRect1::register_variants(); } 46 | } // namespace 47 | 48 | } // namespace sparse 49 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | __global__ void unzip_rect1_kernel(size_t elems, 26 | coord_t offset, 27 | const AccessorWO lo, 28 | const AccessorWO hi, 29 | const AccessorRO, 1> in) 30 | { 31 | const auto tid = global_tid_1d(); 32 | if (tid >= elems) { 33 | return; 34 | } 35 | const auto idx = tid + offset; 36 | lo[idx] = in[idx].lo; 37 | hi[idx] = in[idx].hi; 38 | } 39 | 40 | template <> 41 | struct UnZipRect1ImplBody { 42 | void operator()(const AccessorWO& out1, 43 | const AccessorWO& out2, 44 | const AccessorRO, 1>& in, 45 | const Rect<1>& rect) 46 | { 47 | auto elems = rect.volume(); 48 | auto blocks = get_num_blocks_1d(elems); 49 | auto stream = get_cached_stream(); 50 | unzip_rect1_kernel<<>>(elems, rect.lo, out1, out2, in); 51 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 52 | } 53 | }; 54 | 55 | /*static*/ void UnZipRect1::gpu_variant(TaskContext context) 56 | { 57 | unzip_rect_1_template(context); 58 | } 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct UnZipRect1Args { 26 | const legate::PhysicalStore& out1; 27 | const legate::PhysicalStore& out2; 28 | const legate::PhysicalStore& in; 29 | }; 30 | 31 | class UnZipRect1 : public SparseTask { 32 | public: 33 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UNZIP_RECT_1}; 34 | static void cpu_variant(legate::TaskContext ctx); 35 | #ifdef LEGATE_USE_OPENMP 36 | static void omp_variant(legate::TaskContext ctx); 37 | #endif 38 | #ifdef LEGATE_USE_CUDA 39 | static void gpu_variant(legate::TaskContext context); 40 | #endif 41 | }; 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/unzip_rect.h" 18 | #include "legate_sparse/array/util/unzip_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template <> 25 | struct UnZipRect1ImplBody { 26 | void operator()(const AccessorWO& out1, 27 | const AccessorWO& out2, 28 | const AccessorRO, 1>& in, 29 | const Rect<1>& rect) 30 | { 31 | #pragma omp parallel for schedule(static) 32 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | out1[i] = in[i].lo; 34 | out2[i] = in[i].hi; 35 | } 36 | } 37 | }; 38 | 39 | /*static*/ void UnZipRect1::omp_variant(TaskContext context) 40 | { 41 | unzip_rect_1_template(context); 42 | } 43 | 44 | } // namespace sparse 45 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/unzip_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 20 | #include "legate_sparse/array/util/unzip_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct UnZipRect1ImplBody; 29 | 30 | template 31 | struct UnZipRect1Impl { 32 | void operator()(UnZipRect1Args& args) const 33 | { 34 | auto out1 = args.out1.write_accessor(); 35 | auto out2 = args.out2.write_accessor(); 36 | auto in = args.in.read_accessor, 1>(); 37 | if (args.in.domain().empty()) { 38 | return; 39 | } 40 | UnZipRect1ImplBody()(out1, out2, in, args.in.shape<1>()); 41 | } 42 | }; 43 | 44 | template 45 | static void unzip_rect_1_template(TaskContext context) 46 | { 47 | auto outputs = context.outputs(); 48 | UnZipRect1Args args{outputs[0], outputs[1], context.inputs()[0]}; 49 | UnZipRect1Impl{}(args); 50 | } 51 | 52 | } // namespace sparse 53 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ZipToRect1ImplBody { 26 | void operator()(const AccessorWO, 1>& output, 27 | const AccessorRO& lo, 28 | const AccessorRO& hi, 29 | const Rect<1>& rect) 30 | { 31 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 32 | output[i] = Rect<1>{Point<1>{lo[i]}, Point<1>{hi[i] - 1}}; 33 | } 34 | } 35 | }; 36 | 37 | /*static*/ void ZipToRect1::cpu_variant(TaskContext context) 38 | { 39 | zip_to_rect_1_template(context); 40 | } 41 | 42 | namespace // unnamed 43 | { 44 | static void __attribute__((constructor)) register_tasks(void) { ZipToRect1::register_variants(); } 45 | } // namespace 46 | 47 | } // namespace sparse 48 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | using namespace legate; 24 | 25 | template 26 | __global__ void zip_rect1_kernel(size_t elems, 27 | coord_t offset, 28 | const AccessorWO, 1> out, 29 | const AccessorRO lo, 30 | const AccessorRO hi) 31 | { 32 | const auto tid = global_tid_1d(); 33 | if (tid >= elems) { 34 | return; 35 | } 36 | const auto idx = tid + offset; 37 | out[idx] = {lo[idx], hi[idx] - 1}; 38 | } 39 | 40 | template 41 | struct ZipToRect1ImplBody { 42 | void operator()(const AccessorWO, 1>& output, 43 | const AccessorRO& lo, 44 | const AccessorRO& hi, 45 | const Rect<1>& rect) 46 | { 47 | auto stream = get_cached_stream(); 48 | auto elems = rect.volume(); 49 | auto blocks = get_num_blocks_1d(elems); 50 | zip_rect1_kernel<<>>(elems, rect.lo, output, lo, hi); 51 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 52 | } 53 | }; 54 | 55 | /*static*/ void ZipToRect1::gpu_variant(TaskContext context) 56 | { 57 | zip_to_rect_1_template(context); 58 | } 59 | 60 | } // namespace sparse 61 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct ZipToRect1Args { 26 | const legate::PhysicalStore& out; 27 | const legate::PhysicalStore& lo; 28 | const legate::PhysicalStore& hi; 29 | }; 30 | 31 | class ZipToRect1 : public SparseTask { 32 | public: 33 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_ZIP_TO_RECT_1}; 34 | static void cpu_variant(legate::TaskContext ctx); 35 | #ifdef LEGATE_USE_OPENMP 36 | static void omp_variant(legate::TaskContext ctx); 37 | #endif 38 | #ifdef LEGATE_USE_CUDA 39 | static void gpu_variant(legate::TaskContext ctx); 40 | #endif 41 | }; 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect_omp.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/array/util/zip_to_rect.h" 18 | #include "legate_sparse/array/util/zip_to_rect_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | template 25 | struct ZipToRect1ImplBody { 26 | void operator()(const AccessorWO, 1>& output, 27 | const AccessorRO& lo, 28 | const AccessorRO& hi, 29 | const Rect<1>& rect) 30 | { 31 | #pragma omp parallel for schedule(static) 32 | for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) { 33 | output[i] = Rect<1>{Point<1>{lo[i]}, Point<1>{hi[i] - 1}}; 34 | } 35 | } 36 | }; 37 | 38 | /*static*/ void ZipToRect1::omp_variant(TaskContext context) 39 | { 40 | zip_to_rect_1_template(context); 41 | } 42 | 43 | } // namespace sparse 44 | -------------------------------------------------------------------------------- /src/legate_sparse/array/util/zip_to_rect_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
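// The following note and worked example are added documentation, not part of the original
// sources; the values shown are hypothetical. ZipToRect1 packs two integer offset arrays into
// the Rect<1> "pos" representation used for CSR rows: each output entry covers the half-open
// range [lo[i], hi[i]) stored as the inclusive rect {lo[i], hi[i] - 1}.
//
//   lo  = [0, 2, 5]                 // index of the first nonzero of each row
//   hi  = [2, 5, 7]                 // one past the last nonzero of each row
//   pos = [{0,1}, {2,4}, {5,6}]     // row i owns crd/vals entries in pos[i]
//
// An empty row (lo[i] == hi[i]) becomes the empty rect {lo[i], lo[i] - 1}, which has zero
// volume, so downstream tasks iterate over no nonzeros for that row.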
20 | #include "legate_sparse/array/util/zip_to_rect.h" 21 | #include "legate_sparse/util/dispatch.h" 22 | 23 | namespace sparse { 24 | 25 | using namespace legate; 26 | 27 | template 28 | struct ZipToRect1ImplBody; 29 | 30 | template 31 | struct ZipToRect1Impl { 32 | void operator()(ZipToRect1Args& args) const 33 | { 34 | auto output = args.out.write_accessor, 1>(); 35 | auto lo = args.lo.read_accessor(); 36 | auto hi = args.hi.read_accessor(); 37 | if (args.out.domain().empty()) { 38 | return; 39 | } 40 | ZipToRect1ImplBody()(output, lo, hi, args.out.shape<1>()); 41 | } 42 | }; 43 | 44 | template 45 | static void zip_to_rect_1_template(TaskContext context) 46 | { 47 | auto inputs = context.inputs(); 48 | ZipToRect1Args args{context.outputs()[0], inputs[0], inputs[1]}; 49 | if (inputs[0].data().type().code() == legate::Type::Code::INT64) { 50 | ZipToRect1Impl{}(args); 51 | } else { 52 | assert(inputs[0].data().type().code() == legate::Type::Code::UINT64); 53 | ZipToRect1Impl{}(args); 54 | } 55 | } 56 | 57 | } // namespace sparse 58 | -------------------------------------------------------------------------------- /src/legate_sparse/cffi.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2023-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #ifndef __LEGATE_SPARSE_CFFI_H__ 18 | #define __LEGATE_SPARSE_CFFI_H__ 19 | 20 | enum LegateSparseOpCode { 21 | _LEGATE_SPARSE_OP_CODE_BASE = 0, 22 | LEGATE_SPARSE_CSR_TO_DENSE, 23 | LEGATE_SPARSE_DENSE_TO_CSR_NNZ, 24 | LEGATE_SPARSE_DENSE_TO_CSR, 25 | LEGATE_SPARSE_BOUNDS_FROM_PARTITIONED_COORDINATES, 26 | LEGATE_SPARSE_SORTED_COORDS_TO_COUNTS, 27 | LEGATE_SPARSE_EXPAND_POS_TO_COORDINATES, 28 | 29 | // File IO. 30 | LEGATE_SPARSE_READ_MTX_TO_COO, 31 | 32 | // Operations on matrices that aren't quite tensor algebra related. 33 | LEGATE_SPARSE_CSR_DIAGONAL, 34 | 35 | // Indexing a CSR matrix with another CSR matrix 36 | LEGATE_SPARSE_CSR_INDEXING_CSR, 37 | 38 | // Linear algebra operations 39 | LEGATE_SPARSE_CSR_SPMV_ROW_SPLIT, 40 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_NNZ, 41 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR, 42 | LEGATE_SPARSE_SPGEMM_CSR_CSR_CSR_GPU, 43 | 44 | // Dense linear algebra tasks needed for things 45 | // like iterative linear solvers. 46 | LEGATE_SPARSE_AXPBY, 47 | 48 | // nonzero API 49 | LEGATE_SPARSE_NONZERO, 50 | 51 | // Utility tasks. 52 | LEGATE_SPARSE_ZIP_TO_RECT_1, 53 | LEGATE_SPARSE_UNZIP_RECT_1, 54 | LEGATE_SPARSE_SCALE_RECT_1, 55 | LEGATE_SPARSE_FAST_IMAGE_RANGE, 56 | LEGATE_SPARSE_UPCAST_FUTURE_TO_REGION, 57 | 58 | // Utility tasks for loading cuda libraries. 
59 | LEGATE_SPARSE_LOAD_CUDALIBS, 60 | LEGATE_SPARSE_UNLOAD_CUDALIBS, 61 | 62 | LEGATE_SPARSE_LAST_TASK, // must be last 63 | }; 64 | 65 | #endif // __LEGATE_SPARSE_CFFI_H__ 66 | -------------------------------------------------------------------------------- /src/legate_sparse/cudalibs.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/sparse.h" 18 | #include "legate_sparse/sparse_c.h" 19 | #include "legate_sparse/cudalibs.h" 20 | 21 | #include 22 | 23 | namespace sparse { 24 | 25 | CUDALibraries::CUDALibraries() : finalized_(false), cusparse_(nullptr) {} 26 | 27 | CUDALibraries::~CUDALibraries() { finalize(); } 28 | 29 | void CUDALibraries::finalize() 30 | { 31 | if (finalized_) { 32 | return; 33 | } 34 | if (cusparse_ != nullptr) { 35 | finalize_cusparse(); 36 | } 37 | finalized_ = true; 38 | } 39 | 40 | void CUDALibraries::finalize_cusparse() 41 | { 42 | CHECK_CUSPARSE(cusparseDestroy(cusparse_)); 43 | cusparse_ = nullptr; 44 | } 45 | 46 | cusparseHandle_t CUDALibraries::get_cusparse() 47 | { 48 | if (this->cusparse_ == nullptr) { 49 | CHECK_CUSPARSE(cusparseCreate(&this->cusparse_)); 50 | } 51 | return this->cusparse_; 52 | } 53 | 54 | static CUDALibraries& get_cuda_libraries(legate::Processor proc) 55 | { 56 | if (proc.kind() != legate::Processor::TOC_PROC) { 57 | fprintf(stderr, "Illegal request for CUDA libraries for non-GPU processor"); 58 | LEGATE_ABORT("Illegal request for CUDA libraries for non-GPU processor"); 59 | } 60 | 61 | static CUDALibraries cuda_libraries[LEGION_MAX_NUM_PROCS]; 62 | const auto proc_id = proc.id & (LEGION_MAX_NUM_PROCS - 1); 63 | return cuda_libraries[proc_id]; 64 | } 65 | 66 | legate::cuda::StreamView get_cached_stream() 67 | { 68 | return legate::cuda::StreamPool::get_stream_pool().get_stream(); 69 | } 70 | 71 | cusparseHandle_t get_cusparse() 72 | { 73 | const auto proc = legate::Processor::get_executing_processor(); 74 | auto& lib = get_cuda_libraries(proc); 75 | return lib.get_cusparse(); 76 | } 77 | 78 | class LoadCUDALibsTask : public SparseTask { 79 | public: 80 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_LOAD_CUDALIBS}; 81 | 82 | public: 83 | static void gpu_variant(legate::TaskContext context) 84 | { 85 | const auto proc = legate::Processor::get_executing_processor(); 86 | auto& lib = get_cuda_libraries(proc); 87 | lib.get_cusparse(); 88 | } 89 | }; 90 | 91 | class UnloadCUDALibsTask : public SparseTask { 92 | public: 93 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UNLOAD_CUDALIBS}; 94 | 95 | public: 96 | static void gpu_variant(legate::TaskContext context) 97 | { 98 | const auto proc = legate::Processor::get_executing_processor(); 99 | auto& lib = get_cuda_libraries(proc); 100 | lib.finalize(); 101 | } 102 | }; 103 | 104 | static void __attribute__((constructor)) register_tasks(void) 105 
| { 106 | LoadCUDALibsTask::register_variants(); 107 | UnloadCUDALibsTask::register_variants(); 108 | } 109 | 110 | } // namespace sparse 111 | -------------------------------------------------------------------------------- /src/legate_sparse/cudalibs.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/util/cuda_help.h" 20 | 21 | namespace sparse { 22 | 23 | struct CUDALibraries { 24 | public: 25 | CUDALibraries(); 26 | ~CUDALibraries(); 27 | 28 | private: 29 | // Prevent copying and overwriting. 30 | CUDALibraries(const CUDALibraries& rhs) = delete; 31 | CUDALibraries& operator=(const CUDALibraries& rhs) = delete; 32 | 33 | public: 34 | void finalize(); 35 | cusparseHandle_t get_cusparse(); 36 | 37 | private: 38 | void finalize_cusparse(); 39 | 40 | private: 41 | bool finalized_; 42 | cusparseHandle_t cusparse_; 43 | }; 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/io/mtx_to_coo.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | class ReadMTXToCOO : public SparseTask { 26 | public: 27 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_READ_MTX_TO_COO}; 28 | 29 | static constexpr legate::VariantOptions CPU_VARIANT_OPTIONS = 30 | legate::VariantOptions{}.with_has_allocations(true); 31 | 32 | public: 33 | static void cpu_variant(legate::TaskContext ctx); 34 | }; 35 | 36 | } // namespace sparse 37 | -------------------------------------------------------------------------------- /src/legate_sparse/linalg/axpby.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | 
20 | namespace sparse {
21 | 
22 | using namespace legate;
23 | 
24 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
25 | struct AXPBYImplBody<VariantKind::CPU, VAL_CODE, IS_ALPHA, NEGATE> {
26 |   using VAL_TY = type_of<VAL_CODE>;
27 | 
28 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
29 |                   const AccessorRO<VAL_TY, 1>& x,
30 |                   const AccessorRO<VAL_TY, 1>& a,
31 |                   const AccessorRO<VAL_TY, 1>& b,
32 |                   const Rect<1>& rect)
33 |   {
34 |     auto val = a[0] / b[0];
35 |     if (NEGATE) {
36 |       val = static_cast<VAL_TY>(-1) * val;
37 |     }
38 |     for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) {
39 |       if (IS_ALPHA) {
40 |         y[i] = val * x[i] + y[i];
41 |       } else {
42 |         y[i] = x[i] + val * y[i];
43 |       }
44 |     }
45 |   }
46 | };
47 | 
48 | /*static*/ void AXPBY::cpu_variant(TaskContext context)
49 | {
50 |   axpby_template<VariantKind::CPU>(context);
51 | }
52 | 
53 | namespace  // unnamed
54 | {
55 | static void __attribute__((constructor)) register_tasks(void) { AXPBY::register_variants(); }
56 | }  // namespace
57 | 
58 | }  // namespace sparse
59 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby.cu:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | #include "legate_sparse/util/cuda_help.h"
20 | 
21 | namespace sparse {
22 | 
23 | using namespace legate;
24 | 
25 | template <typename VAL_TY, bool IS_ALPHA, bool NEGATE>
26 | __global__ void axpby_kernel(size_t elems,
27 |                              coord_t offset,
28 |                              AccessorRW<VAL_TY, 1> y,
29 |                              AccessorRO<VAL_TY, 1> x,
30 |                              AccessorRO<VAL_TY, 1> a,
31 |                              AccessorRO<VAL_TY, 1> b)
32 | {
33 |   const auto idx = global_tid_1d();
34 |   if (idx >= elems) {
35 |     return;
36 |   }
37 |   auto i   = idx + offset;
38 |   auto val = a[0] / b[0];
39 |   if (NEGATE) {
40 |     val = static_cast<VAL_TY>(-1) * val;
41 |   }
42 |   if (IS_ALPHA) {
43 |     y[i] = val * x[i] + y[i];
44 |   } else {
45 |     y[i] = x[i] + val * y[i];
46 |   }
47 | }
48 | 
49 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
50 | struct AXPBYImplBody<VariantKind::GPU, VAL_CODE, IS_ALPHA, NEGATE> {
51 |   using VAL_TY = type_of<VAL_CODE>;
52 | 
53 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
54 |                   const AccessorRO<VAL_TY, 1>& x,
55 |                   const AccessorRO<VAL_TY, 1>& a,
56 |                   const AccessorRO<VAL_TY, 1>& b,
57 |                   const Rect<1>& rect)
58 |   {
59 |     auto elems  = rect.volume();
60 |     auto blocks = get_num_blocks_1d(elems);
61 |     auto stream = get_cached_stream();
62 |     axpby_kernel<VAL_TY, IS_ALPHA, NEGATE>
63 |       <<<blocks, THREADS_PER_BLOCK, 0, stream>>>(elems, rect.lo[0], y, x, a, b);
64 |     LEGATE_SPARSE_CHECK_CUDA_STREAM(stream);
65 |   }
66 | };
67 | 
68 | /*static*/ void AXPBY::gpu_variant(TaskContext context)
69 | {
70 |   axpby_template<VariantKind::GPU>(context);
71 | }
72 | 
73 | }  // namespace sparse
74 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include "legate_sparse/sparse.h"
20 | #include "legate_sparse/sparse_c.h"
21 | #include "legate.h"
22 | 
23 | namespace sparse {
24 | 
25 | struct AXPBYArgs {
26 |   const legate::PhysicalStore& y;
27 |   const legate::PhysicalStore& x;
28 |   const legate::PhysicalStore& a;
29 |   const legate::PhysicalStore& b;
30 |   const bool isalpha;
31 |   const bool negate;
32 | };
33 | 
34 | class AXPBY : public SparseTask<AXPBY> {
35 |  public:
36 |   static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_AXPBY};
37 |   static void cpu_variant(legate::TaskContext ctx);
38 | #ifdef LEGATE_USE_OPENMP
39 |   static void omp_variant(legate::TaskContext ctx);
40 | #endif
41 | #ifdef LEGATE_USE_CUDA
42 |   static void gpu_variant(legate::TaskContext ctx);
43 | #endif
44 | };
45 | 
46 | }  // namespace sparse
47 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby_omp.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #include "legate_sparse/linalg/axpby.h"
18 | #include "legate_sparse/linalg/axpby_template.inl"
19 | 
20 | namespace sparse {
21 | 
22 | using namespace legate;
23 | 
24 | template <Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
25 | struct AXPBYImplBody<VariantKind::OMP, VAL_CODE, IS_ALPHA, NEGATE> {
26 |   using VAL_TY = type_of<VAL_CODE>;
27 | 
28 |   void operator()(const AccessorRW<VAL_TY, 1>& y,
29 |                   const AccessorRO<VAL_TY, 1>& x,
30 |                   const AccessorRO<VAL_TY, 1>& a,
31 |                   const AccessorRO<VAL_TY, 1>& b,
32 |                   const Rect<1>& rect)
33 |   {
34 |     auto val = a[0] / b[0];
35 |     if (NEGATE) {
36 |       val = static_cast<VAL_TY>(-1) * val;
37 |     }
38 | #pragma omp parallel for schedule(static)
39 |     for (coord_t i = rect.lo[0]; i < rect.hi[0] + 1; i++) {
40 |       if (IS_ALPHA) {
41 |         y[i] = val * x[i] + y[i];
42 |       } else {
43 |         y[i] = x[i] + val * y[i];
44 |       }
45 |     }
46 |   }
47 | };
48 | 
49 | /*static*/ void AXPBY::omp_variant(TaskContext context)
50 | {
51 |   axpby_template<VariantKind::OMP>(context);
52 | }
53 | 
54 | }  // namespace sparse
55 | 
--------------------------------------------------------------------------------
/src/legate_sparse/linalg/axpby_template.inl:
--------------------------------------------------------------------------------
 1 | /* Copyright 2021-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | // Useful for IDEs.
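// Added documentation with a worked example (the numbers are hypothetical, not from the
// original sources). The AXPBY task updates y in place from a scalar ratio a/b that arrives
// as two single-element stores: with IS_ALPHA the update is y = (a/b) * x + y, otherwise it
// is y = x + (a/b) * y, and NEGATE flips the sign of the ratio. For example, a = [2] and
// b = [4] give val = 0.5, so with IS_ALPHA = true and NEGATE = false each element becomes
// y[i] = 0.5 * x[i] + y[i], while NEGATE = true yields y[i] = -0.5 * x[i] + y[i]. Keeping a
// and b separate lets callers such as the CG driver (see tests/integration/test_cg_axpby.py)
// pass dot-product futures directly instead of blocking on a host-side division.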
20 | #include "legate_sparse/linalg/axpby.h"
21 | #include "legate_sparse/util/dispatch.h"
22 | 
23 | namespace sparse {
24 | 
25 | using namespace legate;
26 | 
27 | template <VariantKind KIND, Type::Code VAL_CODE, bool IS_ALPHA, bool NEGATE>
28 | struct AXPBYImplBody;
29 | 
30 | template <VariantKind KIND>
31 | struct AXPBYImpl {
32 |   template <Type::Code VAL_CODE>
33 |   void operator()(AXPBYArgs& args) const
34 |   {
35 |     using VAL_TY = type_of<VAL_CODE>;
36 |     auto y = args.y.read_write_accessor<VAL_TY, 1>();
37 |     auto x = args.x.read_accessor<VAL_TY, 1>();
38 |     auto a = args.a.read_accessor<VAL_TY, 1>();
39 |     auto b = args.b.read_accessor<VAL_TY, 1>();
40 |     if (args.y.domain().empty()) {
41 |       return;
42 |     }
43 |     if (args.isalpha) {
44 |       if (args.negate) {
45 |         AXPBYImplBody<KIND, VAL_CODE, true, true>()(y, x, a, b, args.y.shape<1>());
46 |       } else {
47 |         AXPBYImplBody<KIND, VAL_CODE, true, false>()(y, x, a, b, args.y.shape<1>());
48 |       }
49 |     } else {
50 |       if (args.negate) {
51 |         AXPBYImplBody<KIND, VAL_CODE, false, true>()(y, x, a, b, args.y.shape<1>());
52 |       } else {
53 |         AXPBYImplBody<KIND, VAL_CODE, false, false>()(y, x, a, b, args.y.shape<1>());
54 |       }
55 |     }
56 |   }
57 | };
58 | 
59 | template <VariantKind KIND>
60 | static void axpby_template(TaskContext context)
61 | {
62 |   AXPBYArgs args{
63 |     context.outputs()[0],
64 |     context.inputs()[0],
65 |     context.inputs()[1],
66 |     context.inputs()[2],
67 |     context.scalars()[0].value<bool>(),
68 |     context.scalars()[1].value<bool>(),
69 |   };
70 |   value_type_dispatch(args.y.code(), AXPBYImpl<KIND>{}, args);
71 | }
72 | 
73 | }  // namespace sparse
74 | 
--------------------------------------------------------------------------------
/src/legate_sparse/mapper/mapper.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  *
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include "legate/mapping/mapping.h"
20 | 
21 | namespace sparse {
22 | 
23 | class LegateSparseMapper : public legate::mapping::Mapper {
24 |  public:
25 |   // Virtual mapping functions of LegateMapper that need to be overridden.
26 |   virtual std::vector<legate::mapping::StoreMapping> store_mappings(
27 |     const legate::mapping::Task& task,
28 |     const std::vector<legate::mapping::StoreTarget>& options) override;
29 | 
30 |   auto allocation_pool_size(const legate::mapping::Task& task,
31 |                             legate::mapping::StoreTarget memory_kind)
32 |     -> std::optional<std::size_t> override;
33 | 
34 |   virtual legate::Scalar tunable_value(legate::TunableID tunable_id) override;
35 | };
36 | 
37 | }  // namespace sparse
38 | 
--------------------------------------------------------------------------------
/src/legate_sparse/partition/fast_image_partition.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022-2024 NVIDIA Corporation
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/partition/fast_image_partition.h" 18 | #include "legate_sparse/partition/fast_image_partition_template.inl" 19 | 20 | namespace sparse { 21 | 22 | using namespace legate; 23 | 24 | namespace // unnamed 25 | { 26 | static void __attribute__((constructor)) register_tasks(void) 27 | { 28 | FastImageRange::register_variants(); 29 | } 30 | } // namespace 31 | 32 | } // namespace sparse -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition.cu: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/partition/fast_image_partition.h" 18 | #include "legate_sparse/partition/fast_image_partition_template.inl" 19 | #include "legate_sparse/util/cuda_help.h" 20 | #include "legate_sparse/util/thrust_allocator.h" 21 | 22 | #include 23 | #include 24 | 25 | namespace sparse { 26 | 27 | using namespace legate; 28 | 29 | template 30 | struct FastImageRangeImplBody { 31 | using INDEX_TY = type_of; 32 | 33 | void operator()(const AccessorWO, 1>& out_pos, 34 | const AccessorRO, 1>& in_pos, 35 | const AccessorRO& in_crd, 36 | const Rect<1>& rowbounds, 37 | const Rect<1>& bounds) 38 | { 39 | ThrustAllocator alloc(Memory::GPU_FB_MEM); 40 | auto stream = get_cached_stream(); 41 | auto thrust_exec_policy = thrust::cuda::par(alloc).on(stream); 42 | 43 | thrust::pair result = thrust::minmax_element( 44 | thrust_exec_policy, in_crd.ptr(bounds.lo[0]), in_crd.ptr(bounds.hi[0]) + 1); 45 | 46 | // out[idx] = {lo[idx], hi[idx] - 1}; 47 | INDEX_TY lo_idx, hi_idx; 48 | cudaMemcpyAsync(&lo_idx, result.first, sizeof(INDEX_TY), cudaMemcpyDefault, stream); 49 | cudaMemcpyAsync(&hi_idx, result.second, sizeof(INDEX_TY), cudaMemcpyDefault, stream); 50 | thrust::fill(thrust_exec_policy, 51 | out_pos.ptr(rowbounds.lo[0]), 52 | out_pos.ptr(rowbounds.hi[0]) + 1, 53 | Rect<1>({lo_idx, hi_idx})); 54 | 55 | LEGATE_SPARSE_CHECK_CUDA_STREAM(stream); 56 | } 57 | }; 58 | 59 | /*static*/ void FastImageRange::gpu_variant(TaskContext context) 60 | { 61 | fast_image_range_template(context); 62 | } 63 | 64 | } // namespace sparse 65 | -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA 
Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | struct FastImageRangeArgs { 26 | const legate::PhysicalStore output_pos; 27 | const legate::PhysicalStore input_pos; 28 | const legate::PhysicalStore input_crd; 29 | }; 30 | 31 | // only for CSR SpGEMM on GPU right now 32 | class FastImageRange : public SparseTask { 33 | public: 34 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_FAST_IMAGE_RANGE}; 35 | 36 | static constexpr legate::VariantOptions GPU_VARIANT_OPTIONS = 37 | legate::VariantOptions{}.with_has_allocations(true); 38 | 39 | public: 40 | #ifdef LEGATE_USE_CUDA 41 | static void gpu_variant(legate::TaskContext context); 42 | #endif 43 | }; 44 | 45 | } // namespace sparse 46 | -------------------------------------------------------------------------------- /src/legate_sparse/partition/fast_image_partition_template.inl: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | // Useful for IDEs. 
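// Added documentation with an illustrative example (values are hypothetical, not from the
// original sources). FastImageRange computes a conservative bounding interval for the
// coordinates owned by one task and broadcasts it to that task's slice of the output pos
// array: the GPU variant takes the min and max of the local crd values (thrust::minmax_element)
// and fills the local rows with that single Rect<1>. For instance, if a task's crd slice is
// [3, 9, 4, 7], every out_pos entry it owns becomes {3, 9}, giving the runtime a cheap
// over-approximation of the image partition instead of an exact image computation. Per the
// comment in fast_image_partition.h, this is currently only used for CSR SpGEMM on GPUs.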
20 | #include "legate_sparse/util/dispatch.h" 21 | #include "legate_sparse/util/typedefs.h" 22 | #include "legate_sparse/partition/fast_image_partition.h" 23 | 24 | namespace sparse { 25 | 26 | using namespace legate; 27 | 28 | template 29 | struct FastImageRangeImplBody; 30 | 31 | template 32 | struct FastImageRangeImpl { 33 | template 34 | void operator()(FastImageRangeArgs& args) const 35 | { 36 | using INDEX_TY = type_of; 37 | 38 | auto output_pos = args.output_pos.write_accessor, 1>(); 39 | auto input_pos = args.input_pos.read_accessor, 1>(); 40 | auto input_crd = args.input_crd.read_accessor(); 41 | assert(args.input_pos.domain().dense()); 42 | assert(args.input_crd.domain().dense()); 43 | if (args.input_crd.domain().empty()) { 44 | return; 45 | } 46 | FastImageRangeImplBody()( 47 | output_pos, input_pos, input_crd, args.input_pos.shape<1>(), args.input_crd.shape<1>()); 48 | } 49 | }; 50 | 51 | template 52 | static void fast_image_range_template(TaskContext context) 53 | { 54 | FastImageRangeArgs args{context.output(0), context.input(0), context.input(1)}; 55 | index_type_dispatch(args.input_crd.code(), FastImageRangeImpl{}, args); 56 | } 57 | 58 | } // namespace sparse 59 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2021-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/sparse.h" 18 | 19 | #include "legate_sparse/mapper/mapper.h" 20 | 21 | #include "legate.h" 22 | 23 | using namespace legate; 24 | 25 | namespace sparse { 26 | 27 | static const char* const library_name = "legate.sparse"; 28 | 29 | TaskRegistrar& Sparse::get_registrar() 30 | { 31 | static TaskRegistrar registrar; 32 | return registrar; 33 | } 34 | 35 | void registration_callback() 36 | { 37 | ResourceConfig config; 38 | // TODO (rohany): I want to use the enums here, but I'm not sure the best way 39 | // to keep this in line with the Python import since there seems to be a 40 | // cyclic dependency. 41 | // config.max_tasks = LEGATE_SPARSE_LAST_TASK; 42 | config.max_tasks = 100; 43 | // SJ; Sat 11 Jan 2025 01:16:31 PM PST 44 | // Do we need to set max_projections if we don't use any projection functors? 45 | // TODO (rohany): We're dynamically generating projections... How does cupynumeric handle this? 
46 | // config.max_projections = 1000; 47 | 48 | auto options = VariantOptions{}.with_has_allocations(false); 49 | auto ctx = Runtime::get_runtime()->create_library( 50 | library_name, 51 | config, 52 | std::make_unique(), 53 | {{VariantCode::CPU, options}, {VariantCode::GPU, options}, {VariantCode::OMP, options}}); 54 | 55 | Sparse::get_registrar().register_all_tasks(ctx); 56 | } 57 | 58 | } // namespace sparse 59 | 60 | extern "C" { 61 | 62 | void legate_sparse_perform_registration(void) { sparse::registration_callback(); } 63 | } 64 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate.h" 20 | 21 | namespace sparse { 22 | 23 | enum class VariantKind : int { 24 | CPU = 0, 25 | OMP = 1, 26 | GPU = 2, 27 | }; 28 | 29 | struct Sparse { 30 | static legate::TaskRegistrar& get_registrar(); 31 | }; 32 | 33 | template 34 | struct SparseTask : public legate::LegateTask { 35 | using Registrar = Sparse; 36 | }; 37 | 38 | } // namespace sparse 39 | -------------------------------------------------------------------------------- /src/legate_sparse/sparse_c.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #ifndef __SPARSE_C_H 18 | #define __SPARSE_C_H 19 | 20 | #include "cffi.h" 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | void legate_sparse_perform_registration(); 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif // __SPARSE_C_H 33 | -------------------------------------------------------------------------------- /src/legate_sparse/util/legate_utils.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate/utilities/typedefs.h" 20 | #include "realm/logging.h" 21 | #include 22 | 23 | namespace sparse { 24 | 25 | // Create 1D extents from lower and upper bounds 26 | template 27 | legate::Rect<1> create_1d_extents(const T& lo, const Q& hi) 28 | { 29 | return legate::Rect<1>{legate::Point<1>{lo}, legate::Point<1>{hi}}; 30 | } 31 | 32 | inline Realm::Logger& get_logger() 33 | { 34 | static Realm::Logger logger("legate-sparse"); 35 | return logger; 36 | } 37 | 38 | // Remove the path and use only the filename 39 | #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) 40 | 41 | // Macros for buffer allocation logging 42 | #if ENABLE_BUFFER_LOGGING 43 | #define CREATE_BUFFER(T, size, mem, desc) \ 44 | [&]() { \ 45 | auto buf = legate::create_buffer(size, mem); \ 46 | get_logger().print() << "Buffer allocation at " << __FILENAME__ << ":" << __LINE__ \ 47 | << " - Size: " << size << " Type: " << #T << " Description: " << desc; \ 48 | return buf; \ 49 | }() 50 | #else 51 | #define CREATE_BUFFER(T, size, mem, desc) legate::create_buffer(size, mem) 52 | #endif 53 | 54 | #if ENABLE_BUFFER_LOGGING 55 | #define LOG_BUFFER(T, size, desc) \ 56 | get_logger().print() << "Buffer allocation at " << __FILENAME__ << ":" << __LINE__ \ 57 | << " - Size: " << size << " Type: " << #T << " Description: " << desc 58 | #else 59 | #define LOG_BUFFER(T, size, desc) 60 | #endif 61 | 62 | } // namespace sparse 63 | -------------------------------------------------------------------------------- /src/legate_sparse/util/logger.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/legate_sparse/util/omp_help.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | namespace sparse { 24 | 25 | // Simple STL vector-based thread local storage for OpenMP threads to avoid false sharing 26 | template 27 | struct ThreadLocalStorage { 28 | private: 29 | static constexpr size_t CACHE_LINE_SIZE = 64; 30 | 31 | public: 32 | ThreadLocalStorage(size_t num_threads) 33 | : storage_(CACHE_LINE_SIZE * num_threads), num_threads_(num_threads) 34 | { 35 | } 36 | ~ThreadLocalStorage() {} 37 | 38 | public: 39 | VAL& operator[](size_t idx) 40 | { 41 | return *reinterpret_cast(storage_.data() + CACHE_LINE_SIZE * idx); 42 | } 43 | 44 | private: 45 | std::vector storage_; 46 | size_t num_threads_; 47 | }; 48 | 49 | } // namespace sparse 50 | -------------------------------------------------------------------------------- /src/legate_sparse/util/thrust_allocator.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate.h" 20 | #include "legate_sparse/util/legate_utils.h" 21 | 22 | namespace sparse { 23 | 24 | using namespace legate; 25 | 26 | class ThrustAllocator : public legate::ScopedAllocator { 27 | public: 28 | using value_type = char; 29 | 30 | ThrustAllocator(Memory::Kind kind) : legate::ScopedAllocator(kind) {} 31 | 32 | char* allocate(size_t num_bytes) 33 | { 34 | LOG_BUFFER(char, num_bytes, "ThrustAllocator::allocate"); 35 | return static_cast(ScopedAllocator::allocate(num_bytes)); 36 | } 37 | 38 | void deallocate(char* ptr, size_t n) { ScopedAllocator::deallocate(ptr); } 39 | }; 40 | 41 | } // namespace sparse 42 | -------------------------------------------------------------------------------- /src/legate_sparse/util/typedefs.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | namespace sparse { 18 | 19 | // We use uint64_t's to accumulate the non-zeros per row in 20 | // different sparse matrix computations. 
21 | using nnz_ty = uint64_t; 22 | 23 | } // namespace sparse -------------------------------------------------------------------------------- /src/legate_sparse/util/upcast_future.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | * 15 | */ 16 | 17 | #include "legate_sparse/util/upcast_future.h" 18 | 19 | namespace sparse { 20 | 21 | using namespace legate; 22 | 23 | template 24 | void upcast_impl(legate::TaskContext ctx) 25 | { 26 | auto& in_fut = ctx.inputs()[0]; 27 | const T* src; 28 | T* dst; 29 | switch (in_fut.dim()) { 30 | case 0: { 31 | // Futures can be 0-dimensional. legate doesn't appear to complain 32 | // if we make a 1-D accessor of a 0-D "store". 33 | dst = ctx.outputs()[0].write_accessor().ptr(0); 34 | src = ctx.inputs()[0].read_accessor().ptr(0); 35 | break; 36 | } 37 | case 1: { 38 | dst = ctx.outputs()[0].write_accessor().ptr(0); 39 | src = ctx.inputs()[0].read_accessor().ptr(0); 40 | break; 41 | } 42 | case 2: { 43 | dst = ctx.outputs()[0].write_accessor().ptr({0, 0}); 44 | src = ctx.inputs()[0].read_accessor().ptr({0, 0}); 45 | break; 46 | } 47 | case 3: { 48 | dst = ctx.outputs()[0].write_accessor().ptr({0, 0, 0}); 49 | src = ctx.inputs()[0].read_accessor().ptr({0, 0, 0}); 50 | break; 51 | } 52 | } 53 | memcpy(dst, src, sizeof(T)); 54 | } 55 | 56 | /*static*/ void UpcastFutureToRegion::cpu_variant(TaskContext ctx) 57 | { 58 | assert(ctx.is_single_task()); 59 | auto future_size = ctx.scalars()[0].value(); 60 | switch (future_size) { 61 | case 1: { 62 | upcast_impl(ctx); 63 | break; 64 | } 65 | case 2: { 66 | upcast_impl(ctx); 67 | break; 68 | } 69 | case 4: { 70 | upcast_impl(ctx); 71 | break; 72 | } 73 | case 8: { 74 | upcast_impl(ctx); 75 | break; 76 | } 77 | default: { 78 | assert(false); 79 | } 80 | } 81 | } 82 | 83 | namespace // unnamed 84 | { 85 | static void __attribute__((constructor)) register_tasks(void) 86 | { 87 | UpcastFutureToRegion::register_variants(); 88 | } 89 | } // namespace 90 | 91 | } // namespace sparse 92 | -------------------------------------------------------------------------------- /src/legate_sparse/util/upcast_future.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2024 NVIDIA Corporation 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | * 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "legate_sparse/sparse.h" 20 | #include "legate_sparse/sparse_c.h" 21 | #include "legate.h" 22 | 23 | namespace sparse { 24 | 25 | class UpcastFutureToRegion : public SparseTask<UpcastFutureToRegion> { 26 | public: 27 | static constexpr auto TASK_ID = legate::LocalTaskID{LEGATE_SPARSE_UPCAST_FUTURE_TO_REGION}; 28 | static void cpu_variant(legate::TaskContext ctx); 29 | 30 | private: 31 | template <typename T> 32 | static void cpu_variant_impl(legate::TaskContext ctx); 33 | }; 34 | 35 | } // namespace sparse 36 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2021-2024 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from __future__ import annotations 18 | 19 | import sys 20 | 21 | from legate.tester import CustomTest, FeatureType 22 | from legate.tester.config import Config 23 | from legate.tester.project import Project 24 | from legate.tester.test_plan import TestPlan 25 | from legate.tester.test_system import TestSystem 26 | from legate.util.types import EnvDict 27 | 28 | 29 | class ProjectLegateSparse(Project): 30 | def skipped_examples(self) -> set[str]: 31 | return set() 32 | 33 | def custom_files(self) -> list[CustomTest]: 34 | return [] 35 | 36 | def stage_env(self, feature: FeatureType) -> EnvDict: 37 | return {} 38 | 39 | 40 | if __name__ == "__main__": 41 | config = Config(sys.argv, project=ProjectLegateSparse()) 42 | 43 | system = TestSystem(dry_run=config.dry_run) 44 | 45 | plan = TestPlan(config, system) 46 | 47 | ret = plan.execute() 48 | sys.exit(ret) 49 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pytest 3 | from scipy import sparse as scipy_sparse 4 | from utils.sample import simple_system_gen 5 | 6 | import legate_sparse as sparse 7 | 8 | 9 | @pytest.fixture 10 | def create_mask(): 11 | """ 12 | Create a boolean mask matrix with a random sparsity pattern 13 | """ 14 | 15 | def _create_mask(rows, density=0.3): 16 | cols = rows 17 | nnz = int(rows * cols * density) 18 | 19 | # SciPy 20 | row_idx = numpy.random.randint(0, rows, size=nnz) 21 | col_idx = numpy.random.randint(0, cols, size=nnz) 22 | data = numpy.ones(nnz, dtype=bool) 23 | A_scipy = scipy_sparse.csr_array((data, (row_idx, col_idx)), shape=(rows, cols)) 24 | 25 | # Sparse 26 | A_sparse = sparse.csr_array(A_scipy.todense()) 27 | 28 | # Verify matrices are equivalent 29 | A_scipy_dense = numpy.asarray(A_scipy.todense()) 30 | A_sparse_dense = numpy.asarray(A_sparse.todense()) 31 | assert numpy.all( 32 | numpy.allclose(A_scipy_dense, A_sparse_dense, rtol=1e-5, atol=1e-6) 33 | ) 34 | 35 | return A_scipy, A_sparse 36 | 37 | return _create_mask 38 | 39 | 40 | @pytest.fixture 41 | def create_matrix():
42 | """ 43 | Create matrices in SciPy and Legate Sparse that are equivalent 44 | """ 45 | 46 | def _create_matrix(N, tol=0.5): 47 | _, A_scipy, _ = simple_system_gen(N, N, scipy_sparse.csr_array, tol=tol) 48 | A_sparse = sparse.csr_array(A_scipy) 49 | 50 | # Verify matrices are equivalent 51 | A_scipy_dense = numpy.asarray(A_scipy.todense()) 52 | A_sparse_dense = numpy.asarray(A_sparse.todense()) 53 | assert numpy.all( 54 | numpy.allclose(A_scipy_dense, A_sparse_dense, rtol=1e-5, atol=1e-6) 55 | ) 56 | 57 | return A_scipy, A_sparse 58 | 59 | return _create_matrix 60 | -------------------------------------------------------------------------------- /tests/integration/test_cg_axpby.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import pytest 17 | 18 | import legate_sparse.linalg as sparse_linalg 19 | 20 | 21 | @pytest.mark.parametrize("y", [[2.0, 3.0]]) 22 | @pytest.mark.parametrize("x", [[0.0, 1.0]]) 23 | @pytest.mark.parametrize("a", [[2.0]]) 24 | @pytest.mark.parametrize("b", [[3.0]]) 25 | @pytest.mark.parametrize("isalpha", [True, False]) 26 | @pytest.mark.parametrize("negate", [True, False]) 27 | def test_cg_linalg(y, x, a, b, isalpha, negate): 28 | scalar = a[0] / b[0] 29 | if negate: 30 | scalar = -scalar 31 | alpha = scalar if isalpha else 1.0 32 | beta = 1.0 if isalpha else scalar 33 | expected_y = alpha * np.asarray(x) + beta * np.asarray(y) 34 | 35 | y = np.array(y) 36 | x = np.array(x) 37 | a = np.array(a) 38 | b = np.array(b) 39 | 40 | sparse_linalg.cg_axpby(y, x, a, b, isalpha=isalpha, negate=negate) 41 | 42 | assert np.allclose(expected_y, y) 43 | 44 | 45 | if __name__ == "__main__": 46 | import sys 47 | 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_cg_solve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import sample_dense, sample_dense_vector 18 | 19 | import legate_sparse.linalg as linalg 20 | from legate_sparse import csr_array 21 | 22 | 23 | def test_cg_solve(): 24 | N, D = 1000, 1000 25 | seed = 471014 26 | A = sample_dense(N, D, 0.1, seed) 27 | A = 0.5 * (A + A.T) 28 | A = A + N * np.eye(N) 29 | # Assert that A is indeed positive definite. 30 | assert np.all(np.linalg.eigvals(A) > 0) 31 | A = csr_array(A) 32 | x = sample_dense_vector(D, 0.1, seed) 33 | y = A @ x 34 | x_pred, iters = linalg.cg(A, y, tol=1e-8) 35 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 36 | 37 | 38 | def test_cg_solve_with_callback(): 39 | N, D = 1000, 1000 40 | seed = 471014 41 | A = sample_dense(N, D, 0.1, seed) 42 | A = 0.5 * (A + A.T) 43 | A = A + N * np.eye(N) 44 | # Assert that A is indeed positive definite. 45 | assert np.all(np.linalg.eigvals(A) > 0) 46 | A = csr_array(A) 47 | x = sample_dense_vector(D, 0.1, seed) 48 | y = A @ x 49 | residuals = [] 50 | 51 | def callback(x): 52 | # Test that nothing goes wrong if we do some arbitrary computation in 53 | # the callback on x. 54 | residuals.append(y - A @ x) 55 | 56 | x_pred, iters = linalg.cg(A, y, tol=1e-8, callback=callback) 57 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 58 | assert len(residuals) > 0 59 | 60 | 61 | # def test_cg_solve_with_identity_preconditioner(): 62 | # N, D = 1000, 1000 63 | # seed = 471014 64 | # A = sample_dense(N, D, 0.1, seed) 65 | # A = 0.5 * (A + A.T) 66 | # A = A + N * np.eye(N) 67 | # # Assert that A is indeed positive definite. 68 | # assert np.all(np.linalg.eigvals(A) > 0) 69 | # A = csr_array(A) 70 | # x = sample_dense_vector(D, 0.1, seed) 71 | # y = A @ x 72 | # assert np.allclose((A @ x), y) 73 | # x_pred, iters = linalg.cg(A, y, M=eye(A.shape[0]), tol=1e-8) 74 | # assert np.allclose((A @ x_pred), y) 75 | 76 | 77 | def test_cg_solve_with_linear_operator(): 78 | N, D = 1000, 1000 79 | seed = 471014 80 | A = sample_dense(N, D, 0.1, seed) 81 | A = 0.5 * (A + A.T) 82 | A = A + N * np.eye(N) 83 | # Assert that A is indeed positive definite. 84 | assert np.all(np.linalg.eigvals(A) > 0) 85 | A = csr_array(A) 86 | x = sample_dense_vector(D, 0.1, seed) 87 | y = A @ x 88 | 89 | def matvec(x): 90 | return A @ x 91 | 92 | x_pred, iters = linalg.cg( 93 | linalg.LinearOperator(A.shape, matvec=matvec), y, tol=1e-8 94 | ) 95 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 96 | 97 | def matvec(x, out=None): 98 | return A.dot(x, out=out) 99 | 100 | x_pred, iters = linalg.cg( 101 | linalg.LinearOperator(A.shape, matvec=matvec), y, tol=1e-8 102 | ) 103 | assert np.allclose((A @ x_pred), y, rtol=1e-8, atol=0.0) 104 | 105 | 106 | if __name__ == "__main__": 107 | import sys 108 | 109 | ret = pytest.main(sys.argv) 110 | sys.exit(ret) 111 | -------------------------------------------------------------------------------- /tests/integration/test_comparison.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import simple_system_gen 18 | 19 | import legate_sparse as sparse 20 | 21 | # TODO: Enable "eq" after #209 is fixed 22 | COMPARISON_OPS = [ 23 | ("gt", lambda x, y: x > y), 24 | ("lt", lambda x, y: x < y), 25 | ("ge", lambda x, y: x >= y), 26 | ("le", lambda x, y: x <= y), 27 | # ("eq", lambda x, y: x == y), 28 | ("ne", lambda x, y: x != y), 29 | ] 30 | 31 | 32 | @pytest.mark.parametrize("N", [8, 13]) 33 | @pytest.mark.parametrize("threshold", [0.3, 0.5]) 34 | @pytest.mark.parametrize("op_name, op_func", COMPARISON_OPS) 35 | def test_comparison_operation(N, threshold, op_name, op_func): 36 | """Test element-wise comparison operations on non-zero entries of the matrix 37 | 38 | Parameters 39 | ---------- 40 | N : int 41 | Size of the test matrix 42 | threshold : float 43 | Value to compare against 44 | op_name : str 45 | Name of the comparison operation 46 | op_func : callable 47 | The comparison function to test 48 | """ 49 | A_dense, A_sparse, _ = simple_system_gen(N, N, sparse.csr_array, tol=0.7) 50 | 51 | sparse_result = op_func(A_sparse, threshold) 52 | dense_result = op_func(A_dense[A_dense != 0], threshold) 53 | 54 | assert sparse_result.astype(int).sum() == dense_result.astype(int).sum() 55 | 56 | 57 | @pytest.mark.parametrize("op_name, op_func", COMPARISON_OPS) 58 | def test_comparison_error_cases(op_name, op_func): 59 | """Test error cases for comparison operations. 60 | 61 | Parameters 62 | ---------- 63 | op_name : str 64 | Name of the comparison operation 65 | op_func : callable 66 | The comparison function to test 67 | """ 68 | N = 8 69 | _, A_sparse, _ = simple_system_gen(N, N, sparse.csr_array, tol=0.7) 70 | 71 | # Test comparison with non-scalar values 72 | invalid_comparisons = [ 73 | np.array([1, 2, 3]), # 1D array 74 | np.array([[1, 2], [3, 4]]), # 2D array 75 | "string", # string 76 | [1, 2, 3], # list 77 | ] 78 | 79 | for invalid_value in invalid_comparisons: 80 | with pytest.raises(AssertionError): 81 | op_func(A_sparse, invalid_value) 82 | 83 | 84 | if __name__ == "__main__": 85 | import sys 86 | 87 | sys.exit(pytest.main(sys.argv)) 88 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_coo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import numpy 19 | import pytest 20 | from utils.sample import simple_system_gen 21 | 22 | import legate_sparse as sparse 23 | 24 | 25 | @pytest.mark.parametrize("N", [7, 13]) 26 | @pytest.mark.parametrize("M", [5, 29]) 27 | def test_csr_from_coo(N, M): 28 | shape = (N, M) 29 | 30 | np.random.seed(0) 31 | 32 | # This approach can generate duplicate nnz entries: 33 | # nnz = N*M // 2 34 | # row_ind = np.random.random_integers(0, high=(N-1), size=nnz) 35 | # col_ind = np.random.random_integers(0, high=(M-1), size=nnz) 36 | # vals = np.random.rand(nnz) 37 | 38 | # so we just extract the sparsity pattern from a dense matrix 39 | A_dense_orig, _, _ = simple_system_gen(N, M, sparse.csr_array) 40 | nnzs = np.argwhere(A_dense_orig > 0.0) 41 | vals = A_dense_orig.ravel() 42 | vals = vals[vals > 0.0] 43 | 44 | row_ind, col_ind = nnzs[:, 0], nnzs[:, 1] 45 | 46 | # we want to test on unsorted inputs 47 | perm = np.array(numpy.random.permutation(numpy.arange(row_ind.shape[0]))) 48 | row_ind = row_ind[perm] 49 | col_ind = col_ind[perm] 50 | 51 | A = sparse.csr_array((vals, (row_ind, col_ind)), shape=shape) 52 | 53 | A_dense = np.zeros(shape=shape) 54 | for r, c, v in zip(row_ind, col_ind, vals): 55 | A_dense[r, c] = v 56 | 57 | assert np.all(np.isclose(A_dense, A.todense())) 58 | 59 | 60 | if __name__ == "__main__": 61 | sys.exit(pytest.main(sys.argv)) 62 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_csr.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate.core import get_legate_runtime 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | def test_csr_from_csr_fixed(): 25 | """ 26 | 2 0 0 0 1 0 27 | 5 8 0 0 0 2 28 | 0 0 3 4 0 0 29 | 0 6 0 1 0 0 30 | 9 0 0 0 4 0 31 | 7 0 0 0 2 1 32 | """ 33 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 34 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 35 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 36 | matrix_shape = (6, 6) 37 | 38 | A = sparse.csr_array( # noqa: F841 39 | (csr_vals, col_indices, row_offsets), shape=matrix_shape 40 | ) 41 | 42 | get_legate_runtime().issue_execution_fence(block=True) 43 | 44 | 45 | @pytest.mark.parametrize("N", [7, 13]) 46 | @pytest.mark.parametrize("M", [5, 29]) 47 | def test_csr_from_csr_gen(N, M): 48 | nnz_per_row = np.random.randint(M, size=N) 49 | row_offsets = np.append([0], np.cumsum(nnz_per_row)) 50 | nnz = row_offsets[-1] 51 | col_indices = np.random.randint(M, size=nnz) 52 | csr_vals = np.random.rand(nnz) 53 | matrix_shape = (N, M) 54 | 55 | A = sparse.csr_array( # noqa: F841 56 | (csr_vals, col_indices, row_offsets), shape=matrix_shape 57 | ) 58 | 59 | 60 | @pytest.mark.parametrize("N", [7, 13]) 61 | @pytest.mark.parametrize("M", [5, 29]) 62 | def test_csr_from_empty(N, M): 63 | A = sparse.csr_array((N, M), dtype=np.float64) # noqa: F841 64 | 65 | 66 | if __name__ == "__main__": 67 | sys.exit(pytest.main(sys.argv)) 68 | -------------------------------------------------------------------------------- /tests/integration/test_csr_from_dense.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate.core import get_legate_runtime 20 | from utils.sample import simple_system_gen 21 | 22 | import legate_sparse as sparse 23 | 24 | 25 | @pytest.mark.parametrize("N", [7, 13]) 26 | @pytest.mark.parametrize("M", [5, 29]) 27 | def test_csr_from_dense(N, M): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, M, sparse.csr_array) 30 | 31 | get_legate_runtime().issue_execution_fence(block=True) 32 | 33 | 34 | if __name__ == "__main__": 35 | sys.exit(pytest.main(sys.argv)) 36 | -------------------------------------------------------------------------------- /tests/integration/test_csr_to_dense.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | def test_csr_to_dense(): 24 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 25 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 26 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 27 | matrix_shape = (6, 6) 28 | 29 | A = sparse.csr_array((csr_vals, col_indices, row_offsets), shape=matrix_shape) 30 | 31 | B = A.todense() 32 | expected_B = np.array( 33 | [ 34 | [2, 0, 0, 0, 1, 0], 35 | [5, 8, 0, 0, 0, 2], 36 | [0, 0, 3, 4, 0, 0], 37 | [0, 6, 0, 1, 0, 0], 38 | [9, 0, 0, 0, 4, 0], 39 | [7, 0, 0, 0, 2, 1], 40 | ], 41 | dtype=np.float64, 42 | ) 43 | 44 | assert (B == expected_B).all() 45 | 46 | 47 | if __name__ == "__main__": 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_csr_transpose.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from utils.sample import simple_system_gen 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | @pytest.mark.parametrize("N", [5, 29]) 25 | @pytest.mark.parametrize("M", [7, 13]) 26 | @pytest.mark.parametrize("iscopy", [True, False]) 27 | def test_csr_transpose(N, M, iscopy): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, M, sparse.csr_array) 30 | 31 | assert np.all(np.isclose(A_dense, A.T.transpose(copy=iscopy).todense())) 32 | 33 | 34 | if __name__ == "__main__": 35 | sys.exit(pytest.main(sys.argv)) 36 | -------------------------------------------------------------------------------- /tests/integration/test_diagonal.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from utils.sample import simple_system_gen 20 | 21 | from legate_sparse import csr_array 22 | 23 | 24 | @pytest.mark.parametrize("N", [7, 13]) 25 | @pytest.mark.parametrize("with_zeros", [True, False]) 26 | def test_csr_diagonal(N, with_zeros): 27 | M = N 28 | np.random.seed(0) 29 | A_dense, _, _ = simple_system_gen(N, M, None, tol=0.2) 30 | 31 | if not with_zeros: 32 | A_dense += np.eye(N, M) 33 | 34 | A = csr_array(A_dense) 35 | dense_diag = np.diagonal(A_dense) 36 | csr_diag = A.diagonal() 37 | 38 | assert np.all(np.isclose(dense_diag, csr_diag)) 39 | 40 | 41 | if __name__ == "__main__": 42 | sys.exit(pytest.main(sys.argv)) 43 | -------------------------------------------------------------------------------- /tests/integration/test_diags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | import scipy.sparse as sp 20 | 21 | import legate_sparse as sparse 22 | 23 | 24 | @pytest.mark.parametrize("N", [12, 34]) 25 | @pytest.mark.parametrize("diagonals", [3, 5]) 26 | @pytest.mark.parametrize("dtype", (np.float32, np.float64, np.complex64, np.complex128)) 27 | @pytest.mark.parametrize("fmt", ["csr", "dia"]) 28 | def test_diags(N, diagonals, dtype, fmt): 29 | A = sparse.diags( 30 | [1] * diagonals, 31 | [x - (diagonals // 2) for x in range(diagonals)], 32 | shape=(N, N), 33 | format=fmt, 34 | dtype=dtype, 35 | ) 36 | 37 | if fmt == "dia": 38 | A = A.tocsr() 39 | 40 | B = sp.diags( 41 | [1] * diagonals, 42 | [x - (diagonals // 2) for x in range(diagonals)], 43 | shape=(N, N), 44 | format=fmt, 45 | dtype=dtype, 46 | ) 47 | 48 | assert np.array_equal(A.todense(), B.todense()) 49 | 50 | 51 | if __name__ == "__main__": 52 | sys.exit(pytest.main(sys.argv)) 53 | -------------------------------------------------------------------------------- /tests/integration/test_gmres_solve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import pytest 17 | from utils.sample import sample_dense, sample_dense_vector 18 | 19 | import legate_sparse.linalg as linalg 20 | from legate_sparse import csr_array 21 | 22 | 23 | def test_gmres_solve(): 24 | N, D = 1000, 1000 25 | seed = 471014 26 | A = sample_dense(N, D, 0.1, seed) 27 | A = 0.5 * (A + A.T) 28 | A = A + N * np.eye(N) 29 | A = csr_array(A) 30 | x = sample_dense_vector(D, 0.1, seed) 31 | 32 | y = A @ x 33 | assert np.allclose((A @ x), y) 34 | 35 | x_pred, iters = linalg.gmres(A, y, atol=1e-5, tol=1e-5, maxiter=300) 36 | assert np.allclose((A @ x_pred), y, atol=1e-8) 37 | 38 | 39 | if __name__ == "__main__": 40 | import sys 41 | 42 | sys.exit(pytest.main(sys.argv)) 43 | -------------------------------------------------------------------------------- /tests/integration/test_io.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from pathlib import Path 16 | 17 | import cupynumeric as np 18 | import pytest 19 | import scipy.io as sci_io 20 | 21 | import legate_sparse.io as legate_io 22 | 23 | TEST_DIR = Path(__file__).parent.parent 24 | 25 | 26 | @pytest.fixture 27 | def test_mtx_files(): 28 | mtx_files = [ 29 | "test.mtx", 30 | "GlossGT.mtx", 31 | "Ragusa18.mtx", 32 | "cage4.mtx", 33 | "karate.mtx", 34 | ] 35 | return [str(TEST_DIR / "testdata" / mtx_file) for mtx_file in mtx_files] 36 | 37 | 38 | def test_mmread(test_mtx_files): 39 | for mtx_file in test_mtx_files: 40 | arr = legate_io.mmread(mtx_file) 41 | s = sci_io.mmread(mtx_file) 42 | assert np.array_equal(arr.todense(), s.todense()) 43 | 44 | 45 | if __name__ == "__main__": 46 | import sys 47 | 48 | sys.exit(pytest.main(sys.argv)) 49 | -------------------------------------------------------------------------------- /tests/integration/test_manual_sorting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import numpy 17 | import pytest 18 | from legate_sparse.utils import sort_by_rows_then_cols 19 | 20 | 21 | def test_manual_sorting(): 22 | N = 5 23 | for _ in range(N): 24 | rows = np.random.randint(0, 100, 100) 25 | cols = np.random.randint(0, 100, 100) 26 | assert numpy.allclose( 27 | sort_by_rows_then_cols(rows, cols), numpy.lexsort((cols, rows)) 28 | ) 29 | 30 | 31 | if __name__ == "__main__": 32 | import sys 33 | 34 | sys.exit(pytest.main(sys.argv)) 35 | -------------------------------------------------------------------------------- /tests/integration/test_nonzero.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import cupynumeric as np 16 | import numpy 17 | import pytest 18 | from utils.sample import simple_system_gen 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | @pytest.mark.parametrize("N", [8, 13]) 24 | def test_nonzero(N): 25 | """ 26 | This test checks that the nonzero method returns the correct indices for a sparse matrix. 27 | """ 28 | np.random.seed(0) 29 | A_dense, _, _ = simple_system_gen(N, N, None, tol=0.2) 30 | 31 | r_numpy, c_numpy = numpy.nonzero(A_dense) 32 | 33 | A = sparse.csr_array(A_dense) 34 | r_scipy, c_scipy = A.nonzero() 35 | 36 | assert np.all(r_numpy == r_scipy) 37 | assert np.all(c_numpy == c_scipy) 38 | 39 | 40 | if __name__ == "__main__": 41 | import sys 42 | 43 | sys.exit(pytest.main(sys.argv)) 44 | -------------------------------------------------------------------------------- /tests/integration/test_spgemm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate_sparse.runtime import runtime 20 | from utils.banded_matrix import banded_matrix 21 | from utils.sample import simple_system_gen 22 | 23 | import legate_sparse as sparse 24 | 25 | 26 | @pytest.mark.parametrize("N", [5, 29]) 27 | def test_csr_spgemm(N): 28 | np.random.seed(0) 29 | A_dense, A, _ = simple_system_gen(N, N, sparse.csr_array) 30 | 31 | B = A.copy() 32 | 33 | C = A @ B 34 | 35 | assert np.all(np.isclose(C.todense(), A_dense @ A_dense)) 36 | 37 | 38 | @pytest.mark.parametrize("N", [5, 29]) 39 | @pytest.mark.parametrize("unsupported_dtype", ["int", "bool"]) 40 | def test_csr_spgemm_unsupported_dtype(N, unsupported_dtype): 41 | np.random.seed(0) 42 | 43 | nnz_per_row = 3 44 | A = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 45 | B = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 46 | 47 | if runtime.num_gpus > 0: 48 | expected_exp = NotImplementedError 49 | with pytest.raises(expected_exp): 50 | C = A @ B # noqa: F841 51 | 52 | 53 | if __name__ == "__main__": 54 | sys.exit(pytest.main(sys.argv)) 55 | -------------------------------------------------------------------------------- /tests/integration/test_spmv.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | from legate_sparse.runtime import runtime 20 | from utils.banded_matrix import banded_matrix 21 | from utils.sample import simple_system_gen 22 | 23 | import legate_sparse as sparse 24 | 25 | 26 | @pytest.mark.parametrize("N", [5, 29]) 27 | @pytest.mark.parametrize("M", [7, 17]) 28 | @pytest.mark.parametrize("inline", [True, False]) 29 | def test_csr_spmv(N, M, inline): 30 | np.random.seed(0) 31 | A_dense, A, x = simple_system_gen(N, M, sparse.csr_array) 32 | 33 | if inline: 34 | y = np.ndarray((N,)) 35 | A.dot(x, out=y) 36 | else: 37 | y = A @ x 38 | 39 | assert np.all(np.isclose(y, A_dense @ x)) 40 | 41 | 42 | @pytest.mark.parametrize("N", [5, 29]) 43 | @pytest.mark.parametrize("nnz_per_row", [3, 9]) 44 | @pytest.mark.parametrize("unsupported_dtype", ["int", "bool"]) 45 | def test_csr_spmv_unsupported_dtype(N, nnz_per_row, unsupported_dtype): 46 | np.random.seed(0) 47 | 48 | A = banded_matrix(N, nnz_per_row).astype(unsupported_dtype) 49 | x = np.ndarray((N,)) 50 | 51 | if runtime.num_gpus > 0: 52 | expected_exp = NotImplementedError 53 | with pytest.raises(expected_exp): 54 | y = A.dot(x) # noqa: F841 55 | 56 | 57 | if __name__ == "__main__": 58 | sys.exit(pytest.main(sys.argv)) 59 | -------------------------------------------------------------------------------- /tests/integration/test_unary_operation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | 17 | import cupynumeric as np 18 | import pytest 19 | 20 | import legate_sparse as sparse 21 | 22 | 23 | def test_unary_operation(): 24 | row_offsets = np.array([0, 2, 5, 7, 9, 11, 14], dtype=np.int64) 25 | csr_vals = np.array([2, 1, 5, 8, 2, 3, 4, 6, 1, 9, 4, 7, 2, 1], dtype=np.float64) 26 | col_indices = np.array([0, 4, 0, 1, 5, 2, 3, 1, 3, 0, 4, 0, 4, 5], dtype=np.int64) 27 | matrix_shape = (6, 6) 28 | 29 | A = sparse.csr_array((csr_vals, col_indices, row_offsets), shape=matrix_shape) 30 | 31 | B = A * 2 32 | Bvalues = np.asarray(B.vals) 33 | expected_Bvalues = np.array( 34 | [4, 2, 10, 16, 4, 6, 8, 12, 2, 18, 8, 14, 4, 2], dtype=np.float64 35 | ) 36 | assert (Bvalues == expected_Bvalues).all() 37 | 38 | C = A.multiply(3) 39 | Cvalues = np.asarray(C.vals) 40 | expected_Cvalues = np.array( 41 | [6, 3, 15, 24, 6, 9, 12, 18, 3, 27, 12, 21, 6, 3], dtype=np.float64 42 | ) 43 | assert (Cvalues == expected_Cvalues).all() 44 | 45 | D = A.conj().conj() 46 | assert np.all(np.isclose(A.todense(), D.todense())) 47 | 48 | 49 | if __name__ == "__main__": 50 | sys.exit(pytest.main(sys.argv)) 51 | -------------------------------------------------------------------------------- /tests/integration/utils/banded_matrix.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | 17 | import legate_sparse as sparse 18 | 19 | 20 | def banded_matrix( 21 | N: int, 22 | nnz_per_row: int, 23 | from_diags: bool = True, 24 | init_with_ones: bool = True, 25 | verbose: bool = False, 26 | ): 27 | """ 28 | Parameters 29 | ---------- 30 | N: int 31 | Size of the NxN sparse matrix 32 | nnz_per_row: int 33 | Number of non-zero elements per row (odd number) 34 | from_diags: bool 35 | use sparse.diags to generate the banded matrix (default = True) 36 | init_with_ones: bool 37 | Initialize the matrix with ones instead of arange 38 | 39 | Returns 40 | ------- 41 | csr_array: 42 | Return a sparse matrix 43 | """ 44 | 45 | if from_diags: 46 | return sparse.diags( 47 | np.array([1] * nnz_per_row), 48 | np.array([x - (nnz_per_row // 2) for x in range(nnz_per_row)]), 49 | shape=(N, N), 50 | format="csr", 51 | dtype=np.float64, 52 | ) 53 | else: 54 | assert N > nnz_per_row 55 | assert nnz_per_row % 2 == 1 56 | half_nnz = nnz_per_row // 2 57 | 58 | pred_nrows = nnz_per_row - half_nnz 59 | post_nrows = pred_nrows 60 | main_rows = N - pred_nrows - post_nrows 61 | 62 | pred = np.arange(nnz_per_row - half_nnz, nnz_per_row + 1) 63 | post = np.flip(pred) 64 | nnz_arr = np.concatenate((pred, np.ones(main_rows) * nnz_per_row, post)) 65 | 66 | if sparse.__name__ == "legate_sparse": 67 | row_offsets = np.zeros(N + 1).astype(sparse.coord_ty) 68 | else: 69 | row_offsets = np.zeros(N + 1).astype(int) 70 | 71 | row_offsets[1 : N + 1] = np.cumsum(nnz_arr) 72 | nnz = row_offsets[-1] 73 | 74 | col_indices = np.tile( 75 | np.arange(-half_nnz, nnz_per_row - half_nnz), (N,) 76 | ) + np.repeat(np.arange(N), nnz_per_row) 77 | 78 | if init_with_ones: 79 | data = np.ones(N * nnz_per_row).astype(np.float64) 80 | else: 81 | data = np.arange(N * nnz_per_row).astype(np.float64) / N 82 | 83 | mask = col_indices >= 0 84 | mask &= col_indices < N 85 | 86 | col_indices = col_indices[mask] 87 | data = data[mask] 88 | assert data.shape[0] == nnz 89 | assert col_indices.shape[0] == nnz 90 | 91 | if verbose: 92 | np.set_printoptions(linewidth=1000) 93 | print(f"data : {data}") 94 | print(f"col_indices: {col_indices}") 95 | print(f"row_offsets: {row_offsets}") 96 | 97 | return sparse.csr_array( 98 | (data, col_indices.astype(np.int64), row_offsets.astype(np.int64)), 99 | shape=(N, N), 100 | copy=False, 101 | ) 102 | -------------------------------------------------------------------------------- /tests/integration/utils/sample.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cupynumeric as np 16 | import numpy 17 | import scipy.sparse as scpy 18 | import scipy.stats as stats 19 | 20 | 21 | class Normal(stats.rv_continuous): 22 | def _rvs(self, *args, size=None, random_state=None): 23 | return random_state.standard_normal(size) 24 | 25 | 26 | def sample(N: int, D: int, density: float, seed: int): 27 | NormalType = Normal(seed=seed) 28 | SeededNormal = NormalType() 29 | return scpy.random( 30 | N, 31 | D, 32 | density=density, 33 | format="csr", 34 | dtype=numpy.float64, 35 | random_state=seed, 36 | data_rvs=SeededNormal.rvs, 37 | ) 38 | 39 | 40 | def sample_dense(N: int, D: int, density: float, seed: int): 41 | return numpy.asarray(sample(N, D, density, seed).todense()) 42 | 43 | 44 | def sample_dense_vector(N: int, density: float, seed: int): 45 | return sample_dense(N, 1, density, seed).squeeze() 46 | 47 | 48 | def simple_system_gen(N, M, cls, tol=0.5): 49 | a_dense = np.random.rand(N, M) 50 | x = np.random.rand(M) 51 | a_dense = np.where(a_dense < tol, a_dense, 0) 52 | 53 | a_sparse = None if cls is None else cls(a_dense) 54 | 55 | return a_dense, a_sparse, x 56 | -------------------------------------------------------------------------------- /tests/testdata/GlossGT.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate pattern general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Pajek/GlossGT 5 | % name: Pajek/GlossGT 6 | % [Pajek network: graph and digraph glossary] 7 | % id: 1501 8 | % date: 2001 9 | % author: W. Cherowitzo 10 | % ed: V. Batagelj 11 | % fields: name title A id kind notes aux date author ed 12 | % aux: nodename coord 13 | % kind: directed graph 14 | %------------------------------------------------------------------------------- 15 | % notes: 16 | % ------------------------------------------------------------------------------ 17 | % Pajek network converted to sparse adjacency matrix for inclusion in UF sparse 18 | % matrix collection, Tim Davis. For Pajek datasets, See V. Batagelj & A. Mrvar, 19 | % http://vlado.fmf.uni-lj.si/pub/networks/data/. 20 | % ------------------------------------------------------------------------------ 21 | % Bill Cherowitzo: Graph and Digraph Glossary 22 | % http://www-math.cudenver.edu/~wcherowi/courses/m4408/glossary.html 23 | % Pajek's network: Barbara Zemlji"c, 2. nov 2003 24 | % The original problem had 3D xyz coordinates, but all values of z were equal 25 | % to 0, and have been removed. This graph has 2D coordinates. 
26 | %------------------------------------------------------------------------------- 27 | 72 72 122 28 | 3 4 29 | 15 4 30 | 16 4 31 | 2 6 32 | 7 6 33 | 26 6 34 | 28 6 35 | 37 6 36 | 41 6 37 | 46 6 38 | 66 6 39 | 53 9 40 | 48 12 41 | 14 16 42 | 19 17 43 | 29 17 44 | 11 19 45 | 17 19 46 | 69 19 47 | 1 20 48 | 29 20 49 | 31 20 50 | 69 20 51 | 15 22 52 | 39 22 53 | 52 22 54 | 57 22 55 | 7 26 56 | 41 26 57 | 49 26 58 | 64 26 59 | 66 26 60 | 67 26 61 | 25 27 62 | 2 28 63 | 4 28 64 | 6 28 65 | 8 28 66 | 18 28 67 | 33 28 68 | 38 28 69 | 41 28 70 | 55 28 71 | 56 28 72 | 61 28 73 | 1 30 74 | 8 30 75 | 13 30 76 | 18 30 77 | 26 30 78 | 28 30 79 | 29 30 80 | 33 30 81 | 36 30 82 | 37 30 83 | 41 30 84 | 56 30 85 | 62 30 86 | 63 30 87 | 69 30 88 | 71 30 89 | 15 31 90 | 40 32 91 | 13 37 92 | 21 38 93 | 36 38 94 | 66 38 95 | 52 39 96 | 46 40 97 | 18 41 98 | 10 42 99 | 21 42 100 | 54 43 101 | 59 43 102 | 36 44 103 | 71 45 104 | 10 46 105 | 42 46 106 | 60 50 107 | 20 51 108 | 31 51 109 | 32 51 110 | 35 52 111 | 39 52 112 | 32 58 113 | 40 58 114 | 42 58 115 | 46 58 116 | 48 58 117 | 63 62 118 | 14 65 119 | 17 65 120 | 62 65 121 | 51 68 122 | 8 69 123 | 29 69 124 | 58 69 125 | 63 69 126 | 4 71 127 | 7 71 128 | 8 71 129 | 18 71 130 | 22 71 131 | 25 71 132 | 26 71 133 | 37 71 134 | 38 71 135 | 39 71 136 | 41 71 137 | 44 71 138 | 45 71 139 | 47 71 140 | 52 71 141 | 57 71 142 | 62 71 143 | 19 72 144 | 25 72 145 | 27 72 146 | 31 72 147 | 33 72 148 | 64 72 149 | 68 72 150 | -------------------------------------------------------------------------------- /tests/testdata/Ragusa18.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Pajek/Ragusa18 5 | % name: Pajek/Ragusa18 6 | % [Pajek network: Ragusa set] 7 | % id: 1516 8 | % date: 2006 9 | % author: V. Batagelj 10 | % ed: V. Batagelj 11 | % fields: name title A id kind notes aux date author ed 12 | % aux: nodename coord 13 | % kind: directed weighted graph 14 | %------------------------------------------------------------------------------- 15 | % notes: 16 | % ------------------------------------------------------------------------------ 17 | % Pajek network converted to sparse adjacency matrix for inclusion in UF sparse 18 | % matrix collection, Tim Davis. For Pajek datasets, See V. Batagelj & A. Mrvar, 19 | % http://vlado.fmf.uni-lj.si/pub/networks/data/. 
20 | % ------------------------------------------------------------------------------ 21 | %------------------------------------------------------------------------------- 22 | 23 23 64 23 | 11 1 1 24 | 4 2 1 25 | 7 2 1 26 | 8 2 1 27 | 9 2 1 28 | 10 2 1 29 | 11 2 2 30 | 15 2 1 31 | 17 2 1 32 | 21 2 2 33 | 23 2 1 34 | 8 3 2 35 | 11 3 1 36 | 16 3 2 37 | 17 3 1 38 | 21 3 1 39 | 7 6 1 40 | 13 6 1 41 | 2 7 1 42 | 6 7 1 43 | 8 7 1 44 | 9 8 1 45 | 13 9 1 46 | 19 9 1 47 | 23 9 2 48 | 9 10 1 49 | 2 11 2 50 | 11 11 2 51 | 12 11 1 52 | 16 11 1 53 | 17 11 1 54 | 21 11 1 55 | 11 12 2 56 | 18 12 1 57 | 2 13 2 58 | 5 13 1 59 | 6 13 1 60 | 7 13 1 61 | 6 14 1 62 | 9 14 1 63 | 8 16 4 64 | 12 16 1 65 | 16 16 1 66 | 6 19 1 67 | 9 19 1 68 | 21 20 1 69 | 1 21 1 70 | 2 21 1 71 | 3 21 2 72 | 6 21 1 73 | 7 21 1 74 | 11 21 2 75 | 12 21 3 76 | 16 21 1 77 | 17 21 1 78 | 21 21 1 79 | 22 21 1 80 | 23 21 1 81 | 11 22 1 82 | 4 23 1 83 | 8 23 1 84 | 13 23 1 85 | 18 23 1 86 | 21 23 1 87 | -------------------------------------------------------------------------------- /tests/testdata/cage4.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/vanHeukelum/cage4 5 | % name: vanHeukelum/cage4 6 | % [DNA electrophoresis, 4 monomers in polymer. A. van Heukelum, Utrecht U.] 7 | % id: 905 8 | % date: 2003 9 | % author: A. van Heukelum 10 | % ed: T. Davis 11 | % fields: title A name id date author ed kind 12 | % kind: directed weighted graph 13 | %------------------------------------------------------------------------------- 14 | 9 9 49 15 | 1 1 .75 16 | 2 1 .075027667114587 17 | 4 1 .0916389995520797 18 | 5 1 .0375138335572935 19 | 8 1 .0458194997760398 20 | 1 2 .137458499328119 21 | 2 2 .687569167786467 22 | 3 2 .0916389995520797 23 | 5 2 .0375138335572935 24 | 6 2 .0458194997760398 25 | 2 3 .112541500671881 26 | 3 3 .666666666666667 27 | 4 3 .13745849932812 28 | 6 3 .0458194997760398 29 | 7 3 .0375138335572935 30 | 1 4 .112541500671881 31 | 3 4 .075027667114587 32 | 4 4 .729097498880199 33 | 7 4 .0375138335572935 34 | 8 4 .0458194997760398 35 | 1 5 .137458499328119 36 | 2 5 .075027667114587 37 | 5 5 .537513833557293 38 | 6 5 .075027667114587 39 | 7 5 .0916389995520797 40 | 9 5 .0833333333333333 41 | 2 6 .112541500671881 42 | 3 6 .0916389995520797 43 | 5 6 .13745849932812 44 | 6 6 .445874834005214 45 | 8 6 .13745849932812 46 | 9 6 .075027667114587 47 | 3 7 .075027667114587 48 | 4 7 .13745849932812 49 | 5 7 .112541500671881 50 | 7 7 .470791832661453 51 | 8 7 .112541500671881 52 | 9 7 .0916389995520797 53 | 1 8 .112541500671881 54 | 4 8 .0916389995520797 55 | 6 8 .075027667114587 56 | 7 8 .0916389995520797 57 | 8 8 .54581949977604 58 | 9 8 .0833333333333333 59 | 5 9 .25 60 | 6 9 .150055334229174 61 | 7 9 .183277999104159 62 | 8 9 .25 63 | 9 9 .166666666666667 64 | -------------------------------------------------------------------------------- /tests/testdata/karate.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate pattern symmetric 2 | %------------------------------------------------------------------------------- 3 | % UF Sparse Matrix Collection, Tim Davis 4 | % http://www.cise.ufl.edu/research/sparse/matrices/Newman/karate 5 | % name: Newman/karate 6 | % [Karate club, from Wayne Zachary, 
1977] 7 | % id: 2399 8 | % date: 1977 9 | % author: W. Zachary 10 | % ed: M. Newman 11 | % fields: name title A id date author kind notes ed 12 | % kind: undirected graph 13 | %------------------------------------------------------------------------------- 14 | % notes: 15 | % Network collection from M. Newman 16 | % http://www-personal.umich.edu/~mejn/netdata/ 17 | % 18 | % The graph "karate" contains the network of friendships between the 34 19 | % members of a karate club at a US university, as described by Wayne Zachary 20 | % in 1977. If you use these data in your work, please cite W. W. Zachary, An 21 | % information flow model for conflict and fission in small groups, Journal of 22 | % Anthropological Research 33, 452-473 (1977). 23 | %------------------------------------------------------------------------------- 24 | 34 34 78 25 | 2 1 26 | 3 1 27 | 4 1 28 | 5 1 29 | 6 1 30 | 7 1 31 | 8 1 32 | 9 1 33 | 11 1 34 | 12 1 35 | 13 1 36 | 14 1 37 | 18 1 38 | 20 1 39 | 22 1 40 | 32 1 41 | 3 2 42 | 4 2 43 | 8 2 44 | 14 2 45 | 18 2 46 | 20 2 47 | 22 2 48 | 31 2 49 | 4 3 50 | 8 3 51 | 9 3 52 | 10 3 53 | 14 3 54 | 28 3 55 | 29 3 56 | 33 3 57 | 8 4 58 | 13 4 59 | 14 4 60 | 7 5 61 | 11 5 62 | 7 6 63 | 11 6 64 | 17 6 65 | 17 7 66 | 31 9 67 | 33 9 68 | 34 9 69 | 34 10 70 | 34 14 71 | 33 15 72 | 34 15 73 | 33 16 74 | 34 16 75 | 33 19 76 | 34 19 77 | 34 20 78 | 33 21 79 | 34 21 80 | 33 23 81 | 34 23 82 | 26 24 83 | 28 24 84 | 30 24 85 | 33 24 86 | 34 24 87 | 26 25 88 | 28 25 89 | 32 25 90 | 32 26 91 | 30 27 92 | 34 27 93 | 34 28 94 | 32 29 95 | 34 29 96 | 33 30 97 | 34 30 98 | 33 31 99 | 34 31 100 | 33 32 101 | 34 32 102 | 34 33 103 | -------------------------------------------------------------------------------- /tests/testdata/test.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real symmetric 2 | 5 5 3 3 | 1 1 2 4 | 1 4 3 5 | 2 5 4 6 | --------------------------------------------------------------------------------
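Putting the pieces above together: the integration tests exercise the public API (csr_array construction, SpMV via @, constructors such as sparse.diags, the iterative solvers in legate_sparse.linalg, and MatrixMarket I/O via legate_sparse.io.mmread). The sketch below strings those same calls into a standalone script. It is illustrative only: the diagonally dominant tridiagonal matrix and the relative path to tests/testdata/test.mtx (assumed to be run from the repository root, typically under the legate launcher) are choices made for this example, not something prescribed by the repository.

import cupynumeric as np

import legate_sparse as sparse
import legate_sparse.io as legate_io
import legate_sparse.linalg as linalg

# Load the small symmetric MatrixMarket file shipped with the tests
# (5x5, with entries (1,1)=2, (1,4)=3, (2,5)=4 and their symmetric counterparts).
M = legate_io.mmread("tests/testdata/test.mtx")
print(M.todense())

# Build a symmetric, diagonally dominant tridiagonal matrix in CSR format,
# the same way test_diags.py and utils/banded_matrix.py construct banded matrices.
N = 1000
A = sparse.diags([-1.0, 4.0, -1.0], [-1, 0, 1], shape=(N, N), format="csr", dtype=np.float64)

# SpMV and a conjugate-gradient solve, mirroring test_spmv.py and test_cg_solve.py.
x_true = np.random.rand(N)
b = A @ x_true
x, iters = linalg.cg(A, b, tol=1e-8)
assert np.allclose(A @ x, b, rtol=1e-8, atol=0.0)
print(f"CG converged in {iters} iterations")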