├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── .gitmodules ├── .gitignore ├── demo ├── micro_benchmark │ ├── spmv │ │ ├── run_multiple.sh │ │ ├── spmv_single_benchmark.py │ │ └── spmv_benchmark (copy).py │ └── spmm_benchmark.py └── poisson │ └── poisson.py ├── mtspy ├── __init__.py ├── cpp │ ├── thread_control.hpp │ ├── eigen.hpp │ ├── cmake │ │ └── modules │ │ │ └── FindEigen3.cmake │ ├── sparse.hpp │ └── main.cpp ├── threads.py ├── linear_operator.py ├── utils.py └── sparse_ops.py ├── tests ├── test_thread_control.py ├── test_utils.py ├── test_linear_operator.py └── test_spmv.py ├── Dockerfile ├── .github └── workflows │ └── python-package.yml ├── README.md ├── CMakeLists.txt ├── setup.py └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.3 2 | scipy>=1.4.1 -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # In requirements_dev.txt 2 | -r requirements.txt 3 | pytest>=5.1.3 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,*migrations* 3 | max-line-length = 119 4 | ignore = E203 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pybind11"] 2 | path = third-party/pybind11 3 | url = https://github.com/pybind/pybind11.git -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | _build/ 4 | _generate/ 5 | .vscode/ 6 | .pytest_cache/ 7 | *.so 8 | *.py[cod] 9 | *.egg-info 10 | *.tar.gz 11 | *.mtx 12 | *.csv* 
#!/bin/bash
# Run the single-matrix SpMV benchmark five times for each SuiteSparse matrix.
#
# Bash is required here: the script uses an array and {1..5} brace expansion,
# neither of which exists in a plain POSIX sh (e.g. dash, the default /bin/sh
# on Ubuntu), so the previous "#!/bin/sh" shebang made the script fail there.

matrix_list=('vanHeukelum/cage15' 'Schenk/nlpkkt200' 'Fluorem/HV15R' 'Janna/Queen_4147')

for matrix in "${matrix_list[@]}"
do
    for i in {1..5}
    do
        echo "Matrix $matrix, run number $i"
        python3 spmv_single_benchmark.py "$matrix" --threads=12
    done
done
def test_thread_control():
    """Check that thread_control applies and clamps the requested thread count."""
    available = multiprocessing.cpu_count()
    thread_control.set_num_threads(available)
    assert thread_control.get_max_threads() == available

    # (requested, expected) pairs, in order: asking for fewer threads is
    # honoured, asking for more than the current maximum is clamped to it.
    scenarios = ((1, 1), (available + 1, available))
    for requested, expected in scenarios:
        with thread_control(requested, True):
            assert thread_control.get_max_threads() == expected
3 | 4 | from scipy.sparse.linalg import LinearOperator, gmres, cg 5 | import numpy 6 | 7 | from mtspy.utils import get_csr_matrix, IterationCallback 8 | import mtspy 9 | 10 | matrix = mtspy.utils.get_csr_matrix("ACUSIM/Pres_Poisson") 11 | 12 | 13 | A = mtspy.aslinearoperator(matrix) 14 | b = numpy.random.rand(A.shape[0]) 15 | 16 | 17 | # with mtspy.thread_control(2, True): 18 | # itcount = IterationCallback() 19 | # x = cg(matrix, b, callback=itcount) 20 | 21 | # print(itcount.nit) 22 | 23 | # with mtspy.thread_control(2, True): 24 | # itcount = IterationCallback() 25 | # x = cg(L, b, callback=itcount) 26 | 27 | # print(itcount.nit) 28 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | container: igorbaratta/mtspy 9 | steps: 10 | - name: git Actions 11 | uses: srt32/git-actions@v0.0.3 12 | - uses: actions/checkout@v2 13 | with: 14 | submodules: recursive 15 | - name: Install dependencies 16 | run: | 17 | python3 -m pip install --upgrade pip 18 | pip install flake8 pytest pytest-cov 19 | if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi 20 | - name: Verify pep8, pyflakes and circular complexity 21 | run: flake8 mtspy 22 | - name: Install mtspy 23 | run: python3 -m pip install -v . 
def test_callbacks():
    """Solve I x = b with CG and check both callback helpers."""
    L = mtspy.aslinearoperator(identity(100))
    b = numpy.ones(100)

    itcount = IterationCallback()
    residuals = ResidualCallback()

    x1, info = cg(L, b, callback=itcount)
    assert info == 0
    assert itcount.nit == 1

    x2, info = cg(L, b, callback=residuals)
    assert info == 0
    # The callback records the vector handed over by cg; for the identity
    # operator the converged iterate must equal b. The former check,
    # `sum(residuals.residual - b).all() == 0`, only failed when *every*
    # entry differed from b, so it could not detect a partially wrong result.
    assert numpy.allclose(residuals.residual[-1], b)

    assert numpy.allclose(x1, x2)
class thread_control():
    """Context manager that temporarily changes the mtspy_cpp thread count.

    On entry the requested number of threads (clamped to the maximum that was
    reported when the object was created) is applied; on exit the cached
    maximum is restored and the wall-clock time spent inside the block is
    stored in ``elapsed_time``.

    The three class methods are thin wrappers around the functions of the
    same name in the ``mtspy_cpp`` extension module and can be used without
    creating an instance.
    """

    @classmethod
    def get_max_threads(cls):
        """Return the value of ``mtspy_cpp.get_max_threads()``."""
        return mtspy_cpp.get_max_threads()

    @classmethod
    def get_num_threads(cls):
        """Return the value of ``mtspy_cpp.get_num_threads()``."""
        return mtspy_cpp.get_num_threads()

    @classmethod
    def set_num_threads(cls, n):
        """Forward a validated thread count to ``mtspy_cpp.set_num_threads``.

        Raises
        ------
        TypeError
            If ``n`` is not an ``int`` (``bool`` is rejected as well).
        ValueError
            If ``n`` is smaller than 1.
        """
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if isinstance(n, bool) or not isinstance(n, int):
            raise TypeError(
                "number of threads must be an int, got {}".format(type(n).__name__))
        # thread_control.hpp only guards ``n > 0`` with a C assert, which
        # presumably aborts the interpreter, so reject bad values here.
        if n < 1:
            raise ValueError(
                "number of threads must be positive, got {}".format(n))
        mtspy_cpp.set_num_threads(n)

    def __init__(self, num_threads, timer=False):
        """Remember the current maximum and the (clamped) requested count.

        Parameters
        ----------
        num_threads : int
            Number of threads to use inside the ``with`` block.
        timer : bool
            When true, the elapsed time is printed on exit.
        """
        self._cached_max_threads = self.get_max_threads()
        self._num_threads = min(num_threads, self._cached_max_threads)
        self._timer = timer

    def __enter__(self):
        self._start_time = time.perf_counter()
        self.set_num_threads(self._num_threads)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.set_num_threads(self._cached_max_threads)
        # The requested count is deliberately left untouched: it used to be
        # overwritten with get_num_threads() here, which made ``num_threads``
        # misleading after the block and broke re-entering the same object.
        self.elapsed_time = time.perf_counter() - self._start_time
        if self._timer:
            print("Elapsed time (s): ", self.elapsed_time)

    @property
    def num_threads(self):
        """Number of threads this manager applies on entry."""
        return self._num_threads
class LinearOperator(scipy.sparse.linalg.LinearOperator):
    """SciPy-compatible linear operator backed by the mtspy sparse kernels.

    The wrapped sparse matrix is kept in ``self.matrix``; products are
    delegated to ``matvec``, ``matmat`` and ``spmatmat`` from
    ``mtspy.sparse_ops``.
    """

    def __init__(self, matrix: sp.csr_matrix):
        """Wrap ``matrix`` and take shape and dtype from it."""
        self.matrix = matrix
        super().__init__(shape=matrix.shape, dtype=matrix.dtype)
        self.__adj = None
        self.args = (matrix,)

    def _matvec(self, x):
        """Return the product of the wrapped matrix with ``x``."""
        return matvec(self.matrix, x)

    def _matmat(self, X):
        """Multiply the wrapped matrix by ``X``.

        ``X`` may be another mtspy ``LinearOperator``, any SciPy sparse
        container, or a ``numpy.ndarray``.

        Raises
        ------
        TypeError
            If ``X`` is none of the supported types.
        """
        if isinstance(X, LinearOperator):
            return spmatmat(self.matrix, X.matrix)
        # issparse() also recognises the newer SciPy sparse *array* classes,
        # which an isinstance check against spmatrix rejects.
        if sp.issparse(X):
            return spmatmat(self.matrix, sp.csr_matrix(X))
        if isinstance(X, numpy.ndarray):
            return matmat(self.matrix, X)
        raise TypeError("type not understood")

    def __matmul__(self, other):
        """Route ``self @ other`` straight to ``_matmat``."""
        return self._matmat(other)


def aslinearoperator(A):
    """
    Return A as a mtspy LinearOperator.

    An existing mtspy ``LinearOperator`` is returned unchanged, a sparse
    container is converted with ``tocsr()``, and anything else is passed to
    ``scipy.sparse.csr_matrix``.

    Raises
    ------
    TypeError
        If the conversion to CSR raises ``TypeError``; the original error is
        kept as ``__cause__``.
    """
    # check type
    if isinstance(A, LinearOperator):
        return A
    if sp.issparse(A):
        return LinearOperator(A.tocsr())
    try:
        return LinearOperator(sp.csr_matrix(A))
    except TypeError as err:
        raise TypeError('type not understood') from err
cmake_minimum_required(VERSION 3.10)
project(mtspy_cpp)

# Set C++ standard
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Set c++ flags
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
if(COMPILER_SUPPORTS_MARCH_NATIVE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Append the warning flags instead of overwriting CMAKE_CXX_FLAGS: a plain
# set() here discarded both the -march=native flag added above and whatever
# was passed in through the CXXFLAGS environment variable (setup.py uses it
# to define VERSION_INFO).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O2")

# find packages and dependencies
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/mtspy/cpp/cmake/modules")

find_package(pybind11 CONFIG HINTS $ENV{PYBIND11_DIR} $ENV{PYBIND11_ROOT} QUIET)
if (NOT pybind11_FOUND)
  # Fall back to the bundled git submodule.
  add_subdirectory("${PROJECT_SOURCE_DIR}/third-party/pybind11")
  # set() without a value *unsets* the variable; record the result explicitly.
  set(pybind11_FOUND TRUE)
endif()

# Check for required packages
find_package(OpenMP REQUIRED)
pybind11_add_module(mtspy_cpp mtspy/cpp/main.cpp)

target_link_libraries(mtspy_cpp PRIVATE OpenMP::OpenMP_CXX)

# Set compiler specific flags for intel compiler
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
  message(STATUS "Setting Intel compiler specific flags")
  set(FLAGS_INTEL_DEFAULT "-xAVX -m64 -parallel -qopenmp -qopt-report=2 -qopt-report-phase=vec -qopt-report=2 -qopt-report-phase=par")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_INTEL_DEFAULT}")
endif()


# Find optional packages
find_package(Eigen3 3.3.7 NO_MODULE)
if (TARGET Eigen3::Eigen)
  add_definitions(-DUSE_EIGEN_BACKEND)
  message(STATUS "Eigen backend is available.")
  target_link_libraries(mtspy_cpp PUBLIC Eigen3::Eigen)
endif()
def get_csr_matrix(Name: str, verbose: bool = True) -> csr_matrix:
    """
    Get matrix from the SuiteSparse Matrix Collection website and
    convert to the scipy.csr format.

    Parameters
    ----------
    Name : str
        Matrix identifier of the form ``"Group/Name"``, e.g. ``"HB/1138_bus"``.
    verbose : bool
        Print progress messages when true.

    Returns
    -------
    The matrix read from ``<Name>/<Name>.mtx`` (relative to the current
    working directory), converted with ``tocsr()``. The archive is only
    downloaded and extracted when that file does not exist yet.

    Raises
    ------
    ValueError
        If ``Name`` does not contain a ``"Group/Name"`` pair.
    """
    base_url = "https://suitesparse-collection-website.herokuapp.com/MM/"
    url = base_url + Name + ".tar.gz"
    parts = Name.split("/")
    if len(parts) < 2 or not parts[1]:
        raise ValueError(
            "matrix name must have the form 'Group/Name', got {!r}".format(Name))
    infile = parts[1]
    archive = infile + '.tar.gz'
    dest_file = infile + '/' + infile + ".mtx"

    # Download the file if it does not exist
    if os.path.isfile(dest_file):
        if verbose:
            print('\t -----------------------------------------------------------')
            print('\t File already exists.')
    else:
        if verbose:
            print('\t -----------------------------------------------------------')
            print('\t Downloading matrix file from suitesparse collection')
        urllib.request.urlretrieve(url, archive)

        if verbose:
            print('\t -----------------------------------------------------------')
            print('\t Extrating tar.gz file to folder ./', infile)
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(archive) as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from the network: refuse members that
                # would escape the target directory or create special files.
                tar.extractall(filter="data")
            else:
                # NOTE(review): this Python has no extraction filters, so the
                # downloaded archive is extracted as-is.
                tar.extractall()

    if verbose:
        print('\t -----------------------------------------------------------')
        print('\t Reading matrix and converting to csr format')
    A = mmread(dest_file)
    A = A.tocsr()

    if verbose:
        print('\t -----------------------------------------------------------')
        print("\t Done! \n")

    return A


class IterationCallback:
    """Callable that counts how many times it has been invoked."""

    nit = 0

    def __call__(self, x):
        self.nit += 1


class ResidualCallback:
    """Callable that counts its invocations and records every argument.

    The recorded values are kept, in call order, in ``self.residual``.
    """

    nit = 0

    def __init__(self):
        # Per-instance list: a class-level ``residual = []`` was shared by
        # every ResidualCallback object in the process.
        self.residual = []

    def __call__(self, x):
        self.nit += 1
        # Store a snapshot when possible; callers such as iterative solvers
        # may hand over the same buffer on every call and update it in place.
        self.residual.append(x.copy() if hasattr(x, "copy") else x)
class CMakeExtension(Extension):
    """Extension without sources; the actual build is delegated to CMake."""

    def __init__(self, name, sourcedir=''):
        super().__init__(name, sources=[])
        # Absolute path of the directory that holds the top-level CMakeLists.txt.
        self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
    """``build_ext`` command that configures and builds each extension with CMake."""

    def run(self):
        """Build every registered extension, failing early if CMake is missing.

        Raises
        ------
        RuntimeError
            If the ``cmake`` executable cannot be started.
        """
        try:
            subprocess.check_output(['cmake', '--version'])
        except OSError as err:
            names = ", ".join(ext.name for ext in self.extensions)
            raise RuntimeError(
                "CMake must be installed to build the following extensions: "
                + names) from err

        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        """Run the CMake configure and build steps for ``ext`` in ``build_temp``."""
        extdir = os.path.abspath(os.path.dirname(
            self.get_ext_fullpath(ext.name)))
        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        cfg = 'Debug' if self.debug else 'Release'
        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
                      '-DPYTHON_EXECUTABLE=' + sys.executable,
                      '-DCMAKE_BUILD_TYPE=' + cfg]
        build_args = ['--config', cfg, '--', '-j4']

        # Hand the package version to the compiler through CXXFLAGS.
        env = os.environ.copy()
        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
                                                              self.distribution.get_version())
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.build_temp, exist_ok=True)
        subprocess.check_call(['cmake', ext.sourcedir] +
                              cmake_args, cwd=self.build_temp, env=env)
        subprocess.check_call(['cmake', '--build', '.'] +
                              build_args, cwd=self.build_temp)
@pytest.mark.parametrize('dtype', dtype_list)
@pytest.mark.skipif(not mtspy.cpp.has_eigen(), reason="Sparse-Sparse Matrix Product only supported with eigen backend.")
def test_sparse_sparse_int32(dtype):
    """Compare mtspy.spmatmat with the SciPy sparse-sparse product."""
    m, n, k = 500, 1000, 2000
    # Build the operands with the parametrized dtype; it used to be ignored,
    # so the same float64 case simply ran once per dtype.
    A = sparse.random(m, k, density=0.1, format="csr", dtype=dtype)
    B = sparse.random(k, n, density=0.1, format="csr", dtype=dtype)

    C1 = mtspy.spmatmat(A, B, True)
    C2 = A @ B

    # Every stored entry of the difference must vanish up to rounding. The
    # former `assert (C1 - C2).data.all()` required the differences to be
    # non-zero (or absent), so it could not catch a wrong product.
    diff = (C1 - C2).data
    tol = 1e3 * numpy.finfo(dtype).eps
    assert numpy.allclose(diff, 0, atol=tol)
use_eigen) 71 | v2 = M @ v0 72 | assert(numpy.allclose(v1, v2)) 73 | assert len(recwarn) == 1 74 | assert recwarn.pop(sparse.SparseEfficiencyWarning) 75 | 76 | 77 | def test_sparse_ops_errors(): 78 | v0 = numpy.random.rand(10, 1) 79 | M = sparse.random(5, 5, density=0.1, format="csr") 80 | with pytest.raises(ValueError): 81 | v1 = mtspy.matvec(M, v0) 82 | v1 = mtspy.matmat(M, v0) 83 | 84 | 85 | def test_complex_warning(recwarn): 86 | M = sparse.random(5, 5, density=0.1, format="csr") 87 | v0 = numpy.random.rand(M.shape[0], 1) 88 | v0 = v0.astype(numpy.complex128) 89 | v1 = mtspy.matvec(M, v0) 90 | v2 = mtspy.matmat(M, v0) 91 | assert len(recwarn) == 2 92 | assert recwarn.pop(sparse.SparseEfficiencyWarning) 93 | -------------------------------------------------------------------------------- /mtspy/cpp/eigen.hpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_EIGEN_BACKEND 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | using einge_array_t = Eigen::Ref>; 10 | 11 | // declares a row-major dense matrix type of ScalarType 12 | template 13 | using eigen_dense_t = Eigen::Matrix; 14 | 15 | // declares a row-major sparse matrix type of ScalarType in Eigen, using indices of 16 | // type Indextype (int32 or int64). 
17 | // Note: Eigen only work with row-major sparse matrix in parallel 18 | template 19 | using sparse_matrix_t = Eigen::SparseMatrix; 20 | 21 | template 22 | eigen_dense_t 23 | sparse_dense_eigen(IndType rows, IndType cols, IndType nnz, 24 | const einge_array_t &data, 25 | const einge_array_t &indptr, 26 | const einge_array_t &indices, 27 | const eigen_dense_t &dense) 28 | { 29 | // get data pointers 30 | const ScalarType *data_ptr = data.data(); 31 | const IndType *displ_ptr = indptr.data(); 32 | const IndType *indices_ptr = indices.data(); 33 | 34 | // Temporarily release global interpreter lock (GIL) 35 | pybind11::gil_scoped_release release; 36 | Eigen::Map> sm1(rows, cols, nnz, displ_ptr, indices_ptr, data_ptr); 37 | eigen_dense_t output = sm1 * dense; 38 | 39 | pybind11::gil_scoped_acquire acquire; 40 | return output; 41 | } 42 | 43 | template 44 | sparse_matrix_t 45 | sparse_sparse_eigen(IndType A_rows, IndType A_cols, IndType A_nnz, 46 | const einge_array_t &A_data, 47 | const einge_array_t &A_indptr, 48 | const einge_array_t &A_indices, 49 | IndType B_rows, IndType B_cols, IndType B_nnz, 50 | const einge_array_t &B_data, 51 | const einge_array_t &B_indptr, 52 | const einge_array_t &B_indices) 53 | { 54 | 55 | // map matrix input matrices to eigen sparse matrix 56 | Eigen::Map> A_eigen(A_rows, A_cols, A_nnz, 57 | A_indptr.data(), 58 | A_indices.data(), 59 | A_data.data()); 60 | 61 | Eigen::Map> B_eigen(B_rows, B_cols, B_nnz, 62 | B_indptr.data(), 63 | B_indices.data(), 64 | B_data.data()); 65 | 66 | // Temporarily release global interpreter lock (GIL) 67 | pybind11::gil_scoped_release release; 68 | sparse_matrix_t C = A_eigen * B_eigen; 69 | pybind11::gil_scoped_acquire acquire; 70 | 71 | return C; 72 | } 73 | 74 | #endif 75 | 76 | bool has_eigen() 77 | { 78 | #ifdef USE_EIGEN_BACKEND 79 | return true; 80 | #else 81 | return false; 82 | #endif 83 | } -------------------------------------------------------------------------------- 
/mtspy/cpp/cmake/modules/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | # 13 | # and the following imported target: 14 | # 15 | # Eigen3::Eigen - The header-only Eigen library 16 | # 17 | # This module reads hints about search locations from 18 | # the following environment variables: 19 | # 20 | # EIGEN3_ROOT 21 | # EIGEN3_ROOT_DIR 22 | 23 | # Copyright (c) 2006, 2007 Montel Laurent, 24 | # Copyright (c) 2008, 2009 Gael Guennebaud, 25 | # Copyright (c) 2009 Benoit Jacob 26 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
27 | 28 | if(NOT Eigen3_FIND_VERSION) 29 | if(NOT Eigen3_FIND_VERSION_MAJOR) 30 | set(Eigen3_FIND_VERSION_MAJOR 2) 31 | endif() 32 | if(NOT Eigen3_FIND_VERSION_MINOR) 33 | set(Eigen3_FIND_VERSION_MINOR 91) 34 | endif() 35 | if(NOT Eigen3_FIND_VERSION_PATCH) 36 | set(Eigen3_FIND_VERSION_PATCH 0) 37 | endif() 38 | 39 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 40 | endif() 41 | 42 | macro(_eigen3_check_version) 43 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 44 | 45 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 46 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 47 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 48 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 49 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 50 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 51 | 52 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 53 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 54 | set(EIGEN3_VERSION_OK FALSE) 55 | else() 56 | set(EIGEN3_VERSION_OK TRUE) 57 | endif() 58 | 59 | if(NOT EIGEN3_VERSION_OK) 60 | 61 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 62 | "but at least version ${Eigen3_FIND_VERSION} is required") 63 | endif() 64 | endmacro() 65 | 66 | if (EIGEN3_INCLUDE_DIR) 67 | 68 | # in cache already 69 | _eigen3_check_version() 70 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 71 | set(Eigen3_FOUND ${EIGEN3_VERSION_OK}) 72 | 73 | else () 74 | 75 | # search first if an Eigen3Config.cmake is available in the system, 76 | # if successful this would set EIGEN3_INCLUDE_DIR and the rest of 77 | # the script will work as usual 78 | 
def _as_csr(A):
    """Return ``A`` in CSR format, warning when a conversion is required.

    Objects that are not scipy sparse matrices, and matrices that already are
    ``csr_matrix`` instances, are returned unchanged.
    """
    if sparse.issparse(A) and not isinstance(A, sparse.csr_matrix):
        # stacklevel=3 attributes the warning to the caller of the public
        # function rather than to this helper.
        warnings.warn("Converting sparse matrix to CSR",
                      sparse.SparseEfficiencyWarning, stacklevel=3)
        A = sparse.csr_matrix(A)
    return A


def _match_dtypes(A, dense):
    """Return ``(A, dense)`` sharing one dtype, with ``dense`` in C order.

    ``A`` is upcast (with a warning) only when ``dense`` is complex and ``A``
    is real; in every other case ``dense`` is cast to ``A.dtype``.
    """
    if numpy.iscomplexobj(dense) and not numpy.iscomplexobj(A):
        A = A.astype(dense.dtype)
        warnings.warn("Converting sparse matrix to complex",
                      sparse.SparseEfficiencyWarning, stacklevel=3)

    # Convert to row-major (C-style) if it's not already.
    dense = numpy.asanyarray(dense, dtype=A.dtype, order='C')
    return A, dense


def matvec(A: sparse.spmatrix, x: numpy.ndarray, use_eigen=False) -> numpy.ndarray:
    """
    Performs the operation y = A * x where A is an (m, n) sparse matrix
    and x is a column vector or rank-1 array of size n.

    The result has shape (m,) when x has shape (n,) and (m, 1) when x has
    shape (n, 1). Any other shape of x raises ValueError.

    To avoid copies it's recommended that A and x have the same dtype.
    """

    m, n = A.shape
    A = _as_csr(A)

    if x.shape == (n,):
        output_shape = (m,)
    elif x.shape == (n, 1):
        output_shape = (m, 1)
    else:
        raise ValueError('Dimension mismatch')

    A, x = _match_dtypes(A, x)

    spmv = cpp.sparse_vec

    # If use_eigen is true try to use it as the computational backend,
    # fall back to the built-in spmv if it's not linked.
    if use_eigen:
        try:
            spmv = cpp.sparse_dense_eigen
        except AttributeError:
            print("Eigen not available, using built-in backend.")

    y = spmv(m, n, A.nnz,
             A.data, A.indptr,
             A.indices, x)

    return numpy.reshape(y, output_shape, 'C')


def matmat(A: sparse.spmatrix, X: numpy.ndarray, use_eigen: bool = False) -> numpy.ndarray:
    """
    Performs the operation Y = A * X, where A is an (m, n) sparse matrix
    and X is an (n, k) dense matrix.

    Raises ValueError when the leading dimension of X differs from n.

    To avoid copies it's recommended that A and X have the same dtype.
    """

    m, n = A.shape
    A = _as_csr(A)

    if X.shape[0] != n:
        raise ValueError('Dimension mismatch')

    # Same dtype rule as matvec: X is cast to A.dtype unless X is complex and
    # A is real. This also covers mixed real dtypes (e.g. float64 A with
    # float32 X), which previously left the target dtype undefined.
    A, X = _match_dtypes(A, X)

    spmm = cpp.sparse_dense
    # If use_eigen is true try to use it as the computational backend,
    # fall back to the built-in spmm if it's not linked.
    if use_eigen:
        try:
            spmm = cpp.sparse_dense_eigen
        except AttributeError:
            print("Eigen not available, using built-in backend instead.")

    Y = spmm(m, n, A.nnz, A.data, A.indptr, A.indices, X)

    return Y


def spmatmat(A: sparse.spmatrix, B: sparse.spmatrix, use_eigen: bool = True) -> sparse.spmatrix:
    """
    Performs the operation C = A * B, where A is a (m, k) sparse matrix
    and B is a (k, n) sparse matrix.

    Only the Eigen backend implements this product: NotImplementedError is
    raised when use_eigen is false or the extension was built without Eigen.
    ValueError is raised when the inner dimensions of A and B differ.

    Returns the product exactly as handed back by the backend binding
    (presumably a scipy CSR matrix -- confirm against the pybind11 bindings).
    """

    if not use_eigen:
        raise NotImplementedError("spmatmat is only implemented for the Eigen backend.")

    try:
        spmspm = cpp.sparse_sparse_eigen
    except AttributeError as err:
        raise NotImplementedError("Eigen not available.") from err

    if A.shape[1] != B.shape[0]:
        raise ValueError('Dimension mismatch')

    # The backend receives the raw (data, indptr, indices) arrays and reads
    # them as row-major storage, so both operands must be CSR.
    A = _as_csr(A)
    B = _as_csr(B)

    C = spmspm(A.shape[0], A.shape[1], A.nnz, A.data, A.indptr, A.indices,
               B.shape[0], B.shape[1], B.nnz, B.data, B.indptr, B.indices)

    return C
reduction(+: result_i) for complex 44 | result_i = 0; 45 | #pragma omp simd 46 | for (IndType j = 0; j < local_size; j++) 47 | { 48 | const IndType idx = current_inds[j]; 49 | result_i += (current_data[j] * vec_ptr[idx]); 50 | } 51 | 52 | result_ptr[i] = result_i; 53 | } 54 | 55 | pybind11::gil_scoped_acquire acquire; 56 | 57 | return result; 58 | } 59 | 60 | template 61 | array_t 62 | sparse_dense(IndType srows, IndType scols, IndType nnz, const array_t &data, 63 | const array_t &displ, const array_t &indices, 64 | const array_t &dense) 65 | { 66 | // get pointers to sparse matrix data 67 | const ScalarType *data_ptr = data.data(); 68 | const IndType *displ_ptr = displ.data(); 69 | const IndType *indices_ptr = indices.data(); 70 | 71 | // get pointers to dense matrix data 72 | const IndType drows = dense.shape(0); 73 | const IndType dcols = dense.shape(1); 74 | const ScalarType *dense_ptr = dense.data(); 75 | 76 | // Check data consistency 77 | if (drows != scols) 78 | throw std::runtime_error("Size mismatch."); 79 | 80 | if (nnz != displ_ptr[srows]) 81 | throw std::runtime_error("The sparse matrix data are not consistent."); 82 | 83 | // Allocate output array 84 | array_t result(srows * dcols); 85 | auto buffer = result.request(true); 86 | ScalarType *result_ptr = (ScalarType *)buffer.ptr; 87 | std::fill(result.mutable_data(), result.mutable_data() + result.size(), 0.); 88 | 89 | // Temporarily release global interpreter lock (GIL) 90 | pybind11::gil_scoped_release release; 91 | 92 | #pragma omp parallel for schedule(guided) 93 | for (IndType i = 0; i < srows; i++) 94 | { 95 | const IndType local_size = displ_ptr[i + 1] - displ_ptr[i]; 96 | const ScalarType *current_data = data_ptr + displ_ptr[i]; 97 | const IndType *current_inds = indices_ptr + displ_ptr[i]; 98 | 99 | for (IndType k = 0; k < local_size; k++) 100 | #pragma omp simd 101 | for (IndType j = 0; j < dcols; j++) 102 | { 103 | const IndType idx = (dcols * current_inds[k]) + j; 104 | result_ptr[i * dcols 
+ j] += current_data[k] * dense_ptr[idx]; 105 | } 106 | } 107 | 108 | pybind11::gil_scoped_acquire acquire; 109 | 110 | result.resize({srows, dcols}); 111 | 112 | return result; 113 | } -------------------------------------------------------------------------------- /mtspy/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include "eigen.hpp" 2 | #include "sparse.hpp" 3 | #include "thread_control.hpp" 4 | #include 5 | #include 6 | 7 | namespace py = pybind11; 8 | 9 | PYBIND11_MODULE(mtspy_cpp, m) 10 | { 11 | //-------------------------------------------------------------------------------------// 12 | // Sparse Matrix Vector Product (32 bit indices) 13 | m.def("sparse_vec", &sparse_vec, py::return_value_policy::move); 14 | m.def("sparse_vec", &sparse_vec, py::return_value_policy::move); 15 | m.def("sparse_vec", &sparse_vec, std::int32_t>, py::return_value_policy::move); 16 | m.def("sparse_vec", &sparse_vec, std::int32_t>, py::return_value_policy::move); 17 | // Sparse Matrix Vector Product (64 bit indices) 18 | m.def("sparse_vec", &sparse_vec, py::return_value_policy::move); 19 | m.def("sparse_vec", &sparse_vec, py::return_value_policy::move); 20 | m.def("sparse_vec", &sparse_vec, std::int64_t>, py::return_value_policy::move); 21 | m.def("sparse_vec", &sparse_vec, std::int64_t>, py::return_value_policy::move); 22 | 23 | //-------------------------------------------------------------------------------------// 24 | // Sparse Matrix - Dense Matrix Product (32 bit indices) 25 | m.def("sparse_dense", &sparse_dense, py::return_value_policy::move); 26 | m.def("sparse_dense", &sparse_dense, py::return_value_policy::move); 27 | m.def("sparse_dense", &sparse_dense, std::int32_t>, py::return_value_policy::move); 28 | m.def("sparse_dense", &sparse_dense, std::int32_t>, py::return_value_policy::move); 29 | // Sparse Matrix - Dense Matrix Product (64 bit indices) 30 | m.def("sparse_dense", &sparse_dense, 
py::return_value_policy::move); 31 | m.def("sparse_dense", &sparse_dense, py::return_value_policy::move); 32 | m.def("sparse_dense", &sparse_dense, std::int64_t>, py::return_value_policy::move); 33 | m.def("sparse_dense", &sparse_dense, std::int64_t>, py::return_value_policy::move); 34 | 35 | #ifdef USE_EIGEN_BACKEND 36 | //-------------------------------------------------------------------------------------// 37 | // Sparse Matrix - Dense Matrix Product using eigen backend (32 bit indices) 38 | m.def("sparse_dense_eigen", &sparse_dense_eigen, py::return_value_policy::move); 39 | m.def("sparse_dense_eigen", &sparse_dense_eigen, py::return_value_policy::move); 40 | m.def("sparse_dense_eigen", &sparse_dense_eigen, std::int32_t>, py::return_value_policy::move); 41 | m.def("sparse_dense_eigen", &sparse_dense_eigen, std::int32_t>, py::return_value_policy::move); 42 | 43 | // Sparse Matrix - Dense Matrix Product using eigen backend (64 bit indices) 44 | m.def("sparse_dense_eigen", &sparse_dense_eigen, py::return_value_policy::move); 45 | m.def("sparse_dense_eigen", &sparse_dense_eigen, py::return_value_policy::move); 46 | m.def("sparse_dense_eigen", &sparse_dense_eigen, std::int64_t>, py::return_value_policy::move); 47 | m.def("sparse_dense_eigen", &sparse_dense_eigen, std::int64_t>, py::return_value_policy::move); 48 | 49 | //-------------------------------------------------------------------------------------// 50 | // Sparse Matrix - Sparse Matrix Product using eigen backend (32 bit indices) 51 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, py::return_value_policy::move); 52 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, py::return_value_policy::move); 53 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, std::int32_t>, py::return_value_policy::move); 54 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, std::int32_t>, py::return_value_policy::move); 55 | 56 | // Sparse Matrix - Sparse Matrix Product using eigen backend (64 bit indices) 57 | 
m.def("sparse_sparse_eigen", &sparse_sparse_eigen, py::return_value_policy::move); 58 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, py::return_value_policy::move); 59 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, std::int64_t>, py::return_value_policy::move); 60 | m.def("sparse_sparse_eigen", &sparse_sparse_eigen, std::int64_t>, py::return_value_policy::move); 61 | 62 | #endif 63 | 64 | m.def("has_eigen", &has_eigen); 65 | 66 | //-------------------------------------------------------------------------------------// 67 | // Thread Control: 68 | m.def("get_max_threads", &get_max_threads); 69 | m.def("get_num_threads", &get_num_threads); 70 | m.def("set_num_threads", &set_num_threads); 71 | 72 | #ifdef VERSION_INFO 73 | m.attr("__version__") = VERSION_INFO; 74 | #else 75 | m.attr("__version__") = "dev"; 76 | #endif 77 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 
21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | --------------------------------------------------------------------------------