├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── MANIFEST ├── README.md ├── Tests └── ssgetpy_test.py ├── demo.ipynb ├── requirements.txt ├── setup.py └── ssgetpy ├── __init__.py ├── __main__.py ├── bundle.py ├── config.py ├── csvindex.py ├── db.py ├── dbinstance.py ├── matrix.py └── query.py /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.6, 3.7, 3.8] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | pip install -e . 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest 41 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.py~ -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | setup.py 2 | ssgetpy\__init__.py 3 | ssgetpy\__main__.py 4 | ssgetpy\bundle.py 5 | ssgetpy\config.py 6 | ssgetpy\csvindex.py 7 | ssgetpy\db.py 8 | ssgetpy\dbinstance.py 9 | ssgetpy\matrix.py 10 | ssgetpy\query.py 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
SSGETPY: Search and download sparse matrices from the SuiteSparse Matrix Collection 2 | ![Python package](https://github.com/drdarshan/PyUFGet/workflows/Python%20package/badge.svg) [![PyPI version](https://badge.fury.io/py/ssgetpy.svg)](https://badge.fury.io/py/ssgetpy) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/drdarshan/ssgetpy/master?filepath=demo.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/drdarshan/ssgetpy/blob/master/demo.ipynb) 3 | 4 | 5 | `ssgetpy` is a little Python library and command-line program to search, filter and download matrices from the [SuiteSparse Matrix Collection](https://people.engr.tamu.edu/davis/matrices.html) similar to the existing Java and MATLAB tools. 6 | 7 | The index of matrices is created from the same CSV file used by the 8 | Java interface. However, the index is cached in a local SQLite 9 | database to make querying it more convenient. 10 | 11 | ## Requirements and installation 12 | 13 | `ssgetpy` works with Python 3.6 or above. Besides the standard 14 | library, it depends on `requests` and `tqdm`. Since `ssgetpy` doesn't 15 | actually parse matrix data, it doesn't require dependencies like 16 | `NumPy` or `SciPy`. 17 | 18 | To install, simply run: 19 | ``` 20 | pip install ssgetpy 21 | ``` 22 | 23 | This will install the `ssgetpy` Python module as well as a `ssgetpy` command-line script. 24 | 25 | From Python, run ``import ssgetpy`` and type ``help(ssgetpy)`` to get a detailed 26 | help message on how to use ``ssgetpy`` to search and download sparse matrices. 27 | 28 | From the command-line, run ``ssgetpy`` or ``ssgetpy --help`` to see the 29 | list of options. 30 | 31 | ## Examples 32 | Make sure you first run ``from ssgetpy import search, fetch``. Replace 33 | ``fetch`` with ``search`` to only return the corresponding ``Matrix`` objects 34 | without downloading them. 
35 | 36 | * Download matrix with ID 42 in the MatrixMarket format: ``fetch(42)`` 37 | * Download matrices in the Harwell-Boeing collection with less than 38 | 1000 non-zeros: ``fetch(group = 'HB', nzbounds = (None, 1000))`` 39 | * Download only the first 5 problems arising from structural analysis: 40 | ``fetch(kind = "structural", limit = 5)`` 41 | * Download the problems in the previous example as MATLAB .MAT files: ``fetch(kind = "structural", format = "MAT", limit = 5)`` 42 | 43 | For more examples, please see the accompanying [Jupyter notebook](demo.ipynb). 44 | 45 | 46 | 47 | ## License 48 | *ssgetpy* is licensed under the [MIT/X11 license](http://www.opensource.org/licenses/mit-license.php): 49 | 50 | Permission is hereby granted, free of charge, to any person obtaining 51 | a copy of this software and associated documentation files (the 52 | "Software"), to deal in the Software without restriction, including 53 | without limitation the rights to use, copy, modify, merge, publish, 54 | distribute, sublicense, and/or sell copies of the Software, and to 55 | permit persons to whom the Software is furnished to do so, subject to 56 | the following conditions: 57 | 58 | The above copyright notice and this permission notice shall be 59 | included in all copies or substantial portions of the Software. 60 | 61 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 62 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 63 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 64 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 65 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 66 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 67 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
68 | 69 | -------------------------------------------------------------------------------- /Tests/ssgetpy_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import ssgetpy as p 4 | 5 | 6 | class TestPyUFGet(unittest.TestCase): 7 | def test_search_by_id(self): 8 | m = p.search(42) 9 | self.assertEqual(len(m), 1) 10 | self.assertEqual(m[0].id, 42) 11 | 12 | def test_search_by_group(self): 13 | matrices = p.search("HB/*") 14 | for matrix in matrices: 15 | self.assertEqual(matrix.group, "HB") 16 | 17 | def test_search_by_name(self): 18 | matrices = p.search("c-") 19 | self.assertTrue(len(matrices) > 0) 20 | for matrix in matrices: 21 | self.assertTrue(matrix.name.startswith("c-")) 22 | 23 | def test_filter_by_rows(self): 24 | matrices = p.search(rowbounds=(None, 1000)) 25 | self.assertTrue(len(matrices) > 0) 26 | for matrix in matrices: 27 | self.assertTrue(matrix.rows <= 1000) 28 | 29 | def test_filter_by_shape(self): 30 | rmin = 50 31 | rmax = 1000 32 | cmin = 200 33 | cmax = 5000 34 | matrices = p.search(rowbounds=(rmin, rmax), colbounds=(cmin, cmax)) 35 | self.assertTrue(len(matrices) > 0) 36 | for matrix in matrices: 37 | self.assertTrue( 38 | matrix.rows >= rmin 39 | and matrix.rows <= rmax 40 | and matrix.cols >= cmin 41 | and matrix.cols <= cmax 42 | ) 43 | 44 | def test_filter_by_spd_true(self): 45 | matrices = p.search(isspd=True) 46 | self.assertTrue(len(matrices) > 0) 47 | for matrix in matrices: 48 | self.assertTrue(matrix.isspd) 49 | 50 | def test_filter_by_spd_false(self): 51 | matrices = p.search(isspd=False) 52 | self.assertTrue(len(matrices) > 0) 53 | for matrix in matrices: 54 | self.assertFalse(matrix.isspd) 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python interface to the SuiteSparse Matrix Collection\n", 8 | "\n", 9 | "This notebook walks you through some of the features of the `ssgetpy` package that provides a search and download interface for the [Suite Sparse](https://suitesparse.com) matrix collection. \n", 10 | "\n", 11 | "The simplest way to install `ssgetpy` is via:\n", 12 | "```\n", 13 | "pip install ssgetpy\n", 14 | "```\n", 15 | "\n", 16 | "This installs both the `ssgetpy` Python module as well as the `ssgetpy` command-line script. \n", 17 | "\n", 18 | "\n", 19 | "This notebook only covers the library version of `ssgetpy`. To get more information on the command-line script run:\n", 20 | "```\n", 21 | "$ ssgetpy --help\n", 22 | "```\n", 23 | "\n", 24 | "Before proceeding with the rest of this notebook, please install `ssgetpy` into your environment. If you are running this notebook under Binder, `ssgetpy` will already be installed for you. If you are running this notebook in Google Colaboratory, the following cell will install `ssgetpy`: " 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "scrolled": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "ipy = get_ipython()\n", 36 | "if 'google.colab' in str(ipy):\n", 37 | " import sys\n", 38 | " ipy.run_cell('!{sys.executable} -m pip install ssgetpy')" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "First import `ssgetpy` via:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "scrolled": false 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import ssgetpy" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Basic query interface\n", 64 | "\n", 65 | "The primary interface to `ssgetpy` is via `ssgetpy.search`. 
Running `search` without any arguments returns the first 10 matrices in the collection:" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "scrolled": false 73 | }, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/html": [ 78 | "
IdGroupNameRowsColsNNZDType2D/3D Discretization?SPD?Pattern SymmetryNumerical SymmetryKindSpy Plot
1HB1138_bus113811384054realNoYes1.01.0power network problem
2HB494_bus4944941666realNoYes1.01.0power network problem
3HB662_bus6626622474realNoYes1.01.0power network problem
4HB685_bus6856853249realNoYes1.01.0power network problem
5HBabb3133131761557binaryNoNo0.00.0least squares problem
6HBarc1301301301037realYesNo0.760.0materials problem
7HBash21921985438binaryNoNo0.00.0least squares problem
8HBash2922922922208binaryNoNo1.01.0least squares problem
9HBash331331104662binaryNoNo0.00.0least squares problem
10HBash6086081881216binaryNoNo0.00.0least squares problem
" 79 | ], 80 | "text/plain": [ 81 | "[Matrix(1, 'HB', '1138_bus', 1138, 1138, 4054, 'real', False, True, 1.0, 1.0, 'power network problem', 'https://sparse.tamu.edu/files/HB/1138_bus.png'),\n", 82 | " Matrix(2, 'HB', '494_bus', 494, 494, 1666, 'real', False, True, 1.0, 1.0, 'power network problem', 'https://sparse.tamu.edu/files/HB/494_bus.png'),\n", 83 | " Matrix(3, 'HB', '662_bus', 662, 662, 2474, 'real', False, True, 1.0, 1.0, 'power network problem', 'https://sparse.tamu.edu/files/HB/662_bus.png'),\n", 84 | " Matrix(4, 'HB', '685_bus', 685, 685, 3249, 'real', False, True, 1.0, 1.0, 'power network problem', 'https://sparse.tamu.edu/files/HB/685_bus.png'),\n", 85 | " Matrix(5, 'HB', 'abb313', 313, 176, 1557, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/abb313.png'),\n", 86 | " Matrix(6, 'HB', 'arc130', 130, 130, 1037, 'real', True, False, 0.7586805555555556, 0.0, 'materials problem', 'https://sparse.tamu.edu/files/HB/arc130.png'),\n", 87 | " Matrix(7, 'HB', 'ash219', 219, 85, 438, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash219.png'),\n", 88 | " Matrix(8, 'HB', 'ash292', 292, 292, 2208, 'binary', False, False, 1.0, 1.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash292.png'),\n", 89 | " Matrix(9, 'HB', 'ash331', 331, 104, 662, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash331.png'),\n", 90 | " Matrix(10, 'HB', 'ash608', 608, 188, 1216, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash608.png')]" 91 | ] 92 | }, 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "ssgetpy.search()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "Notice that search result comes with minimal Jupyter integration that shows some metadata along with the 
distribution of the non-zero values. Click on the group or name link to go a web page in the SuiteSparse matrix collection that has much more information about the group or the matrix respectively.\n", 107 | "\n", 108 | "### Query filters\n", 109 | "\n", 110 | "You can add more filters via keyword arguments as follows:\n", 111 | "\n", 112 | "|Argument | Description | Type | Default | Notes |\n", 113 | "|---------|-------------|------|---------|-------| \n", 114 | "|`rowbounds` | Number of rows | `tuple`: `(min_value, max_value)` | `(None, None)`| `min_value` or `max_value` can be `None` which implies \"don't care\" |\n", 115 | "|`colbounds` | Number of columns | `tuple`: `(min_value, max_value)` | `(None, None)` | |\n", 116 | "|`nzbounds` | Number of non-zeros | `tuple`: `(min_value, max_value)` | `(None, None)`| |\n", 117 | "|`isspd` | SPD? | `bool` or `None` | `None` | `None` implies \"don't care\" |\n", 118 | "|`is2d3d` | 2D/3D Discretization? | `bool` or `None` | `None` | |\n", 119 | "| `dtype` | Non-zero data type | `real`, `complex`, `binary` or `None` | `None` | |\n", 120 | "| `group` | Matrix group | `str` or `None` | `None` | Supports partial matches; `None` implies \"don't care\" |\n", 121 | "| `kind` | Problem domain | `str` or `None` | `None` | Supports partial matches; `None` implies \"don't care\" |\n", 122 | "| `limit` | Max number of results | `int` | 10 | |\n", 123 | "\n", 124 | "> Note that numerical and pattern symmetry filters are not yet supported.\n", 125 | "\n", 126 | "As an example of using the above filters, here is a query that returns five, non-SPD matrices with $1000\\leq \\text{NNZ} \\leq 10000$:" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "metadata": { 133 | "scrolled": false 134 | }, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/html": [ 139 | "
IdGroupNameRowsColsNNZDType2D/3D Discretization?SPD?Pattern SymmetryNumerical SymmetryKindSpy Plot
5HBabb3133131761557binaryNoNo0.00.0least squares problem
6HBarc1301301301037realYesNo0.760.0materials problem
8HBash2922922922208binaryNoNo1.01.0least squares problem
10HBash6086081881216binaryNoNo0.00.0least squares problem
12HBash9589582921916binaryNoNo0.00.0least squares problem
" 140 | ], 141 | "text/plain": [ 142 | "[Matrix(5, 'HB', 'abb313', 313, 176, 1557, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/abb313.png'),\n", 143 | " Matrix(6, 'HB', 'arc130', 130, 130, 1037, 'real', True, False, 0.7586805555555556, 0.0, 'materials problem', 'https://sparse.tamu.edu/files/HB/arc130.png'),\n", 144 | " Matrix(8, 'HB', 'ash292', 292, 292, 2208, 'binary', False, False, 1.0, 1.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash292.png'),\n", 145 | " Matrix(10, 'HB', 'ash608', 608, 188, 1216, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash608.png'),\n", 146 | " Matrix(12, 'HB', 'ash958', 958, 292, 1916, 'binary', False, False, 0.0, 0.0, 'least squares problem', 'https://sparse.tamu.edu/files/HB/ash958.png')]" 147 | ] 148 | }, 149 | "execution_count": 4, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "ssgetpy.search(nzbounds=(1000,10000), isspd=False, limit=5)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "## Working with search results\n", 163 | "The result of a search query is a collection of `Matrix` objects. The collection can be sliced using the same syntax as for vanilla Python `list`s as shown below:" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 5, 169 | "metadata": { 170 | "scrolled": false 171 | }, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/html": [ 176 | "
IdGroupNameRowsColsNNZDType2D/3D Discretization?SPD?Pattern SymmetryNumerical SymmetryKindSpy Plot
24HBbcsstk0266664356realYesYes1.01.0structural problem
26HBbcsstk041321323648realYesYes1.01.0structural problem
27HBbcsstk051531532423realYesYes1.01.0structural problem
28HBbcsstk064204207860realYesYes1.01.0structural problem
" 177 | ], 178 | "text/plain": [ 179 | "[Matrix(24, 'HB', 'bcsstk02', 66, 66, 4356, 'real', True, True, 1.0, 1.0, 'structural problem', 'https://sparse.tamu.edu/files/HB/bcsstk02.png'),\n", 180 | " Matrix(26, 'HB', 'bcsstk04', 132, 132, 3648, 'real', True, True, 1.0, 1.0, 'structural problem', 'https://sparse.tamu.edu/files/HB/bcsstk04.png'),\n", 181 | " Matrix(27, 'HB', 'bcsstk05', 153, 153, 2423, 'real', True, True, 1.0, 1.0, 'structural problem', 'https://sparse.tamu.edu/files/HB/bcsstk05.png'),\n", 182 | " Matrix(28, 'HB', 'bcsstk06', 420, 420, 7860, 'real', True, True, 1.0, 1.0, 'structural problem', 'https://sparse.tamu.edu/files/HB/bcsstk06.png')]" 183 | ] 184 | }, 185 | "execution_count": 5, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "result = ssgetpy.search(kind='structural', nzbounds=(1000,10000))\n", 192 | "result[:4]" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "An individual element in the collection can be used as follows:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "metadata": { 206 | "scrolled": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
IdGroupNameRowsColsNNZDType2D/3D Discretization?SPD?Pattern SymmetryNumerical SymmetryKindSpy Plot
24HBbcsstk0266664356realYesYes1.01.0structural problem
" 213 | ], 214 | "text/plain": [ 215 | "Matrix(24, 'HB', 'bcsstk02', 66, 66, 4356, 'real', True, True, 1.0, 1.0, 'structural problem', 'https://sparse.tamu.edu/files/HB/bcsstk02.png')" 216 | ] 217 | }, 218 | "execution_count": 6, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "small_matrix = result[0]\n", 225 | "small_matrix" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 7, 231 | "metadata": { 232 | "scrolled": false 233 | }, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "4356" 239 | ] 240 | }, 241 | "execution_count": 7, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "small_matrix.nnz" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "We can download a matrix locally using the `download` method:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 8, 260 | "metadata": { 261 | "scrolled": false 262 | }, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "('C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02.tar.gz',\n", 268 | " 'C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02.tar.gz')" 269 | ] 270 | }, 271 | "execution_count": 8, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "small_matrix.download()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "The `download` methods supports the following arguments:\n", 285 | "\n", 286 | "|Argument| Description | Data type | Default value | Notes|\n", 287 | "|--------|-------------|-----------|---------------|------|\n", 288 | "|`format`| Sparse matrix storage format | One of (`'MM', 'RB', 'MAT'`) | `MM` | `MM` is Matrix Market, `RB` is Rutherford-Boeing and `MAT` is MATLAB MAT-file format|\n", 289 | 
"|`destpath` | Path to download | `str` | `~/.ssgetpy` on Unix `%APPDATA%\\ssgetpy` on Windows | The full filename for the matrix is obtained via `os.path.join(destpath, format, group_name, matrix_name + extension)`where `extention` is `.tar.gz` for `MM` and `RB` and `.mat` for `MAT`|\n", 290 | "|`extract` | Extract TGZ archive? | `bool` | `False` | Only applicable to `MM` and `RB` formats |\n", 291 | "\n", 292 | "The return value is a two-element `tuple` containing the local path where the matrix was downloaded to along with the path for the extracted file, if applicable. \n", 293 | "\n", 294 | "Note that `download` does not actually download the file again if it already exists in the path. " 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 9, 300 | "metadata": { 301 | "scrolled": false 302 | }, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "('C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02.tar.gz',\n", 308 | " 'C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02.tar.gz')" 309 | ] 310 | }, 311 | "execution_count": 9, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "small_matrix.download()" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 10, 323 | "metadata": { 324 | "scrolled": false 325 | }, 326 | "outputs": [ 327 | { 328 | "data": { 329 | "text/plain": [ 330 | "('C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02',\n", 331 | " 'C:\\\\Users\\\\drdar\\\\AppData\\\\Roaming\\\\ssgetpy\\\\MM\\\\HB\\\\bcsstk02.tar.gz')" 332 | ] 333 | }, 334 | "execution_count": 10, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "small_matrix.download(extract=True)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "Finally, `download` also works directly on the 
output of `search`, so you don't have to download one matrix at a time. For example, to download the first five matrices in the previous query, you could use:" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 11, 353 | "metadata": { 354 | "scrolled": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "application/vnd.jupyter.widget-view+json": { 360 | "model_id": "b9ad495a1ef5462a889164dcb19a4d92", 361 | "version_major": 2, 362 | "version_minor": 0 363 | }, 364 | "text/plain": [ 365 | "HBox(children=(FloatProgress(value=0.0, description='Overall progress', max=5.0, style=ProgressStyle(descripti…" 366 | ] 367 | }, 368 | "metadata": {}, 369 | "output_type": "display_data" 370 | }, 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "result[:5].download()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": { 387 | "scrolled": false 388 | }, 389 | "outputs": [], 390 | "source": [] 391 | } 392 | ], 393 | "metadata": { 394 | "kernelspec": { 395 | "display_name": "Python 3", 396 | "language": "python", 397 | "name": "python3" 398 | }, 399 | "language_info": { 400 | "codemirror_mode": { 401 | "name": "ipython", 402 | "version": 3 403 | }, 404 | "file_extension": ".py", 405 | "mimetype": "text/x-python", 406 | "name": "python", 407 | "nbconvert_exporter": "python", 408 | "pygments_lexer": "ipython3", 409 | "version": "3.7.6" 410 | } 411 | }, 412 | "nbformat": 4, 413 | "nbformat_minor": 4 414 | } 415 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.22 2 | tqdm>=4.48 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import 
pathlib 2 | 3 | from setuptools import setup 4 | 5 | HERE = pathlib.Path(__file__).parent 6 | 7 | README = (HERE / "README.md").read_text() 8 | 9 | setup( 10 | name="ssgetpy", 11 | version="1.0-pre2", 12 | description="A Python interface to the SuiteSparse Matrix Collection", 13 | author="Sudarshan Raghunathan", 14 | author_email="darshan@alum.mit.edu", 15 | url="http://www.github.com/drdarshan/ssgetpy", 16 | long_description=README, 17 | long_description_content_type="text/markdown", 18 | packages=["ssgetpy"], 19 | entry_points={"console_scripts": ["ssgetpy = ssgetpy.query:cli", ], }, 20 | python_requires=">3.5.2", 21 | install_requires=["requests>=2.22", "tqdm>=4.41"], 22 | classifiers=[ 23 | "Programming Language :: Python :: 3", 24 | "License :: OSI Approved :: MIT License", 25 | "Operating System :: OS Independent", 26 | "Topic :: Scientific/Engineering :: Mathematics", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /ssgetpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The `ssgetpy` module provides interfaces to search and download matrices 3 | from the SuiteSparse Matrix Collection. 4 | 5 | There are two ways to use `ssgetpy`: 6 | * By importing the `ssgetpy` module in Python, or 7 | * As a standalone command-line tool 8 | 9 | To search for matrices that match a given criterion, use `ssgetpy.search`: 10 | 11 | `ssgetpy.search(name_or_id, **kwargs)` 12 | 13 | `ssgetpy.search` only returns a list of `Matrix` objects that match 14 | the selection criterion. To download the matrices themselves, use the 15 | `download` method in the `Matrix object` or use `ssgetpy.fetch` :: 16 | 17 | `ssgetpy.fetch(name_or_id, format, location, **kwargs)` 18 | 19 | The rules for specifying the search criteria in `ssgetpy.search` and 20 | `ssgetpy.fetch` are as follows: 21 | 22 | 1. 
`name_or_id` can be either the numerical ID of the matrix such as 23 | `42` or a pattern such as `"HB/ash*"` or `"c-"`. This field is 24 | optional. 25 | 2. `**kwargs` is a set of key-value pairs with search constraints: 26 | - matid: An integer matrix ID such as `42` 27 | - group: The matrix group name such as `HB` 28 | - name: A pattern containing the matrix name such as `ash-` 29 | - rowbounds: A tuple of the form (min, max) containing the 30 | minimum and maximum rows. The min or max value can be set to 31 | `None`. 32 | - colbounds: A tuple of the form (min, max) containing the minimum 33 | and maximum columns. 34 | - nzbounds: A tuple of the form (min, max) containing the minimum 35 | and maximum number of non-zeros. 36 | - dtype: The matrix data type, one of `real`, `complex` or `binary`. 37 | - is2d3d: If true, only selects matrices arising from 38 | 2D and 3D discretizations. 39 | - isspd: If true, only selects SPD matrices. 40 | - kind: A string describing the problem domain, 41 | see http://www.cise.ufl.edu/research/sparse/matrices/kind.html 42 | - limit: Number of matrices to return, defaults to 10. 43 | 44 | If `name_or_id` is specified, it overrides any conflicting key-value settings 45 | in `**kwargs`. 46 | 47 | In `ssgetpy.fetch`, `format` can be one of 'MM', 'MAT' or 'RB'; 'MM' 48 | is the default if `format` is omitted. Finally, `location` refers 49 | to the directory where the matrices will be downloaded on the local 50 | machine. It defaults to `%APPDATA%/ssgetpy` on Windows and 51 | `~/.ssgetpy` on Unix-like platforms. 52 | 53 | In addition to its usage as a Python library, `ssgetpy` can be run from 54 | the command line as follows :: 55 | 56 | python -m ssgetpy 57 | Usage: ssgetpy [options] 58 | 59 | Options: 60 | -h, --help show this help message and exit 61 | -i MATID, --id=MATID Download a matrix with the given ID. 62 | -g GROUP, --group=GROUP 63 | The matrix group. 
64 | -n NAME, --name=NAME The name or a pattern matching the name of the 65 | matrix/matrices. 66 | -d DTYPE, --data-type=DTYPE 67 | The element type of the matrix/matrices, 68 | can be one of 'real', 'complex' or 'binary'. 69 | -s, --spd Only selects SPD matrices. 70 | -f FORMAT, --format=FORMAT 71 | The format in which to download the matrix/matrices. 72 | Can be one of 'MM', 'MAT' or 'RB' for MatrixMarket, 73 | MATLAB or Rutherford-Boeing formats respectively. 74 | Defaults to 'MM'. 75 | -l LIMIT, --limit=LIMIT 76 | The maximum number of matrices to be downloaded. 77 | Defaults to 10. 78 | -o LOCATION, --outdir=LOCATION 79 | The directory in the local machine where matrices 80 | will be downloaded to. 81 | Defaults to `%AppData%/ssgetpy` on Windows 82 | and `~/.ssgetpy` on Unix. 83 | 84 | Size and Non-zero filters: 85 | These options may be used to restrict the shape or number of non-zero 86 | elements of the matrices to be downloaded 87 | 88 | --min-rows=MIN_ROWS 89 | The minimum number of rows in the matrix/matrices. 90 | --max-rows=MAX_ROWS 91 | The maximum number of rows in the matrix/matrices. 92 | --min-cols=MIN_COLS 93 | The minimum number of columns in the matrix/matrices. 94 | --max-cols=MAX_COLS 95 | The maximum number of columns in the matrix/matrices. 96 | --min-nnzs=MIN_NNZS 97 | The minimum number of non-zero values in the 98 | matrix/matrices. 99 | --max-nnzs=MAX_NNZS 100 | The maximum number of non-zero values in the 101 | matrix/matrices. 
102 | """ 103 | from .query import fetch, search 104 | 105 | __all__ = ["fetch", "search"] 106 | -------------------------------------------------------------------------------- /ssgetpy/__main__.py: -------------------------------------------------------------------------------- 1 | from .query import cli 2 | 3 | cli() 4 | -------------------------------------------------------------------------------- /ssgetpy/bundle.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import shutil 4 | import tarfile 5 | 6 | 7 | def extract(bundle): 8 | basedir, filename = os.path.split(bundle) 9 | tarfilename = os.path.join( 10 | basedir, ".".join((filename.split(".")[0], "tar")) 11 | ) 12 | gzfile = gzip.open(bundle, "rb") 13 | with open(tarfilename, "wb") as outtarfile: 14 | shutil.copyfileobj(gzfile, outtarfile) 15 | gzfile.close() 16 | tarfile.open(tarfilename).extractall(basedir) 17 | os.unlink(tarfilename) 18 | os.unlink(bundle) 19 | -------------------------------------------------------------------------------- /ssgetpy/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | SS_DIR = None 8 | SS_DB = "index.db" 9 | SS_TABLE = "MATRICES" 10 | SS_ROOT_URL = "https://sparse.tamu.edu" 11 | SS_INDEX_URL = "/".join((SS_ROOT_URL, "files", "ssstats.csv")) 12 | 13 | if sys.platform == "win32": 14 | SS_DIR = os.path.join(os.environ["APPDATA"], "ssgetpy") 15 | else: 16 | SS_DIR = os.path.join(os.environ["HOME"], ".ssgetpy") 17 | 18 | SS_DB = os.path.join(SS_DIR, SS_DB) 19 | 20 | os.makedirs(SS_DIR, exist_ok=True) 21 | 22 | 23 | def dump(): 24 | logger.debug( 25 | dict( 26 | SS_DIR=SS_DIR, 27 | SS_DB=SS_DB, 28 | SS_TABLE=SS_TABLE, 29 | SS_ROOT_URL=SS_ROOT_URL, 30 | SS_INDEX_URL=SS_INDEX_URL, 31 | ) 32 | ) 33 | 
def getdtype(real, logical):
    """
    Converts a (real, logical) pair into one of the three types:
    'real', 'complex' and 'binary'.

    `logical` takes precedence: a truthy `logical` always yields 'binary'.
    """
    if logical:
        return "binary"
    return "real" if real else "complex"


def gen_rows(csvrows):
    """
    Creates a generator that returns a single row in the matrix database.

    `csvrows` is an iterable of CSV-formatted text lines. Each yielded
    tuple is (id, group, name, rows, cols, nnz, dtype, is2d3d, isspd,
    psym, nsym, kind), where id is the 1-based position of the line.
    """
    for matid, line in enumerate(csv.reader(csvrows), start=1):
        group, name = line[0], line[1]
        rows, cols, nnz = int(line[2]), int(line[3]), int(line[4])
        # Flag columns are written as 0/1 integers in the CSV.
        real = bool(int(line[5]))
        logical = bool(int(line[6]))
        is2d3d = bool(int(line[7]))
        isspd = bool(int(line[8]))
        psym, nsym = float(line[9]), float(line[10])
        kind = line[11]
        yield (
            matid,
            group,
            name,
            rows,
            cols,
            nnz,
            getdtype(real, logical),
            is2d3d,
            isspd,
            psym,
            nsym,
            kind,
        )


def generate():
    """
    Downloads the CSV index from `SS_INDEX_URL` and returns a generator
    of database rows (see `gen_rows`).

    Raises `requests.HTTPError` if the server answers with an error
    status, so that an error page is never parsed as CSV.
    """
    response = requests.get(SS_INDEX_URL)
    response.raise_for_status()
    lines = response.iter_lines()

    # The first two lines are a header, not matrix rows; they are
    # decoded so the log shows text rather than a bytes repr.
    # Read the number of entries
    entries = next(lines).decode("utf-8")
    logger.info(f"Number of entries in the CSV file: {entries}")
    # Read the last modified date
    modified = next(lines).decode("utf-8")
    logger.info(f"Last modified date: {modified}")

    return gen_rows(line.decode("utf-8") for line in lines)
class MatrixDB:
    """
    SQLite-backed index of matrix metadata.

    The database holds two tables: the matrix table (one row of
    metadata per matrix) and `update_table`, which receives a timestamp
    every time rows are inserted.
    """

    def __init__(self, db=SS_DB, table=SS_TABLE):
        """
        Connects to the SQLite database `db` and makes sure both tables
        exist. `table` is the name used for the matrix table.
        """
        import sqlite3

        self.db = db
        self.matrix_table = table
        self.update_table = "update_table"
        self.conn = sqlite3.connect(self.db)
        self._create_table()

    def _get_nrows(self):
        """Returns the number of rows in the matrix table."""
        cursor = self.conn.execute(
            "SELECT COUNT(*) FROM %s" % self.matrix_table
        )
        return int(cursor.fetchall()[0][0])

    nrows = property(_get_nrows)

    def _get_last_update(self):
        """
        Returns the most recent insert timestamp as a naive `datetime`,
        or the Unix epoch when no insert has been recorded yet.
        """
        last_update = self.conn.execute(
            "SELECT MAX(update_date) " + f"from {self.update_table}"
        ).fetchall()[0][0]
        if last_update:
            return _from_timestamp(last_update)
        # Same value as utcfromtimestamp(0), spelled out directly.
        return datetime.datetime(1970, 1, 1)

    last_update = property(_get_last_update)

    def _drop_table(self):
        """Drops both tables if they exist and commits."""
        self.conn.execute("DROP TABLE IF EXISTS %s" % self.matrix_table)
        self.conn.execute(f"DROP TABLE IF EXISTS {self.update_table}")
        self.conn.commit()

    def _create_table(self):
        """Creates both tables if they do not exist yet and commits."""
        self.conn.execute(
            """CREATE TABLE IF NOT EXISTS %s (
            id INTEGER PRIMARY KEY,
            matrixgroup TEXT,
            name TEXT,
            rows INTEGER,
            cols INTEGER,
            nnz INTEGER,
            dtype TEXT,
            is2d3d INTEGER,
            isspd INTEGER,
            psym REAL,
            nsym REAL,
            kind TEXT)"""
            % self.matrix_table
        )

        self.conn.execute(
            f"CREATE TABLE IF NOT EXISTS {self.update_table} "
            + "(update_date TIMESTAMP)"
        )
        self.conn.commit()

    def insert(self, values):
        """
        Inserts `values` (an iterable of 12-item rows matching the
        matrix table's columns) and records the current time in the
        update table.
        """
        self.conn.executemany(
            "INSERT INTO %s VALUES(?,?,?,?,?,?,?,?,?,?,?,?)"
            % self.matrix_table,
            values,
        )
        self.conn.execute(
            f"INSERT INTO {self.update_table} " + "VALUES (datetime('now'))"
        )
        self.conn.commit()

    def refresh(self, values):
        """Replaces the whole index: drop, recreate, then insert `values`."""
        self._drop_table()
        self._create_table()
        self.insert(values)

    def dump(self):
        """Returns every row of the matrix table as a list of tuples."""
        return self.conn.execute(
            "SELECT * from %s" % self.matrix_table
        ).fetchall()

    # Each helper below returns either None (no constraint) or a
    # ``(clause, params)`` pair. `field` is always a column name chosen
    # by this class; the caller-supplied value travels separately as a
    # bound parameter and is never spliced into the SQL text.

    @staticmethod
    def _is_constraint(field, value):
        """Equality test on `field`; any falsy `value` means no constraint."""
        if not value:
            return None
        return "(%s = ?)" % field, [value]

    @staticmethod
    def _like_constraint(field, value):
        """Substring match on `field`; any falsy `value` means no constraint."""
        if not value:
            return None
        return "(%s LIKE ?)" % field, ["%" + str(value) + "%"]

    @staticmethod
    def _sz_constraint(field, bounds):
        """
        Inclusive range test on `field`. `bounds` is a (min, max) pair in
        which either end may be None, or None for no constraint at all.
        """
        if bounds is None:
            return None
        lower, upper = bounds[0], bounds[1]
        clauses = []
        params = []
        if lower is not None:
            clauses.append("%s >= ?" % field)
            params.append(int(lower))
        if upper is not None:
            clauses.append("%s <= ?" % field)
            params.append(int(upper))
        if not clauses:
            return None
        return " ( " + " AND ".join(clauses) + " ) ", params

    @staticmethod
    def _bool_constraint(field, value):
        """Tests a 0/1 flag column; None means no constraint."""
        if value is None:
            return None
        return "(%s = ?)" % field, [1 if value else 0]

    def search(
        self,
        matid=None,
        group=None,
        name=None,
        rowbounds=None,
        colbounds=None,
        nzbounds=None,
        dtype=None,
        is2d3d=None,
        isspd=None,
        kind=None,
        limit=10,
    ):
        """
        Returns a `MatrixList` of the matrices satisfying every given
        filter.

        `matid`, `group` and `dtype` are matched exactly; `name` and
        `kind` are matched as substrings; `rowbounds`, `colbounds` and
        `nzbounds` are (min, max) pairs; `is2d3d` and `isspd` are
        True/False flags. A filter left as None is ignored. At most
        `limit` rows are returned; `limit=None` removes the cap.
        """
        candidates = (
            MatrixDB._is_constraint("id", matid),
            MatrixDB._is_constraint("matrixgroup", group),
            MatrixDB._like_constraint("name", name),
            MatrixDB._sz_constraint("rows", rowbounds),
            MatrixDB._sz_constraint("cols", colbounds),
            MatrixDB._sz_constraint("nnz", nzbounds),
            MatrixDB._is_constraint("dtype", dtype),
            MatrixDB._bool_constraint("is2d3d", is2d3d),
            MatrixDB._bool_constraint("isspd", isspd),
            MatrixDB._like_constraint("kind", kind),
        )
        active = [entry for entry in candidates if entry is not None]

        querystring = "SELECT * FROM %s" % self.matrix_table
        params = []

        if active:
            querystring += " WHERE " + " AND ".join(
                clause for clause, _ in active
            )
            for _, clause_params in active:
                params.extend(clause_params)

        if limit is not None:
            querystring += " LIMIT ?"
            params.append(limit)

        logger.debug("%s %s", querystring, params)

        return MatrixList(
            Matrix(*x)
            for x in self.conn.execute(querystring, params).fetchall()
        )
import bundle 8 | from .config import SS_DIR, SS_ROOT_URL 9 | 10 | 11 | class MatrixList(list): 12 | def _repr_html_(self): 13 | body = "".join(r.to_html_row() for r in self) 14 | return f"{Matrix.html_header()}{body}
def to_tuple(self):
    """
    Returns the fields in a `Matrix` instance in a tuple.

    The twelve stored attributes come first, in declaration order,
    followed by the value of `icon_url()` as the last item.
    """
    field_names = (
        "id",
        "group",
        "name",
        "rows",
        "cols",
        "nnz",
        "dtype",
        "is2d3d",
        "isspd",
        "psym",
        "nsym",
        "kind",
    )
    stored = tuple(getattr(self, field) for field in field_names)
    return stored + (self.icon_url(),)
162 | """ 163 | fname = self._filename(format) 164 | directory = format.lower() if format == "MAT" else format 165 | return "/".join((SS_ROOT_URL, directory, self.group, fname)) 166 | 167 | def localpath(self, format="MM", destpath=None, extract=False): 168 | destpath = destpath or self._defaultdestpath(format) 169 | 170 | # localdestpath is the directory containing the unzipped files 171 | # in the case of MM and RB (if extract is true) or 172 | # the file itself in the case of MAT (or if extract is False) 173 | localdest = os.path.join(destpath, self._filename(format)) 174 | localdestpath = ( 175 | localdest 176 | if (format == "MAT" or not extract) 177 | else os.path.join(destpath, self.name) 178 | ) 179 | 180 | return localdestpath, localdest 181 | 182 | def download(self, format="MM", destpath=None, extract=False): 183 | """ 184 | Downloads this `Matrix` instance to the local machine, 185 | optionally unpacking any TAR.GZ files. 186 | """ 187 | # destpath is the directory containing the matrix 188 | # It is of the form ~/.PyUFGet/MM/HB 189 | destpath = destpath or self._defaultdestpath(format) 190 | 191 | # localdest is matrix file (.MAT or .TAR.GZ) 192 | # if extract = True, localdestpath is the directory 193 | # containing the unzipped matrix 194 | localdestpath, localdest = self.localpath(format, destpath, extract) 195 | 196 | if not os.access(localdestpath, os.F_OK): 197 | # Create the destination path if necessary 198 | os.makedirs(destpath, exist_ok=True) 199 | 200 | response = requests.get(self.url(format), stream=True) 201 | content_length = int(response.headers["content-length"]) 202 | 203 | with open(localdest, "wb") as outfile, tqdm( 204 | total=content_length, desc=self.name, unit="B" 205 | ) as pbar: 206 | for chunk in response.iter_content(chunk_size=4096): 207 | outfile.write(chunk) 208 | pbar.update(4096) 209 | time.sleep(0.1) 210 | 211 | if extract and (format == "MM" or format == "RB"): 212 | bundle.extract(localdest) 213 | 214 | return 
localdestpath, localdest 215 | 216 | def __str__(self): 217 | return str(self.to_tuple()) 218 | 219 | def __repr__(self): 220 | return "Matrix" + str(self.to_tuple()) 221 | 222 | def _repr_html_(self): 223 | return ( 224 | f"{Matrix.html_header()}" 225 | + f"{self.to_html_row()}
def cli(argv=None):
    """
    Command-line entry point: parses `argv` and downloads (or, with
    --dry-run, only lists) the matching matrices via `fetch`.

    `argv` is the list of arguments without the program name. When it
    is None, `sys.argv[1:]` is read at call time. With no arguments at
    all the help text is printed and nothing else happens.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="ssget")

    parser.add_argument(
        "-i",
        "--id",
        action="store",
        type=int,
        dest="matid",
        help="Download a matrix with the given ID.",
    )
    parser.add_argument(
        "-g",
        "--group",
        action="store",
        type=str,
        dest="group",
        help="The matrix group.",
    )
    parser.add_argument(
        "-n",
        "--name",
        action="store",
        type=str,
        dest="name",
        help="The name or a pattern matching the name of the matrix/matrices.",
    )
    parser.add_argument(
        "-d",
        "--data-type",
        action="store",
        type=str,
        dest="dtype",
        help="The element type of the matrix/matrices"
        ", can be one of 'real', 'complex' or 'binary'.",
    )
    parser.add_argument(
        "-s",
        "--spd",
        action="store_true",
        dest="isspd",
        default=False,
        help="Only selects SPD matrices.",
    )
    parser.add_argument(
        "-f",
        "--format",
        action="store",
        type=str,
        dest="format",
        default="MM",
        help="The format in which to download the matrix/matrices.\
        Can be one of 'MM', 'MAT' or 'RB' for MatrixMarket, \
        MATLAB or Rutherford-Boeing formats respectively.",
    )
    parser.add_argument(
        "-l",
        "--limit",
        action="store",
        type=int,
        default=10,
        dest="limit",
        help="The maximum number of matrices to be downloaded.",
    )
    parser.add_argument(
        "-o",
        "--outdir",
        action="store",
        type=str,
        dest="location",
        help="The directory in the local machine where matrices will be \
        downloaded to. Defaults to "
        + SS_DIR,
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        dest="dry_run",
        default=False,
        help="If True, only print the matrices that will be downloaded \
        but do not actually download them.",
    )

    g = parser.add_argument_group(
        "Size and Non-zero filters",
        "These options may be used to restrict the shape or number "
        + "of non-zero elements of the matrices to be downloaded",
    )

    # --min-rows, --max-rows, --min-cols, --max-cols, --min-nnzs, --max-nnzs
    for suffix, noun in (
        ("rows", "rows"),
        ("cols", "columns"),
        ("nnzs", "non-zero values"),
    ):
        for prefix, adjective in (("min", "minimum"), ("max", "maximum")):
            g.add_argument(
                f"--{prefix}-{suffix}",
                action="store",
                type=int,
                dest=f"{prefix}_{suffix}",
                help=f"The {adjective} number of {noun} in the matrix/matrices.",
            )

    lg = parser.add_argument_group(
        "Logging and verbosity options",
        "These options govern the level of spew from ssgetpy. "
        + "By default, ssgetpy prints a small number of messages "
        + "such as the number of matrices being downloaded and "
        + "where they are being downloaded to. "
        + "To suppress these message, pass --quiet. "
        + "To enable debug diagnostics, pass --verbose.",
    )
    lg.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help="Enable debug diagnostics.",
    )
    lg.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        dest="quiet",
        default=False,
        help="Do not print any messages to the console.",
    )

    if len(argv) == 0:
        parser.print_help()
        return

    args = parser.parse_args(argv)

    optdict = dict(
        group=args.group,
        rowbounds=(args.min_rows, args.max_rows),
        colbounds=(args.min_cols, args.max_cols),
        nzbounds=(args.min_nnzs, args.max_nnzs),
        dtype=args.dtype,
        # None (not False) when the flag is absent, so that non-SPD
        # matrices are not filtered out by default.
        isspd=True if args.isspd else None,
        limit=args.limit,
    )

    # --quiet leaves logging unconfigured.
    if not args.quiet:
        logging.basicConfig(
            level=logging.DEBUG if args.verbose else logging.INFO
        )

    if args.matid is not None:
        # The ID is the primary selector; a name given alongside it is
        # kept as an additional filter.
        name_or_id = args.matid
        optdict["name"] = args.name
    else:
        # The name is passed only as the primary selector so that
        # `search` can interpret "group/name" patterns; also passing it
        # as name=... would re-apply the unsplit pattern.
        name_or_id = args.name or None

    fetch(name_or_id, args.format, args.location, args.dry_run, **optdict)