├── .github ├── PULL_REQUEST_TEMPLATE.md ├── release_checklist.md └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yml ├── CHANGELOG.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── benchmark_scripts ├── benchmark.py ├── benchmark_cgzipreader.py ├── gzipread128kblocks.py ├── gzipreadlines.py ├── gzipthreadsread128kblocks.py ├── gzipwrite128kblocks.py ├── gzipwritelines.py ├── memory_leak_test.py ├── profile_igziplinewriter.py └── profile_igzipreader.py ├── codecov.yml ├── docs ├── Makefile ├── conf.py ├── includes │ ├── CHANGELOG.rst │ └── README.rst ├── index.rst └── make.bat ├── pyproject.toml ├── requirements-docs.txt ├── setup.py ├── src └── isal │ ├── __init__.py │ ├── _isal.pyi │ ├── _isalmodule.c │ ├── _version.pyi │ ├── crc32_combine.h │ ├── igzip.py │ ├── igzip_lib.pyi │ ├── igzip_libmodule.c │ ├── igzip_threaded.py │ ├── isal_shared.h │ ├── isal_zlib.pyi │ ├── isal_zlibmodule.c │ └── py.typed ├── tests ├── __init__.py ├── data │ ├── README.md │ ├── concatenated.fastq.gz │ ├── seeds.txt │ ├── test.fastq.bgzip.gz │ └── test.fastq.gz ├── test_compat.py ├── test_gzip_compliance.py ├── test_igzip.py ├── test_igzip_lib.py ├── test_igzip_threaded.py └── test_zlib_compliance.py └── tox.ini /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | ### Checklist 3 | - [ ] Pull request details were added to CHANGELOG.rst 4 | - [ ] Documentation was updated (if needed) 5 | -------------------------------------------------------------------------------- /.github/release_checklist.md: -------------------------------------------------------------------------------- 1 | Release checklist 2 | - [ ] Check outstanding issues on JIRA and Github. 3 | - [ ] Check [latest documentation](https://python-isal.readthedocs.io/en/latest/) looks fine. 4 | - [ ] Create a release branch. 5 | - [ ] Change current development version in `CHANGELOG.rst` to stable version. 
6 | - [ ] Check if the address sanitizer does not find any problems using `tox -e asan` 7 | - [ ] Merge the release branch into `main`. 8 | - [ ] Create an annotated tag with the stable version number. Include changes 9 | from CHANGELOG.rst. 10 | - [ ] Push tag to remote. This triggers the wheel/sdist build on github CI. 11 | - [ ] Merge `main` branch back into `develop`. 12 | - [ ] Build the new tag on readthedocs. Only build the last patch version of 13 | each minor version. So `1.1.1` and `1.2.0` but not `1.1.0`, `1.1.1` and `1.2.0`. 14 | - [ ] Create a new release on github. 15 | - [ ] Update the package on conda-forge. 16 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous integration 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - develop 8 | - main 9 | tags: 10 | - "*" 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | submodules: recursive 19 | - name: Set up Python 3.9 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: "3.9" 23 | - name: Install tox 24 | run: pip install tox 25 | - name: Lint 26 | run: tox -e lint 27 | 28 | package-checks: 29 | strategy: 30 | matrix: 31 | tox_env: 32 | - docs 33 | - twine_check 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | with: 38 | submodules: recursive 39 | - name: Set up Python 3.9 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: "3.9" 43 | - name: Install isal 44 | run: sudo apt-get install libisal-dev 45 | - name: Install tox and upgrade setuptools and pip 46 | run: pip install --upgrade tox setuptools pip 47 | - name: Run tox -e ${{ matrix.tox_env }} 48 | run: tox -e ${{ matrix.tox_env }} 49 | env: 50 | PYTHON_ISAL_LINK_DYNAMIC: True 51 | 52 | test-static: 53 | runs-on: ${{ matrix.os }} 54 | strategy: 55 | matrix: 56 | 
python-version: 57 | - "3.9" 58 | - "3.10" 59 | - "3.11" 60 | - "3.12" 61 | - "3.13" 62 | - "pypy-3.9" 63 | - "pypy-3.10" 64 | os: ["ubuntu-latest"] 65 | include: 66 | - os: "macos-13" 67 | python-version: "3.9" 68 | - os: "macos-14" 69 | python-version: "3.10" 70 | - os: "windows-latest" 71 | python-version: "3.9" 72 | steps: 73 | - uses: actions/checkout@v4 74 | with: 75 | submodules: recursive 76 | - name: Set up Python ${{ matrix.python-version }} 77 | uses: actions/setup-python@v5 78 | with: 79 | python-version: ${{ matrix.python-version }} 80 | - name: Install tox and upgrade setuptools 81 | run: pip install --upgrade tox setuptools 82 | - name: Install build dependencies (Linux) # Yasm in pypa/manylinux images. 83 | run: sudo apt install nasm 84 | if: runner.os == 'Linux' 85 | - name: Install build dependencies (Macos) 86 | # Install yasm because nasm does not work when building wheels. 87 | # Probably because of nasm-filter.sh not filtering all flags that can not be used. 88 | run: brew install nasm 89 | if: runner.os == 'macOS' 90 | - name: Set MSVC developer prompt 91 | uses: ilammy/msvc-dev-cmd@v1 92 | if: runner.os == 'Windows' 93 | - name: Install nasm (Windows) 94 | uses: ilammy/setup-nasm@v1 95 | if: runner.os == 'Windows' 96 | - name: Run tests 97 | run: tox 98 | - name: Upload coverage report 99 | uses: codecov/codecov-action@v1 100 | 101 | test-arch: 102 | if: startsWith(github.ref, 'refs/tags') || github.ref == 'refs/heads/develop' || github.ref == 'refs/heads/main' 103 | runs-on: "ubuntu-latest" 104 | strategy: 105 | matrix: 106 | python_version: 107 | - "3.9" 108 | steps: 109 | - uses: actions/checkout@v4 110 | with: 111 | submodules: recursive 112 | - uses: uraimo/run-on-arch-action@v3 113 | name: Build & run test 114 | with: 115 | arch: none 116 | distro: none 117 | base_image: "--platform=linux/arm64 quay.io/pypa/manylinux2014_aarch64" 118 | # versioningit needs an accessible git repository but the container 119 | # is run as root, which is 
different from the repository user. 120 | # Use git config to override this. 121 | run: |- 122 | git config --global --add safe.directory $PWD 123 | CFLAGS="-DNDEBUG -g0" python${{matrix.python_version}} -m pip install . pytest 124 | python${{matrix.python_version}} -m pytest tests 125 | 126 | # Test if the python-isal conda package can be built, which is linked 127 | # dynamically to the conda isa-l package. 128 | test-dynamic: 129 | runs-on: ${{ matrix.os }} 130 | defaults: 131 | run: 132 | # This is needed for miniconda, see: 133 | # https://github.com/marketplace/actions/setup-miniconda#important. 134 | shell: bash -l {0} 135 | strategy: 136 | matrix: 137 | os: ["ubuntu-latest", "macos-13", "windows-latest"] 138 | python_version: [ "python" ] 139 | include: 140 | - os: "ubuntu-latest" 141 | python_version: "pypy" 142 | steps: 143 | - uses: actions/checkout@v4 144 | with: 145 | submodules: recursive 146 | - name: Install miniconda. 147 | uses: conda-incubator/setup-miniconda@v3 # https://github.com/conda-incubator/setup-miniconda. 
148 | with: 149 | channels: conda-forge,defaults 150 | - name: Install requirements (universal) 151 | run: conda install isa-l ${{ matrix.python_version}} tox 152 | - name: Set MSVC developer prompt 153 | uses: ilammy/msvc-dev-cmd@v1 154 | if: runner.os == 'Windows' 155 | - name: Run tests (dynamic link) 156 | run: tox 157 | env: 158 | PYTHON_ISAL_LINK_DYNAMIC: True 159 | 160 | deploy: 161 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 162 | runs-on: ${{ matrix.os }} 163 | needs: 164 | - lint 165 | - package-checks 166 | - test-static 167 | - test-dynamic 168 | - test-arch 169 | strategy: 170 | matrix: 171 | os: 172 | - ubuntu-latest 173 | - macos-13 174 | - macos-14 175 | - windows-latest 176 | cibw_archs_linux: ["x86_64"] 177 | cibw_before_all_linux: 178 | - >- 179 | curl -o nasm-2.15.05.tar.gz https://www.nasm.us/pub/nasm/releasebuilds/2.15.05/nasm-2.15.05.tar.gz && 180 | tar -xzvf nasm-2.15.05.tar.gz && 181 | cd nasm-2.15.05/ && 182 | ./autogen.sh && 183 | ./configure && 184 | make nasm && 185 | install -c nasm /usr/bin/nasm 186 | build_sdist: [true] 187 | include: 188 | - os: "ubuntu-latest" 189 | cibw_archs_linux: "aarch64" 190 | cibw_before_all_linux: "true" # The true command exits with 0 191 | steps: 192 | - uses: actions/checkout@v4 193 | with: 194 | submodules: recursive 195 | fetch-depth: 0 # Fetch everything to get accurately versioned tag. 196 | - uses: actions/setup-python@v2 # Some issues were caused by higher versions. 
197 | name: Install Python 198 | - name: Install cibuildwheel twine wheel 199 | run: python -m pip install cibuildwheel twine wheel 200 | - name: Install build dependencies (Macos) 201 | run: brew install nasm 202 | if: runner.os == 'macOS' 203 | - name: Set MSVC developer prompt 204 | uses: ilammy/msvc-dev-cmd@v1 205 | if: runner.os == 'Windows' 206 | - name: Install nasm (Windows) 207 | uses: ilammy/setup-nasm@v1 208 | if: runner.os == 'Windows' 209 | - name: Set up QEMU 210 | if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'aarch64'}} 211 | uses: docker/setup-qemu-action@v3 212 | with: 213 | platforms: arm64 214 | - name: Build wheels 215 | run: cibuildwheel --output-dir dist 216 | env: 217 | CIBW_SKIP: "*-win32 *-manylinux_i686 cp38-macosx_*arm64 cp39-macosx_*arm64" # Skip 32 bit and problematic mac builds. 218 | CIBW_ARCHS_LINUX: ${{ matrix.cibw_archs_linux }} 219 | CIBW_BEFORE_ALL_LINUX: ${{ matrix.cibw_before_all_linux }} 220 | # Fully test the built wheels again. 221 | CIBW_TEST_REQUIRES: "pytest" 222 | # Simple tests that require the project to be built correctly. 223 | # Skip extensive compatibility testing which is slow. 224 | CIBW_TEST_COMMAND_LINUX: >- 225 | pytest -v {project}/tests/test_igzip.py 226 | {project}/tests/test_gzip_compliance.py 227 | {project}/tests/test_zlib_compliance.py 228 | {project}/tests/test_igzip_lib.py 229 | -k 'not test_compress_decompress' 230 | CIBW_TEST_COMMAND_MACOS: >- 231 | pytest -v {project}/tests/test_igzip.py 232 | {project}/tests/test_gzip_compliance.py 233 | {project}/tests/test_zlib_compliance.py 234 | {project}/tests/test_igzip_lib.py 235 | -k 'not test_compress_decompress' 236 | # Windows does not have the test module apparently. Do more expensive 237 | # tests to verify build. 
238 | CIBW_TEST_COMMAND_WINDOWS: >- 239 | pytest {project}/tests/test_igzip.py 240 | {project}/tests/test_igzip_lib.py 241 | {project}/tests/test_compat.py 242 | CIBW_ENVIRONMENT_LINUX: >- 243 | PYTHON_ISAL_BUILD_CACHE=True 244 | PYTHON_ISAL_BUILD_CACHE_FILE=/tmp/build_cache 245 | CFLAGS="-O3 -DNDEBUG" 246 | CIBW_ENVIRONMENT_WINDOWS: >- 247 | PYTHON_ISAL_BUILD_CACHE=True 248 | PYTHON_ISAL_BUILD_CACHE_FILE=${{ runner.temp }}\build_cache 249 | CIBW_ENVIRONMENT_MACOS: >- 250 | PYTHON_ISAL_BUILD_CACHE=True 251 | PYTHON_ISAL_BUILD_CACHE_FILE=${{ runner.temp }}/build_cache 252 | CFLAGS="-O3 -DNDEBUG" 253 | - name: Build sdist 254 | if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'x86_64'}} 255 | run: | 256 | pip install build 257 | python -m build --sdist 258 | - uses: actions/upload-artifact@v4 259 | with: 260 | name: "dist-${{ runner.os }}-${{ runner.arch }}-${{ matrix.cibw_archs_linux }}" 261 | path: "dist/" 262 | - name: Publish package to TestPyPI 263 | # pypa/gh-action-pypi-publish@master does not work on OSX 264 | # Alpha, Beta and dev releases contain a - in the tag. 
265 | if: contains(github.ref, '-') && startsWith(github.ref, 'refs/tags') 266 | run: twine upload --skip-existing --repository-url https://test.pypi.org/legacy/ dist/* 267 | env: 268 | TWINE_USERNAME: __token__ 269 | TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} 270 | - name: Publish package to PyPI 271 | if: "!contains(github.ref, '-') && startsWith(github.ref, 'refs/tags')" 272 | run: twine upload --skip-existing dist/* 273 | env: 274 | TWINE_USERNAME: __token__ 275 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 276 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/isal/_version.py 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/isal/isa-l"] 2 | path = src/isal/isa-l 3 | url = https://github.com/intel/isa-l.git 4 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: [] # Do not build epub and pdf 3 | 4 | python: 5 | install: 6 | - requirements: "requirements-docs.txt" 7 | - method: "pip" 8 | path: "." 9 | 10 | sphinx: 11 | configuration: docs/conf.py 12 | 13 | build: 14 | os: "ubuntu-22.04" 15 | tools: 16 | python: "3" 17 | apt_packages: 18 | - libisal-dev 19 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Changelog 3 | ========== 4 | 5 | .. Newest changes should be on top. 6 | 7 | .. This document is user facing. Please word the changes in such a way 8 | .. that users understand how the changes affect the new version. 9 | 10 | 11 | version 1.8.0-dev 12 | ----------------- 13 | + Python 3.8 is no longer supported. 14 | + Change build backend to setuptools-scm which is more commonly used and 15 | supported. 
16 | + Include test packages in the source distribution, so source distribution 17 | installations can be verified. 18 | + Fix an issue where some tests failed because they ignored PYTHONPATH. 19 | 20 | version 1.7.2 21 | ----------------- 22 | + Use upstream ISA-L version 2.31.1 which includes patches to make 23 | installation on MacOS ARM64 possible. 24 | + Fix a bug where bytes were copied in the wrong order on big endian 25 | architectures. Fixes test failures on s390x. 26 | + Enable building on GNU/Hurd platforms. 27 | 28 | version 1.7.1 29 | ----------------- 30 | + Fix a bug where flushing files when writing in threaded mode did not work 31 | properly. 32 | + Prevent threaded opening from blocking python exit when an error is thrown 33 | in the calling thread. 34 | 35 | version 1.7.0 36 | ----------------- 37 | + Include a patched ISA-L version 2.31. The applied patches make compilation 38 | and wheelbuilding on MacOS ARM64 possible. 39 | + Fix a bug where READ and WRITE in isal.igzip were inconsistent with the 40 | values in gzip on Python 3.13. 41 | + Small simplifications to the ``igzip.compress`` function, which should lead 42 | to less overhead. 43 | 44 | version 1.6.1 45 | ----------------- 46 | + Fix a bug where streams that were passed to igzip_threaded.open were closed. 47 | 48 | version 1.6.0 49 | ----------------- 50 | + Fix a bug where compression levels for IGzipFile were checked in read mode. 51 | + Update statically linked ISA-L release to 2.31.0 52 | + Fix an error that occurred in the ``__close__`` function when a threaded 53 | writer was initialized with incorrect parameters. 54 | 55 | version 1.5.3 56 | ----------------- 57 | + Fix a bug where append mode would not work when using 58 | ``igzip_threaded.open``. 59 | 60 | version 1.5.2 61 | ----------------- 62 | + Fix a bug where a filehandle remained opened when ``igzip_threaded.open`` 63 | was used for writing with a wrong compression level. 
64 | + Fix a memory leak that occurred when an error was thrown for a gzip header 65 | with the wrong magic numbers. 66 | + Fix a memory leak that occurred when isal_zlib.decompressobj was given a 67 | wrong wbits value. 68 | 69 | version 1.5.1 70 | ----------------- 71 | + Fix a memory leak in the GzipReader.readall implementation. 72 | 73 | version 1.5.0 74 | ----------------- 75 | + Make a special case for threads==1 in ``igzip_threaded.open`` for writing 76 | files. This now combines the writing and compression thread for less 77 | overhead. 78 | + Maximize time spent outside the GIL for ``igzip_threaded.open`` writing. 79 | This has decreased wallclock time significantly. 80 | 81 | version 1.4.1 82 | ----------------- 83 | + Fix several errors related to unclosed files and buffers. 84 | 85 | version 1.4.0 86 | ----------------- 87 | + Drop support for python 3.7 and PyPy 3.8 as these are no longer supported. 88 | Add testing and support for python 3.12 and PyPy 3.10. 89 | + Added an experimental ``isal.igzip_threaded`` module which has an 90 | ``open`` function. 91 | This can be used to read and write large files in a streaming fashion 92 | while escaping the GIL. 93 | + The internal ``igzip._IGzipReader`` has been rewritten in C. As a result the 94 | overhead of decompressing files has significantly been reduced and 95 | ``python -m isal.igzip`` is now very close to the C ``igzip`` application. 96 | + The ``igzip._IGZipReader`` in C is now used in ``igzip.decompress``. The 97 | ``_GzipReader`` also can read from objects that support the buffer protocol. 98 | This has reduced overhead significantly. 99 | 100 | version 1.3.0 101 | ----------------- 102 | + Gzip headers are now actively checked for a BGZF extra field. If found the 103 | block size is taken into account when decompressing. This has further 104 | improved bgzf decompression speed by 5% on some files compared to the 105 | more generic solution of 1.2.0. 
106 | + Integrated CPython 3.11 code for reading gzip headers. This leads to more 107 | commonality between the python-isal code and the upstream gzip.py code. 108 | This has enabled the change above. It comes at the cost of a slight increase 109 | in overhead at the ``gzip.decompress`` function. 110 | 111 | version 1.2.0 112 | ----------------- 113 | + Bgzip files are now detected and a smaller reading buffer is used to 114 | accommodate the fact that bgzip blocks are typically less than 64K. (Unlike 115 | normal gzip files that consist of one block that spans the entire file.) 116 | This has reduced decompression time for bgzip files by roughly 12%. 117 | + Speed-up source build by using ISA-L Unix-specific makefile rather than the 118 | autotools build. 119 | + Simplify build setup. ISA-L release flags are now used and not 120 | overwritten with python release flags when building the included static 121 | library. 122 | + Fix bug where zdict's could not be set for ``isal_zlib.decompressobj`` and 123 | ``igzip_lib.IgzipDecompressor``. 124 | + Escape GIL when calling inflate, deflate, crc32 and adler32 functions just 125 | like in CPython. This allows for utilising more CPU cores in combination 126 | with the threading module. This comes with a very slight cost in efficiency 127 | for strict single-threaded applications. 128 | 129 | version 1.1.0 130 | ----------------- 131 | + Added tests and support for Python 3.11. 132 | 133 | version 1.0.1 134 | ------------------ 135 | + Fixed failing tests and wheel builds for PyPy. 136 | 137 | version 1.0.0 138 | ------------------ 139 | Python-isal has been rewritten as a C-extension (first implementation was in 140 | Cython). This has made the library faster in many key areas. 141 | 142 | + Since the module now mostly contains code copied from CPython and then 143 | modified to work with ISA-L the license has been changed to the 144 | Python Software Foundation License version 2. 
145 | + Python versions lower than 3.7 are no longer supported. Python 3.6 has been out 146 | of support since December 2021. 147 | + Stub files with type information have now been updated to correctly display 148 | positional-only arguments. 149 | + Expose ``READ`` and ``WRITE`` constants on the ``igzip`` module. These are 150 | also present in Python's stdlib ``gzip`` module and exposing them allows for 151 | better drop-in capability of ``igzip``. Thanks to @alexander-beedie in 152 | https://github.com/pycompression/python-isal/pull/115. 153 | + A ``--no-name`` flag has been added to ``python -m isal.igzip``. 154 | + Reduced wheel size by not including debug symbols in the binary. Thanks to 155 | @marcelm in https://github.com/pycompression/python-isal/pull/108. 156 | + Cython is no longer required as a build dependency. 157 | + isal_zlib.compressobj and isal_zlib.decompressobj are now about six times 158 | faster. 159 | + igzip.decompress has 30% less overhead when called. 160 | + Error structure has been simplified. There is only ``IsalError`` which has 161 | ``Exception`` as baseclass instead of ``OSError``. ``isal_zlib.IsalError``, 162 | ``igzip_lib.IsalError``, ``isal_zlib.error`` and ``igzip_lib.error`` are 163 | all aliases of the same error class. 164 | + GzipReader now uses larger input and output buffers (128k) by default and 165 | IgzipDecompressor.decompress has been updated to allocate ``maxsize`` buffers 166 | when these are of reasonable size, instead of growing the buffer to maxsize 167 | on every call. This has improved gzip decompression speeds by 7%. 168 | + Patch statically linked included library (ISA-L 2.30.0) to fix the following: 169 | 170 | + ISA-L library version variables are now available on windows as well, 171 | for the statically linked version available on PyPI. 172 | + Wheels are now always built with nasm for the x86 architecture. 173 | Previously yasm was used for Linux and MacOS due to build issues. 
174 | + Fixed a bug upstream in ISA-L where zlib headers would be created with an 175 | incorrect wbits value. 176 | 177 | + Python-isal shows up in Python profiler reports. 178 | + Support and tests for Python 3.10 were added. 179 | + Due to a change in the deployment process wheels should work for older 180 | versions of pip. 181 | + Added a ``crc`` property to the IgzipDecompressor class. Depending on the 182 | decompression flag chosen, this will update with an adler32 or crc32 183 | checksum. 184 | + All the decompression NO_HDR flags on igzip_lib were 185 | incorrectly documented. This is now fixed. 186 | 187 | version 0.11.1 188 | ------------------ 189 | + Fixed an issue which occurred rarely that caused IgzipDecompressor's 190 | unused_data to report back incorrectly. This caused checksum errors when 191 | reading gzip files. The issue was more likely to trigger in multi-member gzip 192 | files. 193 | 194 | version 0.11.0 195 | ------------------ 196 | In this release the ``python -m isal.igzip`` relatively slow decompression rate 197 | has been improved in both speed and usability. Previously it was 19% slower 198 | than ``igzip`` when used with the ``-d`` flag for decompressing, now it is 199 | just 8% slower. Also some extra flags were added to make it easier to select 200 | the output file. 201 | 202 | + Prompt when an output file is overwritten with the ``python -m isal.igzip`` 203 | command line utility and provide the ``-f`` or ``--force`` flags to force 204 | overwriting. 205 | + Added ``-o`` and ``--output`` flags to the ``python -m isal.igzip`` command 206 | line utility to allow the user to select the destination of the output file. 207 | + Reverse a bug in the build system which caused some docstring and parameter 208 | information on ``igzip_lib`` and ``isal_zlib`` to disappear in the 209 | documentation and the REPL. 210 | + Increase the buffer size for ``python -m isal.igzip`` so it is now closer 211 | to speeds reached with ``igzip``. 
212 | + Add a ``READ_BUFFER_SIZE`` attribute to ``igzip`` which allows setting the 213 | amount of raw data that is read at once. 214 | + Add an ``igzip_lib.IgzipDecompressor`` object which can decompress without 215 | using an unconsumed_tail and is therefore more efficient. 216 | 217 | version 0.10.0 218 | ------------------ 219 | + Added an ``igzip_lib`` module which allows more direct access to ISA-L's 220 | igzip_lib API. This allows features such as headerless compression and 221 | decompression, as well as setting the memory levels manually. 222 | + Added more extensive documentation. 223 | 224 | version 0.9.0 225 | ----------------- 226 | + Fix a bug where an AttributeError was triggered when zlib.Z_RLE or 227 | zlib.Z_FIXED were not present. 228 | + Add support for Linux aarch64 builds. 229 | + Add support for pypy by adding pypy tests to the CI and setting up wheel 230 | building support. 231 | 232 | version 0.8.1 233 | ----------------- 234 | + Fix a bug where multi-member gzip files were read incorrectly due to an 235 | offset error. This was caused by ISA-L's decompressobj having a small 236 | bitbuffer which was not taken properly into account in some circumstances. 237 | 238 | version 0.8.0 239 | ----------------- 240 | + Speed up ``igzip.compress`` and ``igzip.decompress`` by improving the 241 | implementation. 242 | + Make sure compiler arguments are passed to ISA-L compilation step. Previously 243 | ISA-L was compiled without optimisation steps, causing the statically linked 244 | library to be significantly slower. 245 | + An unused constant from the ``isal_zlib`` library was removed: 246 | ``ISAL_DEFAULT_HIST_BITS``. 247 | + Refactor isal_zlib.pyx to work almost the same as zlibmodule.c. This has made 248 | the code look cleaner and has reduced some overhead. 249 | 250 | version 0.7.0 251 | ----------------- 252 | + Remove workarounds in the ``igzip`` module for the ``unconsumed_tail`` 253 | and ``unused_data`` bugs. 
``igzip._IGzipReader`` now functions the same 254 | as ``gzip._GzipReader`` with only a few calls replaced with ``isal_zlib`` 255 | calls for speed. 256 | + Correctly implement ``unused_data`` and ``unconsumed_tail`` on 257 | ``isal_zlib.Decompress`` objects. 258 | It works the same as in CPython's zlib now. 259 | + Correctly implement flush implementation on ``isal_zlib.Compress`` and 260 | ``isal_zlib.Decompress`` objects. 261 | It works the same as in CPython's zlib now. 262 | 263 | version 0.6.1 264 | ----------------- 265 | + Fix a crash that occurs when opening a file that did not end in ``.gz`` while 266 | outputting to stdout using ``python -m isal.igzip``. 267 | 268 | version 0.6.0 269 | ----------------- 270 | + ``python -m gzip``'s behaviour has been changed since fixing bug: 271 | `bpo-43316 <https://bugs.python.org/issue43316>`_. This bug was not present 272 | in ``python -m isal.igzip`` but it handled the error differently than the 273 | solution in CPython. This is now corrected and ``python -m isal.igzip`` 274 | handles the error the same as the fixed ``python -m gzip``. 275 | + Installation on Windows is now supported. Wheels are provided for Windows as 276 | well. 277 | 278 | version 0.5.0 279 | ----------------- 280 | + Fix a bug where negative integers were not allowed for the ``adler32`` and 281 | ``crc32`` functions in ``isal_zlib``. 282 | + Provided stubs (type-hint files) for ``isal_zlib`` and ``_isal`` modules. 283 | Package is now tested with mypy to ensure correct type information. 284 | + The command-line interface now reads in blocks of 32K instead of 8K. This 285 | improves performance by about 6% when compressing and 11% when decompressing. 286 | A hidden ``-b`` flag was added to adjust the buffer size for benchmarks. 287 | + A ``-c`` or ``--stdout`` flag was added to the CLI interface of isal.igzip. 288 | This allows it to behave more like the ``gzip`` or ``pigz`` command line 289 | interfaces. 
290 | 291 | version 0.4.0 292 | ----------------- 293 | + Move wheel building to cibuildwheel on github actions CI. Wheels are now 294 | provided for Mac OS as well. 295 | + Make a tiny change in setup.py so python-isal can be built on Mac OS X. 296 | 297 | version 0.3.0 298 | ----------------- 299 | + Set included ISA-L library at version 2.30.0. 300 | + Python-isal now comes with a source distribution of ISA-L in its source 301 | distribution against which python-isal is linked statically upon installation 302 | by default. Dynamic linking against system libraries is now optional. Wheels 303 | with the statically linked ISA-L are now provided on PyPI. 304 | 305 | version 0.2.0 306 | ----------------- 307 | + Fixed a bug where writing of the gzip header would crash if an older version 308 | of Python 3.7 was used such as on Debian or Ubuntu. This is due to 309 | differences between point releases because of a backported feature. The code 310 | now checks if the backported feature is present. 311 | + Added Python 3.9 to the testing. 312 | + Fixed ``setup.py`` to list setuptools as a requirement. 313 | + Changed homepage to reflect move to pycompression organization. 314 | 315 | version 0.1.0 316 | ----------------- 317 | + Publish API documentation on readthedocs. 318 | + Add API documentation. 319 | + Ensure the igzip module is fully compatible with the gzip stdlib module. 320 | + Add compliance tests from CPython to ensure isal_zlib and igzip are validated 321 | to the same standards as the zlib and gzip modules. 322 | + Added a working gzip app using ``python -m isal.igzip``. 323 | + Add test suite that tests all possible settings for functions on the 324 | isal_zlib module. 325 | + Create igzip module which implements all gzip functions and methods. 326 | + Create isal_zlib module which implements all zlib functions and methods. 
327 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 2 | -------------------------------------------- 3 | 4 | 1. This LICENSE AGREEMENT is between the Python Software Foundation 5 | ("PSF"), and the Individual or Organization ("Licensee") accessing and 6 | otherwise using this software ("Python") in source or binary form and 7 | its associated documentation. 8 | 9 | 2. Subject to the terms and conditions of this License Agreement, PSF hereby 10 | grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 11 | analyze, test, perform and/or display publicly, prepare derivative works, 12 | distribute, and otherwise use Python alone or in any derivative version, 13 | provided, however, that PSF's License Agreement and PSF's notice of copyright, 14 | i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 15 | 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Python Software Foundation; 16 | All Rights Reserved" are retained in Python alone or in any derivative version 17 | prepared by Licensee. 18 | 19 | 3. In the event Licensee prepares a derivative work that is based on 20 | or incorporates Python or any part thereof, and wants to make 21 | the derivative work available to others as provided herein, then 22 | Licensee hereby agrees to include in any such work a brief summary of 23 | the changes made to Python. 24 | 25 | 4. PSF is making Python available to Licensee on an "AS IS" 26 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 27 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 28 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 29 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT 30 | INFRINGE ANY THIRD PARTY RIGHTS. 31 | 32 | 5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 33 | FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 34 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, 35 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 36 | 37 | 6. This License Agreement will automatically terminate upon a material 38 | breach of its terms and conditions. 39 | 40 | 7. Nothing in this License Agreement shall be deemed to create any 41 | relationship of agency, partnership, or joint venture between PSF and 42 | Licensee. This License Agreement does not grant permission to use PSF 43 | trademarks or trade name in a trademark sense to endorse or promote 44 | products or services of Licensee, or any third party. 45 | 46 | 8. By copying, installing or otherwise using Python, Licensee 47 | agrees to be bound by the terms and conditions of this License 48 | Agreement. 49 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/isal/isa-l 2 | include src/isal/*.h 3 | prune docs 4 | prune .github 5 | exclude .git* 6 | prune benchmark_scripts 7 | exclude requirements-docs.txt 8 | exclude codecov.yml 9 | exclude .readthedocs.yml 10 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://img.shields.io/pypi/v/isal.svg 2 | :target: https://pypi.org/project/isal/ 3 | :alt: 4 | 5 | .. image:: https://img.shields.io/conda/v/conda-forge/python-isal.svg 6 | :target: https://github.com/conda-forge/python-isal-feedstock 7 | :alt: 8 | 9 | .. image:: https://img.shields.io/pypi/pyversions/isal.svg 10 | :target: https://pypi.org/project/isal/ 11 | :alt: 12 | 13 | .. 
image:: https://img.shields.io/pypi/l/isal.svg 14 | :target: https://github.com/pycompression/python-isal/blob/main/LICENSE 15 | :alt: 16 | 17 | .. image:: https://img.shields.io/conda/pn/conda-forge/python-isal.svg 18 | :target: https://github.com/conda-forge/python-isal-feedstock 19 | :alt: 20 | 21 | .. image:: https://github.com/pycompression/python-isal//actions/workflows/ci.yml/badge.svg 22 | :target: https://github.com/pycompression/python-isal/actions 23 | :alt: 24 | 25 | .. image:: https://codecov.io/gh/pycompression/python-isal/branch/develop/graph/badge.svg 26 | :target: https://codecov.io/gh/pycompression/python-isal 27 | :alt: 28 | 29 | .. image:: https://readthedocs.org/projects/python-isal/badge 30 | :target: https://python-isal.readthedocs.io 31 | :alt: 32 | 33 | 34 | python-isal 35 | =========== 36 | 37 | .. introduction start 38 | 39 | Faster zlib and gzip compatible compression and decompression 40 | by providing Python bindings for the ISA-L library. 41 | 42 | This package provides Python bindings for the `ISA-L 43 | `_ library. The Intel(R) Intelligent Storage 44 | Acceleration Library (ISA-L) implements several key algorithms in `assembly 45 | language `_. This includes 46 | a variety of functions to provide zlib/gzip-compatible compression. 47 | 48 | ``python-isal`` provides the bindings by offering four modules: 49 | 50 | + ``isal_zlib``: A drop-in replacement for the zlib module that uses ISA-L to 51 | accelerate its performance. 52 | + ``igzip``: A drop-in replacement for the gzip module that uses ``isal_zlib`` 53 | instead of ``zlib`` to perform its compression and checksum tasks, which 54 | improves performance. 55 | + ``igzip_threaded`` offers an ``open`` function which returns buffered read 56 | or write streams that can be used to read and write large files while 57 | escaping the GIL using one or multiple threads. This functionality only 58 | works for streaming, seeking is not supported. 
59 | + ``igzip_lib``: Provides compression functions which have full access to the 60 | API of ISA-L's compression functions. 61 | 62 | ``isal_zlib`` and ``igzip`` are almost fully compatible with ``zlib`` and 63 | ``gzip`` from the Python standard library. There are some minor differences 64 | see: differences-with-zlib-and-gzip-modules_. 65 | 66 | .. introduction end 67 | 68 | Quickstart 69 | ---------- 70 | 71 | .. quickstart start 72 | 73 | The python-isal modules can be imported as follows 74 | 75 | .. code-block:: python 76 | 77 | from isal import isal_zlib 78 | from isal import igzip 79 | from isal import igzip_lib 80 | 81 | ``isal_zlib`` and ``igzip`` are meant to be used as drop in replacements so 82 | their api and functions are the same as the stdlib's modules. Except where 83 | ISA-L does not support the same calls as zlib (See differences below). 84 | 85 | A full API documentation can be found on `our readthedocs page 86 | `_. 87 | 88 | ``python -m isal.igzip`` implements a simple gzip-like command line 89 | application (just like ``python -m gzip``). Full usage documentation can be 90 | found on `our readthedocs page `_. 91 | 92 | 93 | .. quickstart end 94 | 95 | Installation 96 | ------------ 97 | - with pip: ``pip install isal`` 98 | - with conda: ``conda install python-isal`` 99 | 100 | Installation is supported on Linux, Windows and MacOS. For more advanced 101 | installation options check the `documentation 102 | `_. 103 | 104 | python-isal as a dependency in your project 105 | ------------------------------------------- 106 | 107 | .. dependency start 108 | 109 | Python-isal supports a limited amount of platforms for which wheels have been 110 | made available. To prevent your users from running into issues when installing 111 | your project please list a python-isal dependency as follows. 
112 | 113 | ``setup.cfg``:: 114 | 115 | install_requires = 116 | isal; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64" 117 | 118 | ``setup.py``:: 119 | 120 | extras_require={ 121 | ":platform.machine == 'x86_64' or platform.machine == 'AMD64' or platform.machine == 'aarch64'": ['isal'] 122 | }, 123 | 124 | .. dependency end 125 | 126 | .. _differences-with-zlib-and-gzip-modules: 127 | 128 | Differences with zlib and gzip modules 129 | -------------------------------------- 130 | 131 | .. differences start 132 | 133 | + Compression level 0 in ``zlib`` and ``gzip`` means **no compression**, while 134 | in ``isal_zlib`` and ``igzip`` this is the **lowest compression level**. 135 | This is a design choice that was inherited from the ISA-L library. 136 | + Compression levels range from 0 to 3, not 1 to 9. ``isal_zlib.Z_DEFAULT_COMPRESSION`` 137 | has been aliased to ``isal_zlib.ISAL_DEFAULT_COMPRESSION`` (2). 138 | + ``isal_zlib`` only supports ``NO_FLUSH``, ``SYNC_FLUSH``, ``FULL_FLUSH`` and 139 | ``FINISH_FLUSH``. Other flush modes are not supported and will raise errors. 140 | + ``zlib.Z_DEFAULT_STRATEGY``, ``zlib.Z_RLE`` etc. are exposed as 141 | ``isal_zlib.Z_DEFAULT_STRATEGY``, ``isal_zlib.Z_RLE`` etc. for compatibility 142 | reasons. However, ``isal_zlib`` only supports a default strategy and will 143 | give warnings when other strategies are used. 144 | + ``zlib`` supports different memory levels from 1 to 9 (with 8 default). 145 | ``isal_zlib`` supports memory levels smallest, small, medium, large and 146 | largest. These have been mapped to levels 1, 2-3, 4-6, 7-8 and 9. So 147 | ``isal_zlib`` can be used with zlib compatible memory levels. 148 | + ``igzip.open`` returns a class ``IGzipFile`` instead of ``GzipFile``. Since 149 | the compression levels are not compatible, a difference in naming was chosen 150 | to reflect this. 
``igzip.GzipFile`` does exist as an alias of 151 | ``igzip.IGzipFile`` for compatibility reasons. 152 | + ``igzip._GzipReader`` has been rewritten in C. Since this is a private member 153 | it should not affect compatibility, but it may cause some issues for 154 | instances where this code is used directly. If such issues should occur, 155 | please report them so the compatibility issues can be fixed. 156 | 157 | .. differences end 158 | 159 | Contributing 160 | ------------ 161 | .. contributing start 162 | 163 | Please make a PR or issue if you feel anything can be improved. Bug reports 164 | are also very welcome. Please report them on the `github issue tracker 165 | `_. 166 | 167 | .. contributing end 168 | 169 | Development 170 | ----------- 171 | .. development start 172 | 173 | The repository needs to be cloned recursively to make sure the 174 | `ISA-L `_ repository is checked out: 175 | ``git clone --recursive https://github.com/pycompression/python-isal.git``. If 176 | the repository is already checked out you can use ``git submodule update --init``. 177 | 178 | Patches should be made on a feature branch. To run the testing install ``tox`` 179 | with ``pip install tox`` and run the commands ``tox -e lint`` and 180 | ``tox``. That will run most of the testing that is also performed by the CI. 181 | For changes to the documentation run ``tox -e docs``. For changes to the C 182 | code please also run ``tox -e asan`` to check for memory leaks. This requires 183 | libasan to be installed. 184 | 185 | Building requires the 186 | `ISA-L build requirements `_ 187 | as well. 188 | 189 | .. development end 190 | 191 | Acknowledgements 192 | ---------------- 193 | 194 | .. acknowledgements start 195 | 196 | This project builds upon the software and experience of many. Many thanks to: 197 | 198 | + The `ISA-L contributors 199 | `_ for making ISA-L. 200 | Special thanks to @gbtucker for always being especially helpful and 201 | responsive. 
202 | + The `Cython contributors 203 | `_ for making it easy 204 | to create an extension and helping a novice get started with pointer addresses. 205 | + The `CPython contributors 206 | `_. 207 | Python-isal mimics ``zlibmodule.c`` and ``gzip.py`` from the standard 208 | library to make it easier for python users to adopt it. 209 | + `@marcelm `_ for taking a chance on this project 210 | and making it a dependency for his `xopen 211 | `_ and by extension `cutadapt 212 | `_ projects. This gave python-isal its 213 | first users who used python-isal in production. 214 | + Mark Adler (@madler) for the excellent comments in his pigz code which made 215 | it very easy to replicate the behaviour for writing gzip with multiple 216 | threads using the ``threading`` and ``isal_zlib`` modules. Another thanks 217 | for his permissive license, which allowed the crc32_combine code to be 218 | included in the project. (ISA-L does not provide a crc32_combine function, 219 | unlike zlib.) And yet another thanks to Mark Adler and also to 220 | Jean-loup Gailly for creating the gzip format which is very heavily used 221 | in bioinformatics. Without that, I would have never written this library 222 | from which I have learned so much. 223 | + The `github actions team `_ for 224 | creating the actions CI service that enables building and testing on all 225 | three major operating systems. 226 | + `@animalize `_ for explaining how to test and 227 | build python-isal for ARM 64-bit platforms. 228 | + And last but not least: everyone who submitted a bug report or a feature 229 | request. These make the project better! 230 | 231 | Python-isal would not have been possible without you! 232 | 233 | .. 
acknowledgements end 234 | -------------------------------------------------------------------------------- /benchmark_scripts/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gzip 3 | import io # noqa: F401 used in timeit strings 4 | import timeit 5 | import zlib 6 | from pathlib import Path 7 | from typing import Dict 8 | 9 | from isal import igzip, isal_zlib # noqa: F401 used in timeit strings 10 | 11 | DATA_DIR = Path(__file__).parent.parent / "tests" / "data" 12 | COMPRESSED_FILE = DATA_DIR / "test.fastq.gz" 13 | with gzip.open(str(COMPRESSED_FILE), mode="rb") as file_h: 14 | data = file_h.read() 15 | 16 | sizes: Dict[str, bytes] = { 17 | "0b": b"", 18 | "8b": data[:8], 19 | "128b": data[:128], 20 | "1kb": data[:1024], 21 | "8kb": data[:8 * 1024], 22 | "16kb": data[:16 * 1024], 23 | "32kb": data[:32 * 1024], 24 | "64kb": data[:64 * 1024], 25 | # "128kb": data[:128*1024], 26 | # "512kb": data[:512*1024] 27 | } 28 | compressed_sizes = {name: zlib.compress(data_block) 29 | for name, data_block in sizes.items()} 30 | 31 | compressed_sizes_gzip = {name: gzip.compress(data_block) 32 | for name, data_block in sizes.items()} 33 | 34 | 35 | def show_sizes(): 36 | print("zlib sizes") 37 | print("name\t" + "\t".join(str(level) for level in range(-1, 10))) 38 | for name, data_block in sizes.items(): 39 | orig_size = max(len(data_block), 1) 40 | rel_sizes = ( 41 | str(round(len(zlib.compress(data_block, level)) / orig_size, 3)) 42 | for level in range(-1, 10)) 43 | print(name + "\t" + "\t".join(rel_sizes)) 44 | 45 | print("isal sizes") 46 | print("name\t" + "\t".join(str(level) for level in range(0, 4))) 47 | for name, data_block in sizes.items(): 48 | orig_size = max(len(data_block), 1) 49 | rel_sizes = ( 50 | str(round(len(isal_zlib.compress(data_block, level)) / orig_size, 51 | 3)) 52 | for level in range(0, 4)) 53 | print(name + "\t" + "\t".join(rel_sizes)) 54 | 55 | 56 | def benchmark(name: 
str, 57 | names_and_data: Dict[str, bytes], 58 | isal_string: str, 59 | zlib_string: str, 60 | number: int = 10_000, 61 | **kwargs): 62 | print(name) 63 | print("name\tisal\tzlib\tratio") 64 | for name, data_block in names_and_data.items(): 65 | timeit_kwargs = dict(globals=dict(**globals(), **locals()), 66 | number=number, **kwargs) 67 | isal_time = timeit.timeit(isal_string, **timeit_kwargs) 68 | zlib_time = timeit.timeit(zlib_string, **timeit_kwargs) 69 | isal_microsecs = round(isal_time * (1_000_000 / number), 2) 70 | zlib_microsecs = round(zlib_time * (1_000_000 / number), 2) 71 | ratio = round(isal_time / zlib_time, 2) 72 | print("{0}\t{1}\t{2}\t{3}".format(name, 73 | isal_microsecs, 74 | zlib_microsecs, 75 | ratio)) 76 | 77 | 78 | # show_sizes() 79 | 80 | def argument_parser() -> argparse.ArgumentParser: 81 | parser = argparse.ArgumentParser() 82 | parser.add_argument("--all", action="store_true") 83 | parser.add_argument("--checksums", action="store_true") 84 | parser.add_argument("--functions", action="store_true") 85 | parser.add_argument("--gzip", action="store_true") 86 | parser.add_argument("--sizes", action="store_true") 87 | parser.add_argument("--objects", action="store_true") 88 | return parser 89 | 90 | 91 | if __name__ == "__main__": 92 | args = argument_parser().parse_args() 93 | if args.checksums or args.all: 94 | benchmark("CRC32", sizes, 95 | "isal_zlib.crc32(data_block)", 96 | "zlib.crc32(data_block)") 97 | 98 | benchmark("Adler32", sizes, 99 | "isal_zlib.adler32(data_block)", 100 | "zlib.adler32(data_block)") 101 | if args.functions or args.all: 102 | benchmark("zlib compression", sizes, 103 | "isal_zlib.compress(data_block, 1)", 104 | "zlib.compress(data_block, 1)") 105 | 106 | benchmark("zlib decompression", compressed_sizes, 107 | "isal_zlib.decompress(data_block)", 108 | "zlib.decompress(data_block)") 109 | 110 | if args.gzip or args.all: 111 | benchmark("gzip compression", sizes, 112 | "igzip.compress(data_block, 1)", 113 | 
"gzip.compress(data_block, 1)") 114 | 115 | benchmark("gzip decompression", compressed_sizes_gzip, 116 | "igzip.decompress(data_block)", 117 | "gzip.decompress(data_block)") 118 | if args.objects or args.all: 119 | benchmark("zlib Compress instantiation", {"": b""}, 120 | "a = isal_zlib.compressobj()", 121 | "a = zlib.compressobj()") 122 | benchmark("zlib Decompress instantiation", {"": b""}, 123 | "a = isal_zlib.decompressobj()", 124 | "a = zlib.decompressobj()") 125 | benchmark("Gzip Writer instantiation", {"": b""}, 126 | "a = igzip.GzipFile(fileobj=io.BytesIO(), mode='wb' )", 127 | "a = gzip.GzipFile(fileobj=io.BytesIO(), mode='wb')") 128 | benchmark("Gzip Reader instantiation", {"": b""}, 129 | "a = igzip.GzipFile(fileobj=io.BytesIO(), mode='rb' )", 130 | "a = gzip.GzipFile(fileobj=io.BytesIO(), mode='rb')") 131 | if args.sizes or args.all: 132 | show_sizes() 133 | -------------------------------------------------------------------------------- /benchmark_scripts/benchmark_cgzipreader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from isal.isal_zlib import _GzipReader 4 | 5 | if __name__ == "__main__": 6 | with open(sys.argv[1], "rb") as f: 7 | reader = _GzipReader(f, 512 * 1024) 8 | while True: 9 | block = reader.read(128 * 1024) 10 | if not block: 11 | break 12 | -------------------------------------------------------------------------------- /benchmark_scripts/gzipread128kblocks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from isal import igzip 4 | 5 | with igzip.open(sys.argv[1], "rb") as gzip_file: 6 | while True: 7 | block = gzip_file.read(128 * 1024) 8 | if not block: 9 | break 10 | -------------------------------------------------------------------------------- /benchmark_scripts/gzipreadlines.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from isal import igzip 4 | 5 
| with igzip.open(sys.argv[1], "rb") as gzip_file: 6 | for line in gzip_file: 7 | pass 8 | -------------------------------------------------------------------------------- /benchmark_scripts/gzipthreadsread128kblocks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from isal import igzip_threaded 4 | 5 | with igzip_threaded.open(sys.argv[1], "rb") as gzip_file: 6 | while True: 7 | block = gzip_file.read(128 * 1024) 8 | if not block: 9 | break 10 | -------------------------------------------------------------------------------- /benchmark_scripts/gzipwrite128kblocks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from isal import igzip 5 | 6 | with open(sys.argv[1], "rb") as in_file: 7 | with igzip.open(os.devnull, "wb") as out_gzip: 8 | while True: 9 | block = in_file.read(128 * 1024) 10 | if block == b"": 11 | break 12 | out_gzip.write(block) 13 | -------------------------------------------------------------------------------- /benchmark_scripts/gzipwritelines.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from isal import igzip 5 | 6 | with open(sys.argv[1], "rb") as in_file: 7 | with igzip.open(os.devnull, "wb") as out_gzip: 8 | for line in in_file: 9 | out_gzip.write(line) 10 | -------------------------------------------------------------------------------- /benchmark_scripts/memory_leak_test.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import resource 3 | import sys 4 | 5 | from isal import igzip 6 | 7 | for _ in range(10): 8 | with igzip.open(sys.argv[1], "rb") as reader: 9 | a = reader.read() 10 | print(len(a)) 11 | gc.collect() 12 | memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 13 | memory_usage_mb = memory_usage / 1024 14 | print(f"Maximum memory usage: {memory_usage_mb:.2f} 
MiB") 15 | del a 16 | objects_and_size = [(sys.getsizeof(obj), type(obj)) for obj in 17 | gc.get_objects()] 18 | objects_and_size.sort(key=lambda x: x[0], reverse=True) 19 | print(objects_and_size[:10]) 20 | -------------------------------------------------------------------------------- /benchmark_scripts/profile_igziplinewriter.py: -------------------------------------------------------------------------------- 1 | import cProfile 2 | import os 3 | import sys 4 | 5 | from isal import igzip 6 | 7 | 8 | def main(): 9 | with open(sys.argv[1], mode="rb") as in_file: 10 | with igzip.open(os.devnull, mode="wb") as gzip_h: 11 | for line in in_file: 12 | gzip_h.write(line) 13 | 14 | 15 | if __name__ == "__main__": 16 | cProfile.run("main()") 17 | -------------------------------------------------------------------------------- /benchmark_scripts/profile_igzipreader.py: -------------------------------------------------------------------------------- 1 | import cProfile 2 | import sys 3 | 4 | from isal import igzip 5 | 6 | 7 | def main(): 8 | with igzip.open(sys.argv[1], mode="rb") as gzip_h: 9 | while True: 10 | block = gzip_h.read(128*1024) 11 | if block == b"": 12 | return 13 | 14 | 15 | if __name__ == "__main__": 16 | cProfile.run("main()") 17 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 90 # let's try to hit high standards 6 | patch: 7 | default: 8 | target: 90 # Tests should be written for new features 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | from isal import __version__ 8 | 9 | # -- Project information ----------------------------------------------------- 10 | 11 | # Get package information from the installed package. 12 | 13 | project = 'python-isal' 14 | copyright = '2020, Leiden University Medical Center' 15 | author = 'Leiden University Medical Center' 16 | 17 | # The short X.Y version 18 | version = __version__ 19 | # The full version, including alpha/beta/rc tags 20 | release = __version__ 21 | 22 | # -- General configuration --------------------------------------------------- 23 | 24 | # Add any Sphinx extension module names here, as strings. They can be 25 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 26 | # ones. 27 | extensions = ["sphinx.ext.autodoc", 'sphinxarg.ext'] 28 | 29 | # Add any paths that contain templates here, relative to this directory. 
30 | templates_path = ['_templates'] 31 | 32 | # List of patterns, relative to source directory, that match files and 33 | # directories to ignore when looking for source files. 34 | # This pattern also affects html_static_path and html_extra_path. 35 | # includes/* prevents double indexing 36 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'includes/*'] 37 | 38 | 39 | # -- Options for HTML output ------------------------------------------------- 40 | 41 | # The theme to use for HTML and HTML Help pages. See the documentation for 42 | # a list of builtin themes. 43 | # 44 | html_theme = 'sphinx_rtd_theme' 45 | 46 | # Add any paths that contain custom static files (such as style sheets) here, 47 | # relative to this directory. They are copied after the builtin static files, 48 | # so a file named "default.css" will overwrite the builtin "default.css". 49 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/includes/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ../../CHANGELOG.rst -------------------------------------------------------------------------------- /docs/includes/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. python-isal documentation master file, created by 2 | sphinx-quickstart on Fri Sep 11 15:42:56 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ======================================= 7 | Welcome to python-isal's documentation! 8 | ======================================= 9 | 10 | .. 
toctree:: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | ============ 15 | Introduction 16 | ============ 17 | 18 | .. include:: includes/README.rst 19 | :start-after: .. introduction start 20 | :end-before: .. introduction end 21 | 22 | ========== 23 | Quickstart 24 | ========== 25 | 26 | .. include:: includes/README.rst 27 | :start-after: .. quickstart start 28 | :end-before: .. quickstart end 29 | 30 | ============ 31 | Installation 32 | ============ 33 | Installation with pip 34 | --------------------- 35 | 36 | :: 37 | 38 | pip install isal 39 | 40 | Installation is supported on Linux, MacOS and Windows. On most platforms 41 | wheels are provided. 42 | The installation will include a statically linked version of ISA-L. 43 | If a wheel is not provided for your system the 44 | installation will build ISA-L first in a temporary directory. Please check the 45 | `ISA-L homepage `_ for the build requirements. 46 | 47 | The latest development version of python-isal can be installed with:: 48 | 49 | pip install git+https://github.com/rhpvorderman/python-isal.git 50 | 51 | This requires having the build requirements installed. 52 | If you wish to link 53 | dynamically against a version of libisal installed on your system use:: 54 | 55 | PYTHON_ISAL_LINK_DYNAMIC=true pip install isal --no-binary isal 56 | 57 | ISA-L is available in numerous Linux distros as well as on conda via the 58 | conda-forge channel. Check out the `ports documentation 59 | `_ on the ISA-L project wiki 60 | to find out how to install it. It is important that the development headers 61 | are also installed. 62 | 63 | On Debian and Ubuntu the ISA-L libraries (including the development headers) 64 | can be installed with:: 65 | 66 | sudo apt install libisal-dev 67 | 68 | Installation via conda 69 | ---------------------- 70 | Python-isal can be installed via conda, for example using 71 | the `miniconda `_ installer 72 | with a properly set up `conda-forge 73 | `_ 74 | channel. 
When used with bioinformatics tools setting up `bioconda 75 | `_ 76 | provides a clear set of installation instructions for conda. 77 | 78 | python-isal is available on conda-forge and can be installed with:: 79 | 80 | conda install python-isal 81 | 82 | This will automatically install the ISA-L library dependency as well, since 83 | it is available on conda-forge. 84 | 85 | =========================================== 86 | python-isal as a dependency in your project 87 | =========================================== 88 | 89 | .. include:: includes/README.rst 90 | :start-after: .. dependency start 91 | :end-before: .. dependency end 92 | 93 | .. _differences-with-zlib-and-gzip-modules: 94 | 95 | ====================================== 96 | Differences with zlib and gzip modules 97 | ====================================== 98 | 99 | .. include:: includes/README.rst 100 | :start-after: .. differences start 101 | :end-before: .. differences end 102 | 103 | ============================ 104 | API Documentation: isal_zlib 105 | ============================ 106 | 107 | .. automodule:: isal.isal_zlib 108 | :members: 109 | 110 | .. autoclass:: Compress 111 | :members: 112 | 113 | .. autoclass:: Decompress 114 | :members: 115 | 116 | ======================== 117 | API-documentation: igzip 118 | ======================== 119 | 120 | .. automodule:: isal.igzip 121 | :members: compress, decompress, open, BadGzipFile, GzipFile, READ_BUFFER_SIZE 122 | 123 | .. autoclass:: IGzipFile 124 | :members: 125 | :special-members: __init__ 126 | 127 | ================================= 128 | API-documentation: igzip_threaded 129 | ================================= 130 | 131 | .. automodule:: isal.igzip_threaded 132 | :members: open 133 | 134 | ============================ 135 | API Documentation: igzip_lib 136 | ============================ 137 | 138 | .. automodule:: isal.igzip_lib 139 | :members: compress, decompress, 140 | 141 | .. 
autoclass:: IgzipDecompressor 142 | :members: 143 | 144 | ========================== 145 | python -m isal.igzip usage 146 | ========================== 147 | 148 | .. argparse:: 149 | :module: isal.igzip 150 | :func: _argument_parser 151 | :prog: python -m isal.igzip 152 | 153 | 154 | ============ 155 | Contributing 156 | ============ 157 | .. include:: includes/README.rst 158 | :start-after: .. contributing start 159 | :end-before: .. contributing end 160 | 161 | =========== 162 | Development 163 | =========== 164 | .. include:: includes/README.rst 165 | :start-after: .. development start 166 | :end-before: .. development end 167 | 168 | ================ 169 | Acknowledgements 170 | ================ 171 | .. include:: includes/README.rst 172 | :start-after: .. acknowledgements start 173 | :end-before: .. acknowledgements end 174 | 175 | .. include:: includes/CHANGELOG.rst 176 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=77", "setuptools-scm>=8"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "isal" 7 | dynamic = ["version"] 8 | description = """ 9 | Faster zlib and gzip compatible compression and decompression by providing \ 10 | python bindings for the ISA-L library.""" 11 | license="PSF-2.0" 12 | keywords=["isal", "isa-l", "compression", "deflate", "gzip", "igzip"] 13 | authors = [{name = "Leiden University Medical Center"}, 14 | {email = "r.h.p.vorderman@lumc.nl"}] 15 | readme = "README.rst" 16 | requires-python = ">=3.9" # Because of setuptools version 17 | classifiers = [ 18 | "Programming Language :: Python :: 3 :: Only", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | "Programming Language :: Python :: Implementation :: CPython", 26 | "Programming Language :: Python :: Implementation :: PyPy", 27 | "Programming Language :: C", 28 | "Development Status :: 5 - Production/Stable", 29 | "Topic :: System :: Archiving :: Compression", 30 | "Operating System :: POSIX :: Linux", 31 | "Operating System :: MacOS", 32 | "Operating System :: Microsoft :: Windows", 33 | ] 34 | urls.homepage = "https://github.com/pycompression/python-isal" 35 | urls.documentation = "python-isal.readthedocs.io" 36 | 37 | 
[tool.setuptools_scm] 38 | version_file = "src/isal/_version.py" 39 | 40 | [tool.setuptools.packages.find] 41 | where = ["src"] 42 | include = ["isal"] 43 | 44 | [tool.setuptools.package-data] 45 | isal = ['*.pyi', 'py.typed', 'isa-l/LICENSE', 'isa-l/README.md', 'isa-l/Release_notes.txt'] 46 | [tool.setuptools.exclude-package-data] 47 | isal = [ 48 | "*.c", 49 | "*.h", 50 | "isa-l/*/*", 51 | "isa-l/Mak*", 52 | "isa-l/.*", 53 | "isa-l/autogen.sh", 54 | "isa-l/Doxyfile", 55 | "isa-l/CONTRIBUTING.md", 56 | "isa-l/SECURITY.md", 57 | "isa-l/configure.ac", 58 | "isa-l/isa-l.*", 59 | "isa-l/libisal.pc.in", 60 | "isa-l/make.inc", 61 | ] 62 | -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # https://github.com/sphinx-doc/sphinx/issues/13415 2 | sphinx <8 3 | sphinx-rtd-theme 4 | sphinx-argparse 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Leiden University Medical Center 2 | # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 3 | # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 4 | # Python Software Foundation; All Rights Reserved 5 | 6 | # This file is part of python-isal which is distributed under the 7 | # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 
class BuildIsalExt(build_ext):
    """``build_ext`` command that links every extension against ISA-L.

    ISA-L is normally compiled from the bundled sources and linked
    statically. When the PYTHON_ISAL_LINK_DYNAMIC or READTHEDOCS environment
    variable is set, the extension is linked against an installed shared
    library instead.
    """

    def build_extension(self, ext):
        """Configure *ext* for dynamic or static linking, then build it."""
        # Dynamic linking is offered for packaging systems such as conda and
        # is always used on readthedocs to simplify the install.
        wants_dynamic = ("PYTHON_ISAL_LINK_DYNAMIC" in os.environ
                         or "READTHEDOCS" in os.environ)
        if wants_dynamic:
            self._link_dynamically(ext)
        else:
            self._link_statically(ext)
        super().build_extension(ext)

    @staticmethod
    def _link_dynamically(ext):
        """Point *ext* at an installed ISA-L shared library."""
        # Look for the isa-l headers below the interpreter prefixes, which is
        # where a conda environment puts them. On Windows they live below
        # <prefix>/Library. The first hit wins: one include dir is enough.
        for prefix in (sys.exec_prefix, sys.base_exec_prefix):
            layouts = ((prefix,), (prefix, "Library"))
            hit = next(
                (parts for parts in layouts
                 if Path(*parts, "include", "isa-l").exists()),
                None,
            )
            if hit is not None:
                ext.include_dirs = [os.path.join(*hit, "include")]
                ext.library_dirs = [os.path.join(*hit, "lib")]
                break

        if not (SYSTEM_IS_UNIX or SYSTEM_IS_WINDOWS):
            raise NotImplementedError(
                f"Unsupported platform: {sys.platform}")
        # libisal.so* on unix-likes, isa-l.dll on Windows.
        ext.libraries = ["isal"] if SYSTEM_IS_UNIX else ["isa-l"]

    @staticmethod
    def _link_statically(ext):
        """Build the bundled ISA-L and add its static library to *ext*."""
        build_root = build_isa_l()
        if SYSTEM_IS_UNIX:
            static_lib = os.path.join(build_root, "bin", "isa-l.a")
        elif SYSTEM_IS_WINDOWS:
            static_lib = os.path.join(build_root, "isa-l_static.lib")
        else:
            raise NotImplementedError(
                f"Unsupported platform: {sys.platform}")
        ext.extra_objects = [static_lib]
        ext.include_dirs = [build_root]
# see: https://docs.python.org/3/library/functools.html#functools.cache
@functools.lru_cache(maxsize=None)
def build_isa_l():
    """Build the bundled ISA-L sources and return the build directory.

    The result is memoized, so ISA-L is compiled at most once per process
    even though several extensions request it. When the
    PYTHON_ISAL_BUILD_CACHE environment variable is set, the build directory
    is also recorded in BUILD_CACHE_FILE and reused by later processes for as
    long as it still contains ``isa-l.h``.

    Returns:
        The path (str) of the directory ISA-L was built in. Its ``include``
        directory is duplicated as ``isa-l`` inside that same directory.

    Raises:
        NotImplementedError: the platform is neither unix-like nor Windows.
        RuntimeError: the build tool did not produce the static library.
    """
    # Reuse a build from an earlier process when caching is enabled.
    if BUILD_CACHE and BUILD_CACHE_FILE.exists():
        cache_path = Path(BUILD_CACHE_FILE.read_text())
        if (cache_path / "isa-l.h").exists():
            return str(cache_path)

    # mkdtemp creates the directory itself, unlike the deprecated mktemp,
    # which only returns a name that something else could claim first.
    # dirs_exist_ok lets copytree fill the freshly created directory.
    build_dir = tempfile.mkdtemp()
    shutil.copytree(ISA_L_SOURCE, build_dir, dirs_exist_ok=True)

    # Build environment is a copy of OS environment to allow user to
    # influence it.
    build_env = os.environ.copy()
    if SYSTEM_IS_UNIX:
        build_env["CFLAGS"] = build_env.get("CFLAGS", "") + " -fPIC"
    if hasattr(os, "sched_getaffinity"):
        cpu_count = len(os.sched_getaffinity(0))
    else:  # sched_getaffinity not available on all platforms
        cpu_count = os.cpu_count() or 1  # os.cpu_count() can return None
    run_args = dict(cwd=build_dir, env=build_env)

    if SYSTEM_IS_UNIX:
        if platform.machine() == "aarch64":
            cflags_param = "CFLAGS_aarch64"
        else:
            cflags_param = "CFLAGS_"
        make_cmd = "gmake" if SYSTEM_IS_BSD else "make"
        result = subprocess.run(
            [make_cmd, "-j", str(cpu_count), "-f", "Makefile.unx",
             "isa-l.h", "bin/isa-l.a",
             f"{cflags_param}={build_env.get('CFLAGS', '')}"],
            **run_args)
        static_lib = os.path.join(build_dir, "bin", "isa-l.a")
    elif SYSTEM_IS_WINDOWS:
        result = subprocess.run(["nmake", "/f", "Makefile.nmake"],
                                **run_args)
        static_lib = os.path.join(build_dir, "isa-l_static.lib")
    else:
        raise NotImplementedError(f"Unsupported platform: {sys.platform}")

    # Fail here with a clear message instead of letting a missing library
    # surface later as a linker error.
    if not os.path.exists(static_lib):
        raise RuntimeError(
            f"Building ISA-L failed: {static_lib} was not created "
            f"(build command exited with status {result.returncode}).")

    shutil.copytree(os.path.join(build_dir, "include"),
                    os.path.join(build_dir, "isa-l"))
    if BUILD_CACHE:
        BUILD_CACHE_FILE.write_text(build_dir)
    return build_dir
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2 | # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 3 | # Python Software Foundation; All Rights Reserved 4 | 5 | # This file is part of python-isal which is distributed under the 6 | # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 7 | 8 | from ._isal import (ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION, 9 | ISAL_VERSION) 10 | from ._version import __version__ 11 | 12 | __all__ = [ 13 | "ISAL_MAJOR_VERSION", 14 | "ISAL_MINOR_VERSION", 15 | "ISAL_PATCH_VERSION", 16 | "ISAL_VERSION", 17 | "__version__" 18 | ] 19 | -------------------------------------------------------------------------------- /src/isal/_isal.pyi: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2 | # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 3 | # Python Software Foundation; All Rights Reserved 4 | 5 | # This file is part of python-isal which is distributed under the 6 | # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 7 | 8 | ISAL_MAJOR_VERSION: int 9 | ISAL_MINOR_VERSION: int 10 | ISAL_PATCH_VERSION: int 11 | ISAL_VERSION: str 12 | -------------------------------------------------------------------------------- /src/isal/_isalmodule.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 3 | 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 4 | Python Software Foundation; All Rights Reserved 5 | 6 | This file is part of python-isal which is distributed under the 7 | PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 8 | 9 | This file is not originally from the CPython distribution. 
But it does 10 | contain mostly example code from the Python docs. Also dual licensing just 11 | for this one file seemed silly. 12 | */ 13 | 14 | #define PY_SSIZE_T_CLEAN 15 | #include 16 | #include 17 | 18 | static struct PyModuleDef _isal_module = { 19 | PyModuleDef_HEAD_INIT, 20 | "_isal", /* name of module */ 21 | NULL, /* module documentation, may be NULL */ 22 | -1, 23 | NULL 24 | }; 25 | 26 | 27 | PyMODINIT_FUNC 28 | PyInit__isal(void) 29 | { 30 | PyObject *m = PyModule_Create(&_isal_module); 31 | if (m == NULL) { 32 | return NULL; 33 | } 34 | PyModule_AddIntMacro(m, ISAL_MAJOR_VERSION); 35 | PyModule_AddIntMacro(m, ISAL_MINOR_VERSION); 36 | PyModule_AddIntMacro(m, ISAL_PATCH_VERSION); 37 | 38 | PyObject *isal_version = PyUnicode_FromFormat( 39 | "%d.%d.%d", ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION); 40 | if (isal_version == NULL) { 41 | return NULL; 42 | } 43 | PyModule_AddObject(m, "ISAL_VERSION", isal_version); 44 | return m; 45 | } 46 | -------------------------------------------------------------------------------- /src/isal/_version.pyi: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2 | # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 3 | # Python Software Foundation; All Rights Reserved 4 | 5 | # This file is part of python-isal which is distributed under the 6 | # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 7 | 8 | __version__: str 9 | -------------------------------------------------------------------------------- /src/isal/crc32_combine.h: -------------------------------------------------------------------------------- 1 | /* pigz.c -- parallel implementation of gzip 2 | * Copyright (C) 2007-2023 Mark Adler 3 | * Version 2.8 19 Aug 2023 Mark Adler 4 | */ 5 | 6 | /* 7 | This software is provided 'as-is', without any express or implied 8 | warranty. 
/* pigz.c -- parallel implementation of gzip
 * Copyright (C) 2007-2023 Mark Adler
 * Version 2.8  19 Aug 2023  Mark Adler
 */

/*
  This software is provided 'as-is', without any express or implied
  warranty. In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler
  madler@alumni.caltech.edu

 */

/*
   Alterations from original:
   - typedef for crc_t
   - local declarations replaced with static inline
   - g.block selector in crc32_comb removed
   - include guard added
*/

#ifndef CRC32_COMBINE_H
#define CRC32_COMBINE_H

#include <stddef.h>
#include <stdint.h>

typedef uint32_t crc_t;

// CRC-32 polynomial, reflected.
#define POLY 0xedb88320

// Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC
// polynomial, reflected. For speed, this requires that a not be zero:
// the loop only terminates once the lowest set bit of a has been consumed.
static inline crc_t multmodp(crc_t a, crc_t b) {
    crc_t m = (crc_t)1 << 31;
    crc_t p = 0;
    for (;;) {
        if (a & m) {
            p ^= b;
            // No set bits of a remain below m, so the product is complete.
            if ((a & (m - 1)) == 0)
                break;
        }
        m >>= 1;
        // Multiply b by x, reducing modulo p(x).
        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
    }
    return p;
}

// Table of x^2^n modulo p(x).
static const crc_t x2n_table[] = {
    0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000,
    0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467,
    0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0,
    0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169,
    0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37,
    0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a,
    0xc40ba6d0, 0xc4e22c3c};

// Return x^(n*2^k) modulo p(x).
static inline crc_t x2nmodp(size_t n, unsigned k) {
    crc_t p = (crc_t)1 << 31;       // x^0 == 1
    while (n) {
        if (n & 1)
            p = multmodp(x2n_table[k & 31], p);
        n >>= 1;
        k++;
    }
    return p;
}

// Combine two CRC-32 values: crc1 is the CRC of a first sequence, crc2 the
// CRC of a second sequence and len2 the length in bytes of that second
// sequence. crc1 is multiplied by x^(8*len2), computed by x2nmodp(len2, 3),
// and the result is xor-ed with crc2.
static inline unsigned long crc32_comb(unsigned long crc1, unsigned long crc2,
                                       size_t len2) {
    return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
}

#endif /* CRC32_COMBINE_H */
13 | # - Gzip.compress does not use a GzipFile to compress in memory, but creates a 14 | # simple header using _create_simple_gzip_header and compresses the data with 15 | # igzip_lib.compress using the DECOMP_GZIP_NO_HDR flag. This change was 16 | # ported to Python 3.11, using zlib.compress(wbits=-15) in that instance. 17 | # - Gzip.decompress creates an isal_zlib.decompressobj and decompresses the 18 | # data that way instead of using GzipFile. This change was ported to 19 | # Python 3.11. 20 | # - The main() function's gzip utility now has support for a -c flag for easier 21 | # use. 22 | 23 | 24 | """Similar to the stdlib gzip module. But using the Intel Storage Acceleration 25 | Library to speed up its methods.""" 26 | 27 | import argparse 28 | import builtins 29 | import gzip 30 | import io 31 | import os 32 | import shutil 33 | import struct 34 | import sys 35 | import time 36 | from typing import Optional, SupportsInt 37 | 38 | from . import igzip_lib, isal_zlib 39 | from .isal_zlib import _GzipReader 40 | 41 | __all__ = ["IGzipFile", "open", "compress", "decompress", "BadGzipFile", 42 | "READ_BUFFER_SIZE"] 43 | 44 | _COMPRESS_LEVEL_FAST = isal_zlib.ISAL_BEST_SPEED 45 | _COMPRESS_LEVEL_TRADEOFF = isal_zlib.ISAL_DEFAULT_COMPRESSION 46 | _COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION 47 | 48 | # The amount of data that is read in at once when decompressing a file. 49 | # Increasing this value may increase performance. 50 | # After 512K the performance does not increase anymore on a Ryzen 5 3600 test 51 | # system. 52 | READ_BUFFER_SIZE = 512 * 1024 53 | 54 | FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 55 | READ = gzip.READ 56 | WRITE = gzip.WRITE 57 | 58 | BadGzipFile = gzip.BadGzipFile # type: ignore 59 | 60 | 61 | # The open method was copied from the CPython source with minor adjustments. 
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode using isa-l.

    *filename* is either a path (a str, bytes or os.PathLike object) or an
    existing file object to read from or write to.

    *mode* is one of "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for binary
    mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is
    "rb", and the default compresslevel is 2.

    In binary mode the call is equivalent to
    IGzipFile(filename, mode, compresslevel) and the encoding, errors and
    newline arguments must not be given.

    In text mode the IGzipFile object is wrapped in an io.TextIOWrapper
    created with the given encoding, error handling behavior and line
    ending(s).
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-layer options are meaningless for a binary stream.
        text_only_options = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in text_only_options:
            if value is not None:
                raise ValueError(message)

    gz_mode = mode.replace("t", "")
    # Objects with an __fspath__ method are os.PathLike.
    is_path = (isinstance(filename, (str, bytes))
               or hasattr(filename, "__fspath__"))
    is_file_object = hasattr(filename, "read") or hasattr(filename, "write")
    if is_path:
        binary_file = IGzipFile(filename, gz_mode, compresslevel)
    elif is_file_object:
        binary_file = IGzipFile(None, gz_mode, compresslevel, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if not text_mode:
        return binary_file
    return io.TextIOWrapper(binary_file, encoding, errors, newline)
class IGzipFile(gzip.GzipFile):
    """The IGzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open
    a compressed file in text mode, use the igzip.open() function.
    """
    def __init__(self, filename=None, mode=None,
                 compresslevel=isal_zlib.ISAL_DEFAULT_COMPRESSION,
                 fileobj=None, mtime=None):
        """Constructor for the IGzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file. It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x',
        or 'xb' depending on whether the file will be read or written.
        The default is the mode of fileobj if discernible; otherwise, the
        default is 'rb'. A mode of 'r' is equivalent to one of 'rb', and
        similarly for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 3 controlling the
        level of compression; 0 is fastest and produces the least compression,
        and 3 is slowest and produces the most compression. Unlike
        gzip.GzipFile 0 is NOT no compression. The default is 2.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        Raises ValueError when compresslevel is out of range and the file is
        not opened for reading.
        """
        level_in_range = (isal_zlib.ISAL_BEST_SPEED <= compresslevel
                          <= isal_zlib.ISAL_BEST_COMPRESSION)
        if not level_in_range:
            # The level only matters when writing. mode may be None, in which
            # case the mode of fileobj is used if discernible and 'rb'
            # otherwise (see above); testing `"r" not in None` would raise a
            # TypeError instead of the intended ValueError.
            if mode is None:
                effective_mode = getattr(fileobj, "mode", "rb")
            else:
                effective_mode = mode
            # Only str modes can be inspected here; anything else is left for
            # gzip.GzipFile to reject.
            if isinstance(effective_mode, str) and "r" not in effective_mode:
                raise ValueError(
                    f"Compression level should be between "
                    f"{isal_zlib.ISAL_BEST_SPEED} and "
                    f"{isal_zlib.ISAL_BEST_COMPRESSION}, got {compresslevel}."
                )
        super().__init__(filename, mode, compresslevel, fileobj, mtime)
        if self.mode == WRITE:
            # Replace the compressor set up by gzip.GzipFile with the isa-l
            # one, producing a raw deflate stream (negative wbits).
            self.compress = isal_zlib.compressobj(compresslevel,
                                                  isal_zlib.DEFLATED,
                                                  -isal_zlib.MAX_WBITS,
                                                  isal_zlib.DEF_MEM_LEVEL,
                                                  0)
        if self.mode == READ:
            # Replace the reader set up by gzip.GzipFile with the isa-l one.
            raw = _GzipReader(self.fileobj, READ_BUFFER_SIZE)
            self._buffer = io.BufferedReader(raw)

    def __repr__(self):
        # Same layout as gzip.GzipFile.__repr__, with an igzip tag: the repr
        # of the underlying file object without its angle brackets, followed
        # by the address of this object.
        s = repr(self.fileobj)
        return '<igzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _write_gzip_header(self, compresslevel=_COMPRESS_LEVEL_TRADEOFF):
        """Write the gzip header, mapping the ISA-L level to a gzip flag."""
        # Python 3.9 added a `compresslevel` parameter to write gzip header.
        # This only determines the value of one extra flag. Because this change
        # was backported to 3.7 and 3.8 in later point versions, the attributes
        # of the function should be checked before trying to use the
        # compresslevel parameter.
        # The gzip header has an extra flag that can be set depending on the
        # compression level used. This should be set when either the fastest or
        # best method is used. ISAL level 0 is larger than gzip level 1 and
        # much faster, so setting the flag for fastest level is appropriate.
        # ISAL level 1, 2 and 3 (best) are similar in size and fall around the
        # gzip level 3 size. So setting no extra flag
        # (by using COMPRESS_LEVEL_TRADEOFF) is appropriate here.
        if ("compresslevel" in super()._write_gzip_header.__code__.co_varnames
                and hasattr(gzip, "_COMPRESS_LEVEL_FAST")
                and hasattr(gzip, "_COMPRESS_LEVEL_TRADEOFF")):
            if compresslevel == _COMPRESS_LEVEL_FAST:
                super()._write_gzip_header(gzip._COMPRESS_LEVEL_FAST)
            else:
                super()._write_gzip_header(gzip._COMPRESS_LEVEL_TRADEOFF)
        else:
            super()._write_gzip_header()

    def write(self, data):
        """Compress *data*, write it to the underlying file object and return
        the number of uncompressed bytes that were consumed.

        Raises OSError (EBADF) when the file is not open for writing and
        ValueError when the underlying file object is gone.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            import errno
            raise OSError(errno.EBADF, "write() on read-only IGzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed IGzipFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            # Keep the running size, CRC and offset of the uncompressed data
            # up to date.
            self.size += length
            self.crc = isal_zlib.crc32(data, self.crc)
            self.offset += length
        return length