├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── ci_and_build_release.yml ├── .gitignore ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── doc └── yenc-draft.1.3.txt ├── pyproject.toml ├── pytest.ini ├── setup.py ├── src ├── __init__.py ├── crc32.cc ├── crc32.h ├── crcutil-1.0 │ ├── AUTHORS │ ├── COPYING │ ├── ChangeLog │ ├── INSTALL │ ├── LICENSE │ ├── Makefile │ ├── Makefile.am │ ├── Makefile.in │ ├── Makefile.win │ ├── NEWS │ ├── README │ ├── aclocal.m4 │ ├── autogen.sh │ ├── code │ │ ├── base_types.h │ │ ├── crc32c_sse4.cc │ │ ├── crc32c_sse4.h │ │ ├── crc32c_sse4_intrin.h │ │ ├── crc_casts.h │ │ ├── generic_crc.h │ │ ├── gf_util.h │ │ ├── multiword_128_64_gcc_amd64_sse2.cc │ │ ├── multiword_64_64_cl_i386_mmx.cc │ │ ├── multiword_64_64_gcc_amd64_asm.cc │ │ ├── multiword_64_64_gcc_i386_mmx.cc │ │ ├── multiword_64_64_intrinsic_i386_mmx.cc │ │ ├── platform.h │ │ ├── protected_crc.h │ │ ├── rolling_crc.h │ │ ├── std_headers.h │ │ └── uint128_sse2.h │ ├── config.h.in │ ├── configure │ ├── configure.ac │ ├── depcomp │ ├── examples │ │ ├── interface.cc │ │ ├── interface.h │ │ └── usage.cc │ ├── install-sh │ ├── missing │ └── tests │ │ ├── aligned_alloc.h │ │ ├── bob_jenkins_rng.h │ │ ├── rdtsc.h │ │ ├── set_hi_pri.c │ │ ├── unittest.cc │ │ ├── unittest.h │ │ └── unittest_helper.h ├── py.typed ├── sabctools.cc ├── sabctools.h ├── sabctools.pyi ├── sparse.cc ├── sparse.h ├── unlocked_ssl.cc ├── unlocked_ssl.h ├── utils.cc ├── utils.h ├── yenc.cc ├── yenc.h └── yencode │ ├── common.h │ ├── crc.cc │ ├── crc.h │ ├── crc_arm.cc │ ├── crc_arm_pmull.cc │ ├── crc_common.h │ ├── crc_folding.cc │ ├── crc_folding_256.cc │ ├── crc_riscv.cc │ ├── decoder.cc │ ├── decoder.h │ ├── decoder_avx.cc │ ├── decoder_avx2.cc │ ├── decoder_avx2_base.h │ ├── decoder_common.h │ ├── decoder_neon.cc │ ├── decoder_neon64.cc │ ├── decoder_rvv.cc │ ├── decoder_sse2.cc │ ├── decoder_sse_base.h │ ├── decoder_ssse3.cc │ ├── decoder_vbmi2.cc │ ├── encoder.cc │ ├── encoder.h │ ├── 
encoder_avx.cc │ ├── encoder_avx2.cc │ ├── encoder_avx_base.h │ ├── encoder_common.h │ ├── encoder_neon.cc │ ├── encoder_rvv.cc │ ├── encoder_sse2.cc │ ├── encoder_sse_base.h │ ├── encoder_ssse3.cc │ ├── encoder_vbmi2.cc │ ├── hedley.h │ ├── platform.cc │ └── stdint.h └── tests ├── __init__.py ├── requirements.txt ├── test.py ├── test_crc32.py ├── test_decoder.py ├── test_encoder.py ├── test_sparse.py ├── test_unlocked_ssl.py ├── test_utils.py ├── testsupport.py └── yencfiles ├── crc_1.pickle ├── crc_10.pickle ├── crc_11.pickle ├── crc_2.pickle ├── crc_3.pickle ├── crc_4.pickle ├── crc_5.pickle ├── crc_6.pickle ├── crc_7.pickle ├── crc_8.pickle ├── crc_9.pickle ├── small_file.pickle ├── small_file_2.pickle ├── test_bad_crc.yenc ├── test_bad_crc_end.yenc ├── test_end_after_filename.yenc ├── test_no_name.yenc ├── test_padded_crc.yenc ├── test_partial.yenc ├── test_regular.yenc ├── test_regular_2.yenc ├── test_special_chars.yenc └── test_special_utf8_chars.yenc /.gitattributes: -------------------------------------------------------------------------------- 1 | # These should not be touched 2 | *.yenc binary 3 | *.pickle binary -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/ci_and_build_release.yml: -------------------------------------------------------------------------------- 1 | name: Tests and build wheels 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | ci: 7 | name: Tests ${{ matrix.os }} ${{ matrix.python-architecture }} - Python ${{ matrix.python-version }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | os: [ ubuntu-latest, windows-latest, macos-latest ] 13 
| python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ] 14 | python-architecture: [ '' ] 15 | include: 16 | - os: windows-latest 17 | python-architecture: 'x86' 18 | python-version: '3.8' 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: actions/setup-python@v5 23 | name: Install Python 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | architecture: ${{ matrix.python-architecture }} 27 | - name: Build module and run pytest 28 | run: | 29 | pip install wheel setuptools 30 | pip install -r tests/requirements.txt 31 | pip install . -v 32 | python -c "import sabctools; print('Version:', sabctools.__version__);" 33 | python -c "import sabctools; print('SIMD:', sabctools.simd);" 34 | python -c "import sabctools; print('OpenSSL linked:', sabctools.openssl_linked);" 35 | pytest 36 | 37 | build_wheels: 38 | name: Build wheels on ${{ matrix.os }} ${{ matrix.linux_arch }} 39 | needs: ci 40 | runs-on: ${{ matrix.os }} 41 | strategy: 42 | fail-fast: false 43 | matrix: 44 | include: 45 | - os: windows-latest 46 | - os: macos-latest 47 | - os: ubuntu-latest 48 | linux_arch: x86_64 49 | - os: ubuntu-latest 50 | linux_arch: aarch64 51 | 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: actions/setup-python@v5 55 | name: Install Python 56 | with: 57 | python-version: '3.11' 58 | - name: Set up QEMU 59 | if: runner.os == 'Linux' && matrix.linux_arch == 'aarch64' 60 | uses: docker/setup-qemu-action@v3 61 | with: 62 | platforms: all 63 | 64 | - name: Build wheels 65 | uses: pypa/cibuildwheel@v2.23.3 66 | env: 67 | CIBW_SKIP: pp* *i686* cp36* cp37* 68 | CIBW_ARCHS_LINUX: ${{ matrix.linux_arch }} 69 | CIBW_ARCHS_MACOS: universal2 70 | CIBW_ARCHS_WINDOWS: all 71 | CIBW_BUILD_VERBOSITY: 1 72 | CIBW_TEST_SKIP: '*' 73 | - name: Upload wheel artifacts 74 | uses: actions/upload-artifact@v4 75 | with: 76 | path: ./wheelhouse/*.whl 77 | name: Wheels ${{ matrix.os }} ${{ matrix.linux_arch }} 78 | # The action/upload-artifact only allows unique 
filenames for whole run 79 | - name: Build source distribution 80 | run: python setup.py sdist 81 | - name: Upload source distribution artifact 82 | uses: actions/upload-artifact@v4 83 | if: runner.os == 'Linux' && matrix.linux_arch == 'aarch64' 84 | with: 85 | path: ./dist/*.tar.gz 86 | name: Source distribution 87 | - name: Publish to PyPI 88 | env: 89 | TWINE_USERNAME: '__token__' 90 | TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }} 91 | if: env.TWINE_PASSWORD && startsWith(github.ref, 'refs/tags/') 92 | run: | 93 | pip install twine 94 | twine upload --skip-existing ./dist/*.tar.gz ./wheelhouse/*.whl 95 | - name: Publish release to GitHub 96 | uses: softprops/action-gh-release@v2 97 | if: startsWith(github.ref, 'refs/tags/') && runner.os == 'Linux' && matrix.linux_arch == 'aarch64' 98 | with: 99 | token: ${{ secrets.GITHUB_TOKEN }} 100 | prerelease: false 101 | files: ./dist/*.tar.gz 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | # Building 50 | Build 51 | *.egg-info 52 | 
build 53 | test 54 | dist 55 | *.whl 56 | 57 | # Visual studio 58 | cmake*/ 59 | *.sln 60 | *.vcxproj* 61 | .vs/ 62 | 63 | # Compiled python 64 | *.py[cod] 65 | __pycache__ 66 | 67 | # PyCharm project files 68 | .idea/ 69 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include src * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | SABCTools - C implementations of functions for use within SABnzbd 3 | =============================== 4 | 5 | This module implements four main sets of C implementations that are used within SABnzbd: 6 | * yEnc decoding and encoding using SIMD routines 7 | * CRC32 calculations 8 | * Non-blocking SSL-socket reading 9 | * Marking files as sparse 10 | 11 | Of course, they can also be used in any other application. 12 | 13 | ## yEnc decoding and encoding using SIMD routines 14 | yEnc decoding and encoding is performed using [yencode](https://github.com/animetosho/node-yencode) from animetosho, 15 | which utilizes x86/ARM SIMD optimised routines if such CPU features are available. 16 | 17 | ## CRC32 calculations 18 | We used the `crcutil` library for very fast CRC calculations. 19 | 20 | ## Non-blocking SSL-socket reading 21 | When Python reads data from a non-blocking SSL socket, it is limited to receiving 16K of data at once. This module implements a patched version that can read as much data as is available at once. 22 | For more details, see the [cpython pull request](https://github.com/python/cpython/pull/31492). 23 | 24 | ## Marking files as sparse 25 | Uses Windows-specific system calls to mark files as sparse and set the desired size. 26 | On other platforms the same is achieved by calling `truncate`. 
27 | 28 | ## Utility functions 29 | Use `sabctools.bytearray_malloc(size)` to get a `bytearray` that is uninitialized (not set to `0`'s). 30 | This is much faster than the built-in `bytearray(size)` because the memory is not zero-filled: the data inside the new `bytearray` will be whatever is present in the memory block. 31 | 32 | # Installing 33 | 34 | As simple as running: 35 | ``` 36 | pip install sabctools --upgrade 37 | ``` 38 | When you want to compile from sources, you can run in the `sabctools` directory: 39 | ``` 40 | pip install . 41 | ``` 42 | 43 | ## SIMD detection 44 | 45 | To see which SIMD set was detected on your system, run: 46 | ``` 47 | python -c "import sabctools; print(sabctools.simd);" 48 | ``` 49 | 50 | ## OpenSSL detection 51 | 52 | To see if we could link to the OpenSSL library on your system, run: 53 | ``` 54 | python -c "import sabctools; print(sabctools.openssl_linked);" 55 | ``` 56 | 57 | # Testing 58 | 59 | For testing we use `pytest` (install via `pip install -r tests/requirements.txt`) and the tests can simply be executed by browsing to the `sabctools` directory and running: 60 | ``` 61 | pytest 62 | ``` 63 | Note that tests can fail if `git` modified the line endings of data files when checking out the repository! 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = yencfiles 3 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # C-extension is placed as submodule to allow typing 2 | from sabctools.sabctools import * 3 | __version__ = version -------------------------------------------------------------------------------- /src/crc32.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | /* Python C-API entry points that forward CRC32 arithmetic to the RapidYenc:: helpers (presumably declared in yencode/crc.h -- confirm). Each function returns a new Python int, or NULL when argument conversion failed. */ 19 | #include "crc32.h" 20 | #include "yencode/crc.h" 21 | /* crc32_combine: parses two unsigned long values and one unsigned long long (format "kkK"), passes them to RapidYenc::crc32_combine and returns the result as a Python int. Returns NULL if PyArg_ParseTuple fails. NOTE(review): the exact meaning of the three arguments is defined by RapidYenc::crc32_combine -- confirm there. */ 22 | PyObject* crc32_combine(PyObject *self, PyObject *args) { 23 | unsigned long crc1, crc2; 24 | unsigned long long length; 25 | 26 | if(!PyArg_ParseTuple(args, "kkK:crc32_combine", &crc1, &crc2, &length)) { 27 | return NULL; 28 | } 29 | 30 | crc1 = RapidYenc::crc32_combine(crc1, crc2, length); 31 | 32 | return PyLong_FromUnsignedLong(crc1); 33 | } 34 | /* crc32_multiply: parses two unsigned long values (format "kk"), passes them to RapidYenc::crc32_multiply and returns the result as a Python int. Returns NULL if PyArg_ParseTuple fails. */ 35 | PyObject* crc32_multiply(PyObject *self, PyObject *args) { 36 | unsigned long crc1, crc2; 37 | 38 | if(!PyArg_ParseTuple(args, "kk:crc32_multiply", &crc1, &crc2)) { 39 | return NULL; 40 | } 41 | 42 | crc1 = RapidYenc::crc32_multiply(crc1, crc2); 43 | 44 | return PyLong_FromUnsignedLong(crc1); 45 | } 46 | /* crc32_zero_unpad: parses an unsigned long and an unsigned long long (format "kK"), passes them to RapidYenc::crc32_unzero and returns the result as a Python int. Returns NULL if PyArg_ParseTuple fails. NOTE(review): the name suggests it undoes `length` trailing zero bytes -- confirm against RapidYenc::crc32_unzero. */ 47 | PyObject* crc32_zero_unpad(PyObject *self, PyObject *args) { 48 | unsigned long crc1; 49 | unsigned long long length; 50 | 51 | if(!PyArg_ParseTuple(args, "kK:crc32_zero_unpad", &crc1, &length)) { 52 | return NULL; 53 | } 54 | 55 | crc1 = RapidYenc::crc32_unzero(crc1, length); 56 | 57 | return PyLong_FromUnsignedLong(crc1); 58 | } 59 | /* crc32_xpown: takes a single Python object, converts it with PyLong_AsLongLong (signed, so negative values are passed through) and returns RapidYenc::crc32_2pow(n) as a Python int. The conversion result alone cannot signal failure, so PyErr_Occurred() is checked and NULL is returned when an error is pending. */ 60 | PyObject* crc32_xpown(PyObject* self, PyObject* arg) { 61 | long long n = PyLong_AsLongLong(arg); 62 | 63 | if (PyErr_Occurred()) { 64 | return NULL; 65 | } 66 | 67 | unsigned long result = RapidYenc::crc32_2pow(n); 68 | 69 | return PyLong_FromUnsignedLong(result); 70 | } 71 | /* crc32_xpow8n: takes a single Python object, converts it with PyLong_AsUnsignedLongLong and returns RapidYenc::crc32_256pow(n) as a Python int. Returns NULL when the conversion left a Python error pending. */ 72 | PyObject* crc32_xpow8n(PyObject* self, PyObject* arg) { 73 | unsigned long long n = PyLong_AsUnsignedLongLong(arg); 74 | 75 | if (PyErr_Occurred()) { 76 | return NULL; 77 | } 78 | 79 | unsigned long result = RapidYenc::crc32_256pow(n); 80 | 81 | return PyLong_FromUnsignedLong(result); 82 | } 83 | -------------------------------------------------------------------------------- /src/crc32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it 
under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | */ 18 | 19 | #ifndef SABCTOOLS_CRC32_H 20 | #define SABCTOOLS_CRC32_H 21 | 22 | #include 23 | 24 | PyObject* crc32_combine(PyObject *, PyObject*); 25 | PyObject* crc32_multiply(PyObject *, PyObject*); 26 | PyObject* crc32_zero_unpad(PyObject *, PyObject*); 27 | PyObject* crc32_xpown(PyObject *, PyObject*); 28 | PyObject* crc32_xpow8n(PyObject *, PyObject*); 29 | 30 | #endif //SABCTOOLS_CRC32_H 31 | -------------------------------------------------------------------------------- /src/crcutil-1.0/AUTHORS: -------------------------------------------------------------------------------- 1 | Andrew Kadatch 2 | Bob Jenkins 3 | 4 | [The end of the document] 5 | -------------------------------------------------------------------------------- /src/crcutil-1.0/ChangeLog: -------------------------------------------------------------------------------- 1 | 10 Dec 2010: 2 | - Version 1.0.0 3 | 4 | [The end of the document] 5 | -------------------------------------------------------------------------------- /src/crcutil-1.0/INSTALL: -------------------------------------------------------------------------------- 1 | Since crcutil is a library, and rather small, template library, 2 | it is better to compile it directly into your code. Or you could 3 | build a library for interface.cc and use it. 
4 | 5 | 6 | Building and running the tests (Linux/GCC, MacOSX/GCC) 7 | ------------------------------------------------------ 8 | 9 | Run 10 | ./autogen.sh <target> 11 | or 12 | ./autogen.sh <target> "-m32 -march=i686 <more flags>" 13 | to build and run 64-bit and 32-bit with GCC. 14 | 15 | Typically, <target> is not required. 16 | 17 | Useful values for <target> are: 18 | clean 19 | configure 20 | check 21 | 22 | E.g. 23 | ./autogen.sh check 24 | will build and run 64-bit unittest, whilst 25 | ./autogen.sh clean 26 | will clean everything up. 27 | 28 | 29 | Why ./autogen.sh? Two reasons: 30 | 31 | 1. Automake is well beyond my comprehension, and I am unable to create 32 | "Makefile.am" consistently. So autogen.sh has that piece of black magic. 33 | 34 | 2. autogen.sh detects the version of GCC and provides different compile options 35 | to work around differences between compiler versions that cannot 36 | be detected at compile time. 37 | 38 | You still can do 39 | ./configure CXXFLAGS="-O3" CFLAGS="-O3" 40 | (if you use GCC before 4.5.0) or 41 | ./configure CXXFLAGS="-O3 -mcrc32" CFLAGS="-O3" 42 | and then run 43 | make check 44 | but the use of "./autogen.sh" is the preferred way to go. 45 | 46 | 47 | Building and running the tests (Windows, CL or ICL compiler) 48 | ------------------------------------------------------------ 49 | 50 | Run 51 | nmake -f Makefile.win cl64 52 | or 53 | nmake -f Makefile.win cl32 54 | or 55 | nmake -f Makefile.win icl64 56 | or 57 | nmake -f Makefile.win icl32 58 | to build the 64-bit and 32-bit unit tests using 59 | Microsoft CL and Intel's ICL compilers respectively. 60 | 61 | Run 62 | nmake -f Makefile.win clean 63 | to clean everything up. 
64 | 65 | [The end of the document] 66 | -------------------------------------------------------------------------------- /src/crcutil-1.0/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CXXFLAGS=-DCRCUTIL_USE_MM_CRC32=1 -Wall -msse2 -Icode 2 | AM_CFLAGS=$(AM_CXXFLAGS) 3 | check_PROGRAMS=crcutil_ut 4 | TESTS=crcutil_ut 5 | crcutil_ut_SOURCES=code/base_types.h code/crc32c_sse4.cc code/crc32c_sse4.h code/crc32c_sse4_intrin.h code/crc_casts.h code/generic_crc.h code/gf_util.h code/multiword_128_64_gcc_amd64_sse2.cc code/multiword_64_64_cl_i386_mmx.cc code/multiword_64_64_gcc_amd64_asm.cc code/multiword_64_64_gcc_i386_mmx.cc code/platform.h code/protected_crc.h code/rolling_crc.h code/std_headers.h code/uint128_sse2.h tests/aligned_alloc.h tests/bob_jenkins_rng.h tests/rdtsc.h tests/set_hi_pri.c tests/unittest.cc tests/unittest.h tests/unittest_helper.h 6 | tmpdir=/tmp 7 | tmp_PROGRAMS=usage 8 | usage_CXXFLAGS=$(AM_CXXFLAGS) -Itests 9 | usage_SOURCES=code/base_types.h code/crc32c_sse4.cc code/crc32c_sse4.h code/crc32c_sse4_intrin.h code/crc_casts.h code/generic_crc.h code/gf_util.h code/multiword_128_64_gcc_amd64_sse2.cc code/multiword_64_64_cl_i386_mmx.cc code/multiword_64_64_gcc_amd64_asm.cc code/multiword_64_64_gcc_i386_mmx.cc code/platform.h code/protected_crc.h code/rolling_crc.h code/std_headers.h code/uint128_sse2.h examples/interface.cc examples/interface.h examples/usage.cc tests/aligned_alloc.h 10 | -------------------------------------------------------------------------------- /src/crcutil-1.0/Makefile.win: -------------------------------------------------------------------------------- 1 | Makefile=Makefile.win 2 | 3 | CL_FLAGS=-Wall -O2 -nologo -DCRCUTIL_USE_MM_CRC32=0 4 | ICL_FLAGS=-Wall -O3 -Qdiag-disable:181 -Qdiag-disable:185 -Qdiag-disable:442 -Qdiag-disable:vec -DCRCUTIL_USE_MM_CRC32=0 5 | INCLUDES=-Icode 6 | 7 | all: 8 | @echo Please run "nmake target" where "target" is one of: 9 | @echo cl64 - 
64-bit Microsoft compiler 10 | @echo cl32 - 32-bit Microsoft compiler 11 | @echo icl64 - 64-bit Intel compiler 12 | @echo icl32 - 32-bit Intel compiler 13 | 14 | 15 | cl64: 16 | @call "%VCINSTALLDIR%\bin\amd64\vcvarsamd64.bat" && nmake -nologo -f $(Makefile) CC_FLAGS="$(CL_FLAGS) -Icode -Itests" CC=cl.exe configured 17 | 18 | cl32: 19 | @call "%VCINSTALLDIR%\bin\vcvars32.bat" && nmake -nologo -f $(Makefile) CC_FLAGS="$(CL_FLAGS) -Icode -Itests" CC=cl.exe configured 20 | 21 | icl64: 22 | @call "%ICPP_COMPILER11%bin\iclvars.bat" intel64 && nmake -nologo -f $(Makefile) CC_FLAGS="$(ICL_FLAGS) -Icode -Itests" CC=icl.exe configured 23 | 24 | icl32: 25 | @call "%ICPP_COMPILER11%bin\iclvars.bat" ia32 && nmake -nologo -f $(Makefile) CC_FLAGS="$(ICL_FLAGS) -Icode -Itests" CC=icl.exe configured 26 | 27 | 28 | clean: 29 | del /q *.obj *.asm *.exe *.pdb *.suo 30 | 31 | 32 | TARGETS=unittest.exe example.exe 33 | 34 | COMMON_CODE=\ 35 | code/crc32c_sse4.cc \ 36 | code/multiword_64_64_cl_i386_mmx.cc 37 | 38 | COMMON_HEADERS=\ 39 | code/base_types.h \ 40 | code/crc32c_sse4.h \ 41 | code/crc32c_sse4_intrin.h \ 42 | code/crc_casts.h \ 43 | code/generic_crc.h \ 44 | code/gf_util.h \ 45 | code/platform.h \ 46 | code/protected_crc.h \ 47 | code/rolling_crc.h \ 48 | code/std_headers.h \ 49 | code/uint128_sse2.h 50 | 51 | UNITTEST_CODE=\ 52 | tests/unittest.cc \ 53 | tests/set_hi_pri.c \ 54 | $(COMMON_CODE) 55 | 56 | UNITTEST_HEADERS=\ 57 | tests/aligned_alloc.h \ 58 | tests/bob_jenkins_rng.h \ 59 | tests/rdtsc.h \ 60 | tests/unittest.h \ 61 | tests/unittest_helper.h \ 62 | $(COMMON_HEADERS) 63 | 64 | EXAMPLE_CODE=\ 65 | examples/usage.cc \ 66 | examples/interface.cc \ 67 | $(COMMON_CODE) 68 | 69 | EXAMPLE_HEADERS=\ 70 | examples/interface.h \ 71 | $(COMMON_HEADERS) 72 | 73 | configured: $(TARGETS) 74 | 75 | unittest.exe: $(Makefile) $(UNITTEST_CODE) $(UNITTEST_HEADERS) 76 | $(CC) $(CC_FLAGS) $(UNITTEST_CODE) 77 | 78 | example.exe: $(Makefile) $(EXAMPLE_CODE) $(EXAMPLE_HEADERS) 79 | $(CC) 
$(CC_FLAGS) -Iexamples $(EXAMPLE_CODE) 80 | -------------------------------------------------------------------------------- /src/crcutil-1.0/NEWS: -------------------------------------------------------------------------------- 1 | 10 Jan 2010: automake is working 2 | 3 | [The end of the document] 4 | -------------------------------------------------------------------------------- /src/crcutil-1.0/README: -------------------------------------------------------------------------------- 1 | Goals 2 | ----- 3 | 4 | 1. Performance. In distributed systems the data is CRC'ed 5 | on every breath in and out, and often multiple times. 6 | Having entire cluster spend 10% of all CPU computing 7 | CRCs is not something unheard of. 8 | 9 | 2. Functionality. Computing CRC is not enough. Oftentimes, 10 | distributed systems need to perform various operations 11 | using known CRC values (concatenation, data replacement, 12 | etc.) without touching the actual data. 13 | 14 | 3. Functionality verification: ability to catch even the most 15 | subtle bugs in CRC implementation. 16 | 17 | 4. Performance benchmarking: ability to evaluate performance 18 | of known CRC algorithms and choose the right one for given 19 | architecture and/or compiler. 20 | 21 | 5. Support most popular and most advanced CPUs [typically 22 | used in distributed environments]. That is, AMD64 and X86. 23 | 24 | 6. Support most popular compilers used to compile code running 25 | in distributed environments. That is, Microsoft's CL, GCC, 26 | and Intel's ICL. 27 | 28 | 7. Ability to easily (at run-time) create CRCs for arbitrary 29 | generating polynomials. Many complex projects have to deal 30 | with multiple CRC generating polynomials. Adding support 31 | yet another one should be 1-line change, not 2-week journey. 32 | 33 | 34 | Caveats 35 | ------- 36 | 37 | 1. Only little-endian CPUs are supported. 
Reason: all the 38 | optimizations make sense only when CPU has multiple ALUs 39 | and may execute multiple instructions in parallel. I cannot 40 | easily recall big-endian CPUs like that (probably PPC and 41 | IBM's Z-series?) -- and, unless CPU is powerful enough, 42 | trivial byte-by-byte Sarwate algorithm is hard to beat. 43 | 44 | 2. The only CPUs the code was tested on are AMD64 and X86 family. 45 | I do not have access to Itanium. I tried to do my best to 46 | allow the code to work on Itanium as is, but I will not 47 | be very surprised if I overlooked something. 48 | 49 | 50 | How it all works 51 | ---------------- 52 | 53 | Please read crc.pdf in "docs" directory -- it explains, slowly, 54 | step-by-step, how it all works, and provides small listings 55 | of respective algorithms that (hopefully, clearly) demonstrate 56 | how specific algorithm is implemented -- actual implementation 57 | is heavily optimized, a lot of loops are unrolled, and comments 58 | explain only the most subtle details of implementation. 59 | 60 | 61 | Usage 62 | ----- 63 | 64 | "unittest.cc" is a standalone unit test which performs extensive 65 | functionality validation and also tests performance of key scenarios. 66 | Please keep in mind that it takes almost a minute for GCC to compile 67 | it. Full performance test takes a couple of hours. 68 | 69 | "generic_crc.h" provides a set of implementations of generic CRCs. 70 | "crc32c*" set of files implements CRC using Intel's CRC32 instruction. 71 | "multiword*" set of files implements specialized -- and heavily 72 | optimized -- versions of multiword CRC. 73 | 74 | However, including these files directly into your project may be 75 | a bad idea -- there is a lot of quite heavy-weight template code 76 | that you probably do not want to see included into every file that 77 | uses CRCs. 78 | 79 | Instead, use "interface.h" which hides all the details of the 80 | implementation. 
It declares one namespace, two types in that 81 | namespace, and brings in a couple of standard ANSI C headers. 82 | 83 | Another advantage of using "interface.h" is that actual 84 | implementation will pick the most efficient implementation 85 | of CRC for specific platform and compiler (applies to 86 | AMD64 and X86 platform and CL, ICL, and GCC compilers only). 87 | 88 | Please see "usage.cc" which provides an example of how to use 89 | the crcutil_interface::CRC class. 90 | 91 | 92 | Compiler optimization settings 93 | ------------------------------ 94 | 95 | Recommended compiler flags: 96 | CL: -O2 -Wall 97 | ICL: -O3 -Wall -Qdiag-disable:181 -Qdiag-disable:185 -Qdiag-disable:442 -Qdiag-disable:vec 98 | GCC 4.5+: -O3 -Wall -msse2 -mcrc32 99 | GCC 4.4-, AMD64: -O3 -Wall -msse2 -DCRCUTIL_USE_MM_CRC32=1 100 | GCC 4.4-, I386: -O3 -Wall -msse2 -DCRCUTIL_USE_MM_CRC32=1 -fomit-frame-pointer 101 | 102 | 103 | Compile-time constants 104 | ---------------------- 105 | 106 | CRCUTIL_USE_ASM 107 | Allows the use of inline ASM for GCC on AMD64 and I386 platforms, 108 | 32-bit Intel and Microsoft compilers on Windows. 109 | 110 | See multiword*.cc files. 111 | 112 | By default, turned on. 113 | 114 | 115 | HAVE_MMX 116 | MMX and respective intrinsics are available. When MMX is available, it 117 | will be used on I386 platform to speed up computation of up to 64-bit 118 | CRCs (1.3 CPU cycles/byte, see *i386_mmx.cc files). 119 | 120 | By default, enabled on AMD64 and I386 platforms, disabled otherwise. 121 | 122 | 123 | HAVE_SSE 124 | By default, enabled on AMD64 and I386 platforms, disabled otherwise. 125 | 126 | 127 | HAVE_SSE2 128 | By default, enabled on AMD64 and I386 platforms, disabled otherwise. 129 | 130 | Allows the use of SSE2 instructions to compute 128-bit CRCs efficiently 131 | (see uint128_sse2.h, multiword_128_64_gcc_amd64_sse2.cc). 132 | 133 | 134 | CRCUTIL_PREFETCH_WIDTH 135 | Prefetch width (default is 0 -- read platform.h to see why). 
136 | 137 | When CRCUTIL_PREFETCH_WIDTH > 0 and HAVE_SSE, the code will try to 138 | prefetch CRCUTIL_PREFETCH_WIDTH bytes ahead. 139 | 140 | 141 | CRCUTIL_MIN_ALIGN_SIZE 142 | Align input pointer on word boundary when input length exceeds 143 | CRCUTIL_MIN_ALIGN_SIZE bytes and when CRC implementation will read 144 | input data by words. 145 | 146 | Non-AMD64/I386 do not allow misaligned reads, so default value of 147 | CRCUTIL_MIN_ALIGN_SIZE is 0. 148 | 149 | On AMD64 and I386 platforms, default value is 1KB. Even though AMD64 150 | and I386 allow non-aligned reads, crossing cache line boundary is not 151 | free, and it makes sense to align large inputs first before processing 152 | them (see generic_crc.h for more details). 153 | 154 | 155 | CRCUTIL_USE_MM_CRC32 156 | Allows the use SSE4.2 crc32 instruction when computing CRC32C (0.13 CPU 157 | cycles per byte, see crc32c_sse4* files). 158 | 159 | If set to false (i.e. 0), _mm_crc32_u*() intrinsics will be simulated 160 | (useful for debugging crc32_sse4 code on machines that do not support 161 | SSE 4.2). 162 | 163 | Hardware-assisted CRC32C is supported on AMD64 and I386 platforms only. 164 | 165 | By default, enabled for Windows and for "g++ -msse4". 166 | 167 | With GCC 4.5, it is possible to compile the code using "-msse2 -mcrc32 168 | -DCRCUTIL_USE_MM_CRC32=1" flags. 169 | 170 | GCC 4.4 and earlier do not support "-mcrc32" flag, but it is still 171 | possible to use crc32c_sse4 code by compiling the code using "-msse2 172 | -DCRCUTIL_USE_MM_CRC32=1" flags. In this case, inline asm code will be 173 | used (see crc32c_sse4_intrin.h). 174 | 175 | 176 | CRCUTIL_FORCE_ASM_CRC32C 177 | GCC 4.4 and earlier versions do not have -mcrc32 flag, so 178 | _mm_crc32_u64/32/8 intrinsics there are not available from standard 179 | headers. They are replaced by inline asm code (see 180 | crc32c_sse4_intrin.h). 
To test backward compatibility using GCC 4.5+, 181 | use "-Wall -O3 -msse2 --DCRCUTIL_USE_MM_CRC32=1 182 | -DCRCUTIL_FORCE_ASM_CRC32C=1". 183 | 184 | 185 | [The end of the document] 186 | -------------------------------------------------------------------------------- /src/crcutil-1.0/autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # See http://mij.oltrelinux.com/devel/autoconf-automake/ 4 | 5 | if [ -f "Makefile" ] && [ -f "Makefile.am" ] && [ -f "Makefile.in" ] && [ -d ".deps" ] ; then 6 | make clean 7 | fi 8 | 9 | echo "Removing old garbage" 10 | if [ "${1}" != "clean" ] || [ "${2}" == "clean" ]; then 11 | # "./mk.sh clean" leave all the files needed for "./configure && make". 12 | # "./mk.sh clean clean" deletes them as well. 13 | # Full clean build starts from removing all generated files. 14 | rm -f Makefile 15 | rm -f Makefile.am 16 | rm -f Makefile.in 17 | rm -f aclocal.m4 18 | rm -f config.h.in 19 | rm -f configure 20 | rm -f configure.ac 21 | rm -f depcomp 22 | rm -f install-sh 23 | rm -f missing 24 | fi 25 | 26 | rm -f autoscan.log 27 | rm -f config.h 28 | rm -f config.log 29 | rm -f config.status 30 | rm -f stamp-h1 31 | if [ -d "autom4te.cache" ]; then 32 | rm -r autom4te.cache 33 | fi 34 | if [ -d ".deps" ]; then 35 | rm -r .deps 36 | fi 37 | 38 | if [ "${1}" == "clean" ]; then 39 | exit 40 | fi 41 | 42 | echo "Generating preliminary configure.ac" 43 | autoscan 44 | 45 | sed 's/^AC_INIT(.*$/AC_INIT(crcutil, 1.0, crcutil@googlegroups.com)\ 46 | AM_INIT_AUTOMAKE(crcutil, 1.0)\ 47 | AC_CONFIG_FILES([Makefile]) \ 48 | AC_OUTPUT()/' configure.scan >configure.ac 49 | 50 | # AC_OUTPUT(Makefile)/' configure.scan >configure.ac 51 | rm -f configure.scan 52 | 53 | echo "Generating final configure.ac" 54 | aclocal 55 | autoconf 56 | 57 | echo "Generating config.h.in" 58 | autoheader 59 | 60 | target=./Makefile.am 61 | echo "Generating ${target}" 62 | echo>${target} "AUTOMAKE_OPTIONS=foreign" 63 
| 64 | # --pedantic -std=c99? 65 | crcutil_flags="-DCRCUTIL_USE_MM_CRC32=1 -Wall -msse2 -Icode" 66 | echo>>${target} "AM_CXXFLAGS=${crcutil_flags}" # append: ">" would truncate the file and drop AUTOMAKE_OPTIONS=foreign 67 | if [ "$(uname -a | grep ^Darwin)" == "" ] && [[ "$(c++ -dumpversion)" > "4.4.9" ]]; then 68 | # Static linking is not supported on Mac OS X. 69 | # Use static linking on Linux, otherwise GCC 4.5.0 linker produces 70 | # obscure warning (well, the code works but nevertheless). 71 | echo>>${target} "AM_LDFLAGS=-static" 72 | fi 73 | echo>>${target} 'AM_CFLAGS=$(AM_CXXFLAGS)' 74 | echo>>${target} "check_PROGRAMS=crcutil_ut" 75 | echo>>${target} "TESTS=crcutil_ut" 76 | sources=$(ls tests/*.cc tests/*.c tests/*.h code/*.cc code/*.h | grep -v intrinsic | tr "\n" " ") 77 | echo>>${target} "crcutil_ut_SOURCES=${sources}" 78 | 79 | echo>>${target} "tmpdir=/tmp" 80 | echo>>${target} "tmp_PROGRAMS=usage" 81 | echo>>${target} 'usage_CXXFLAGS=$(AM_CXXFLAGS) -Itests' 82 | sources=$(ls examples/*.cc examples/*.h code/*.cc code/*.h tests/aligned_alloc.h | grep -v intrinsic | tr "\n" " ") 83 | echo>>${target} "usage_SOURCES=${sources}" 84 | 85 | echo "Creating Makefile.in" 86 | aclocal 87 | automake --add-missing 88 | autoconf 89 | 90 | cflags="-O3" 91 | if [[ "$(c++ -dumpversion)" > "4.4.9" ]]; then 92 | cflags="${cflags} -mcrc32" 93 | fi 94 | 95 | cflags="${cflags} $2" 96 | 97 | ./configure CXXFLAGS="${cflags}" CFLAGS="${cflags}" 98 | 99 | echo "" 100 | echo "Configured the library. Compiler flags:" 101 | echo " ${cflags}" 102 | echo "Library configuration flags:" 103 | echo " ${crcutil_flags}" 104 | echo "" 105 | 106 | if [ "${1}" == "configure" ]; then 107 | exit 108 | fi 109 | 110 | make $1 111 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/base_types.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Defines 8/16/32/64-bit integer types. 16 | // 17 | // Either uint64 or uint32 will map to size_t. 18 | // This way, specialized variants of CRC implementation 19 | // parameterized by "size_t" will be reused when 20 | // parameterized by "uint64" or "uint32". 21 | // In their turn, specialized versions are parameterized 22 | // by "size_t" so that one version of the code is optimal 23 | // both on 32-bit and 64-bit platforms. 
24 | 25 | #ifndef CRCUTIL_BASE_TYPES_H_ 26 | #define CRCUTIL_BASE_TYPES_H_ 27 | 28 | #include "std_headers.h" // size_t, ptrdiff_t 29 | 30 | namespace crcutil { 31 | 32 | template class ChooseFirstIfSame { 33 | public: 34 | template class ChooseFirstIfTrue { 35 | public: 36 | typedef AA Type; 37 | }; 38 | template class ChooseFirstIfTrue { 39 | public: 40 | typedef BB Type; 41 | }; 42 | 43 | typedef typename ChooseFirstIfTrue::Type Type; 44 | }; 45 | 46 | typedef unsigned char uint8; 47 | typedef signed char int8; 48 | 49 | typedef unsigned short uint16; 50 | typedef short int16; 51 | 52 | typedef ChooseFirstIfSame::Type uint32; 53 | typedef ChooseFirstIfSame::Type int32; 54 | 55 | #if defined(_MSC_VER) 56 | typedef ChooseFirstIfSame::Type uint64; 57 | typedef ChooseFirstIfSame::Type int64; 58 | #define HAVE_UINT64 1 59 | #elif defined(__GNUC__) 60 | typedef ChooseFirstIfSame::Type uint64; 61 | typedef ChooseFirstIfSame::Type int64; 62 | #define HAVE_UINT64 1 63 | #else 64 | // TODO: ensure that everything compiles and works when HAVE_UINT64 is false. 65 | // TODO: remove HAVE_UINT64 and use sizeof(uint64) instead? 66 | #define HAVE_UINT64 0 67 | typedef uint32 uint64; 68 | typedef int32 int64; 69 | #endif 70 | 71 | } // namespace crcutil 72 | 73 | #endif // CRCUTIL_BASE_TYPES_H_ 74 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/crc32c_sse4.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Implements CRC32C using Intel's SSE4 crc32 instruction. 16 | // Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, 17 | // emulates intrinsics via CRC_WORD/CRC_BYTE otherwise. 18 | 19 | #ifndef CRCUTIL_CRC32C_SSE4_H_ 20 | #define CRCUTIL_CRC32C_SSE4_H_ 21 | 22 | #include "gf_util.h" // base types, gf_util class, etc. 23 | #include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics 24 | 25 | #if HAVE_I386 || HAVE_AMD64 26 | 27 | #if CRCUTIL_USE_MM_CRC32 28 | 29 | #if HAVE_I386 30 | #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) 31 | #else 32 | #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) 33 | #endif // HAVE_I386 34 | 35 | #define CRC_UPDATE_BYTE(crc, value) \ 36 | (crc = _mm_crc32_u8(static_cast(crc), static_cast(value))) 37 | 38 | #else 39 | 40 | #include "generic_crc.h" 41 | 42 | #define CRC_UPDATE_WORD(crc, value) do { \ 43 | size_t buf = (value); \ 44 | CRC_WORD(this, crc, buf); \ 45 | } while (0) 46 | #define CRC_UPDATE_BYTE(crc, value) do { \ 47 | CRC_BYTE(this, crc, (value)); \ 48 | } while (0) 49 | 50 | #endif // CRCUTIL_USE_MM_CRC32 51 | 52 | namespace crcutil { 53 | 54 | #pragma pack(push, 16) 55 | 56 | // Since the same pieces should be parameterized in many different places 57 | // and we do not want to introduce a mistake which is rather hard to find, 58 | // use a macro to enumerate all block sizes. 59 | // 60 | // Block sizes and number of stripes were tuned for best performance. 
61 | // 62 | // All constants should be literal constants (too lazy to fix the macro). 63 | // 64 | // The use of different "macro_first", "macro", and "macro_last" 65 | // allows generation of different code for smallest, in between, 66 | // and largest block sizes. 67 | // 68 | // This macro shall be kept in sync with 69 | // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. 70 | // Failure to do so will cause compile-time error. 71 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ 72 | macro_smallest, macro, macro_largest) \ 73 | macro_smallest(512, 3); \ 74 | macro(1024, 3); \ 75 | macro(4096, 3); \ 76 | macro_largest(32768, 3) 77 | 78 | // This macro shall be kept in sync with 79 | // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. 80 | // Failure to do so will cause compile-time error. 81 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ 82 | macro_smallest, macro, macro_largest) \ 83 | macro_largest(32768, 3); \ 84 | macro(4096, 3); \ 85 | macro(1024, 3); \ 86 | macro_smallest(512, 3) 87 | 88 | // Enumerates all block sizes. 89 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ 90 | CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) 91 | 92 | #define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ 93 | (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) 94 | 95 | #define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ 96 | (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) 97 | 98 | #define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ 99 | mul_table_##block_size##_##num_blocks##_ 100 | 101 | class RollingCrc32cSSE4; 102 | 103 | class Crc32cSSE4 { 104 | public: 105 | // Exports Crc, TableEntry, and Word (needed by RollingCrc). 106 | typedef size_t Crc; 107 | typedef Crc Word; 108 | typedef Crc TableEntry; 109 | 110 | Crc32cSSE4() {} 111 | 112 | // Initializes the tables given generating polynomial of degree (degree). 
113 | // If "canonical" is true, crc value will be XOR'ed with (-1) before and 114 | // after actual CRC computation. 115 | explicit Crc32cSSE4(bool canonical) { 116 | Init(canonical); 117 | } 118 | void Init(bool canonical); 119 | 120 | // Initializes the tables given generating polynomial of degree. 121 | // If "canonical" is true, crc value will be XOR'ed with (-1) before and 122 | // after actual CRC computation. 123 | // Provided for compatibility with GenericCrc. 124 | Crc32cSSE4(const Crc &generating_polynomial, 125 | size_t degree, 126 | bool canonical) { 127 | Init(generating_polynomial, degree, canonical); 128 | } 129 | void Init(const Crc &generating_polynomial, 130 | size_t degree, 131 | bool canonical) { 132 | if (generating_polynomial == FixedGeneratingPolynomial() && 133 | degree == FixedDegree()) { 134 | Init(canonical); 135 | } 136 | } 137 | 138 | // Returns fixed generating polynomial the class implements. 139 | static Crc FixedGeneratingPolynomial() { 140 | return 0x82f63b78; 141 | } 142 | 143 | // Returns degree of fixed generating polynomial the class implements. 144 | static Crc FixedDegree() { 145 | return 32; 146 | } 147 | 148 | // Returns base class. 149 | const GfUtil &Base() const { return base_; } 150 | 151 | // Computes CRC32C. 152 | size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { 153 | return Crc32c(data, bytes, crc); 154 | } 155 | 156 | // Returns true iff crc32 instruction is available. 157 | static bool IsSSE42Available(); 158 | 159 | protected: 160 | // Actual implementation. 
161 | size_t Crc32c(const void *data, size_t bytes, Crc crc) const; 162 | 163 | enum { 164 | kTableEntryBits = 8, 165 | kTableEntries = 1 << kTableEntryBits, 166 | kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, 167 | kNumTablesHalfLo = kNumTables / 2, 168 | kNumTablesHalfHi = (kNumTables + 1) / 2, 169 | 170 | kUnrolledLoopCount = 8, 171 | kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), 172 | }; 173 | 174 | // May be set to size_t or uint32, whichever is faster. 175 | typedef uint32 Entry; 176 | 177 | #define DECLARE_MUL_TABLE(block_size, num_stripes) \ 178 | Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ 179 | [kNumTables][kTableEntries] 180 | 181 | CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); 182 | 183 | #undef DECLARE_MUL_TABLE 184 | 185 | GfUtil base_; 186 | 187 | #if !CRCUTIL_USE_MM_CRC32 188 | TableEntry crc_word_[sizeof(Word)][256]; 189 | friend class RollingCrc32cSSE4; 190 | #endif // !CRCUTIL_USE_MM_CRC32 191 | } GCC_ALIGN_ATTRIBUTE(16); 192 | 193 | class RollingCrc32cSSE4 { 194 | public: 195 | typedef Crc32cSSE4::Crc Crc; 196 | typedef Crc32cSSE4::TableEntry TableEntry; 197 | typedef Crc32cSSE4::Word Word; 198 | 199 | RollingCrc32cSSE4() {} 200 | 201 | // Initializes internal data structures. 202 | // Retains reference to "crc" instance -- it is used by Start(). 203 | RollingCrc32cSSE4(const Crc32cSSE4 &crc, 204 | size_t roll_window_bytes, 205 | const Crc &start_value) { 206 | Init(crc, roll_window_bytes, start_value); 207 | } 208 | void Init(const Crc32cSSE4 &crc, 209 | size_t roll_window_bytes, 210 | const Crc &start_value); 211 | 212 | // Computes crc of "roll_window_bytes" using 213 | // "start_value" of "crc" (see Init()). 214 | Crc Start(const void *data) const { 215 | return crc_->CrcDefault(data, roll_window_bytes_, start_value_); 216 | } 217 | 218 | // Computes CRC of "roll_window_bytes" starting in next position. 
219 | Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { 220 | Crc crc = old_crc; 221 | CRC_UPDATE_BYTE(crc, byte_in); 222 | crc ^= out_[byte_out]; 223 | return crc; 224 | } 225 | 226 | // Returns start value. 227 | Crc StartValue() const { return start_value_; } 228 | 229 | // Returns length of roll window. 230 | size_t WindowBytes() const { return roll_window_bytes_; } 231 | 232 | protected: 233 | typedef Crc Entry; 234 | Entry out_[256]; 235 | 236 | // Used only by Start(). 237 | Crc start_value_; 238 | const Crc32cSSE4 *crc_; 239 | size_t roll_window_bytes_; 240 | 241 | #if !CRCUTIL_USE_MM_CRC32 242 | TableEntry crc_word_[sizeof(Word)][256]; 243 | #endif // !CRCUTIL_USE_MM_CRC32 244 | } GCC_ALIGN_ATTRIBUTE(16); 245 | 246 | #pragma pack(pop) 247 | 248 | } // namespace crcutil 249 | 250 | #endif // HAVE_I386 || HAVE_AMD64 251 | 252 | #endif // CRCUTIL_CRC32C_SSE4_H_ 253 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/crc32c_sse4_intrin.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Provides _mm_crc32_u64/32/8 intrinsics. 
16 | 17 | #ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_ 18 | #define CRCUTIL_CRC32C_SSE4_INTRIN_H_ 19 | 20 | #include "platform.h" 21 | #include "base_types.h" 22 | 23 | #if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) 24 | 25 | #if defined(_MSC_VER) || defined(__SSE4_2__) 26 | 27 | #if defined(_MSC_VER) 28 | #pragma warning(push) 29 | // '_M_IA64' is not defined as a preprocessor macro 30 | #pragma warning(disable: 4668) 31 | #endif // defined(_MSC_VER) 32 | 33 | #include 34 | 35 | #if defined(_MSC_VER) 36 | #pragma warning(pop) 37 | #endif // defined(_MSC_VER) 38 | 39 | #elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C) 40 | // Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32 41 | // is set irrespective of "-msse*" settings. This way, the sources 42 | // may be compiled with "-msse2 -mcrc32" and work on older CPUs, 43 | // while taking full advantage of "crc32" instruction on newer 44 | // CPUs (requires dynamic CPU detection). See "interface.cc". 45 | // 46 | // If neither -msse4 nor -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set 47 | // and CRCUTIL_FORCE_ASM_CRC32C is not set, compile-time error will happen. 48 | // Why? Because GCC disables __builtin_ia32_crc32* intrinsics when compiled 49 | // without -msse4 or -mcrc32. -msse4 could be detected at compile time by checking 50 | // whether __SSE4_2__ is defined, but there is no way to tell whether the 51 | // sources are compiled with -mcrc32. 
52 | 53 | extern __inline unsigned int __attribute__(( 54 | __gnu_inline__, __always_inline__, __artificial__)) 55 | _mm_crc32_u8(unsigned int __C, unsigned char __V) { 56 | //return __builtin_ia32_crc32qi(__C, __V); 57 | return 0; 58 | } 59 | #ifdef __x86_64__ 60 | extern __inline unsigned long long __attribute__(( 61 | __gnu_inline__, __always_inline__, __artificial__)) 62 | _mm_crc32_u64(unsigned long long __C, unsigned long long __V) { 63 | //return __builtin_ia32_crc32di(__C, __V); 64 | return 0; 65 | } 66 | #else 67 | extern __inline unsigned int __attribute__(( 68 | __gnu_inline__, __always_inline__, __artificial__)) 69 | _mm_crc32_u32(unsigned int __C, unsigned int __V) { 70 | //return __builtin_ia32_crc32si (__C, __V); 71 | return 0; 72 | } 73 | #endif // __x86_64__ 74 | 75 | #else 76 | 77 | // GCC 4.4.x and earlier: use inline asm. 78 | 79 | namespace crcutil { 80 | 81 | __forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) { 82 | asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 83 | return crc; 84 | } 85 | 86 | __forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) { 87 | asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 88 | return crc; 89 | } 90 | 91 | __forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) { 92 | asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 93 | return crc; 94 | } 95 | 96 | } // namespace crcutil 97 | 98 | #endif 99 | 100 | #endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) 101 | 102 | #endif // CRCUTIL_CRC32C_SSE4_INTRIN_H_ 103 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/crc_casts.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Casting between integers and compound CRC types. 16 | 17 | #ifndef CRCUTIL_CRC_CASTS_H_ 18 | #define CRCUTIL_CRC_CASTS_H_ 19 | 20 | #include "base_types.h" // uint8, uint64 21 | #include "platform.h" // __forceinline 22 | 23 | namespace crcutil { 24 | 25 | // Downcasts a value of (oftentimes larger) Crc type to (smaller base integer) 26 | // Result type, enabling specialized downcasts implemented by "large integer" 27 | // classes (e.g. uint128_sse2). 28 | template 29 | __forceinline Result Downcast(const Crc &x) { 30 | return static_cast(x); 31 | } 32 | 33 | // Extracts 8 least significant bits from a value of Crc type. 34 | #define TO_BYTE(x) Downcast(x) 35 | 36 | // Converts a pair of uint64 bit values into single value of CRC type. 37 | // It is caller's responsibility to ensure that the input is correct. 38 | template 39 | __forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) { 40 | if (sizeof(Crc) <= sizeof(lo)) { 41 | return static_cast(lo); 42 | } else { 43 | // static_cast to keep compiler happy. 44 | Crc result = static_cast(hi); 45 | result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo)); 46 | result ^= lo; 47 | return result; 48 | } 49 | } 50 | 51 | // Converts Crc value to a pair of uint64 values. 
52 | template 53 | __forceinline void Uint64FromCrc(const Crc &crc, 54 | uint64 *lo, uint64 *hi = NULL) { 55 | if (sizeof(*lo) >= sizeof(crc)) { 56 | *lo = Downcast(crc); 57 | if (hi != NULL) { 58 | *hi = 0; 59 | } 60 | } else { 61 | *lo = Downcast(crc); 62 | *hi = Downcast(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(*lo))); // shift by the width of uint64 (*lo), not of the pointer "lo" 63 | } 64 | } 65 | 66 | } // namespace crcutil 67 | 68 | #endif // CRCUTIL_CRC_CASTS_H_ 69 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Implements 64-bit multiword CRC using MMX built-in functions. 
16 | 17 | #include "generic_crc.h" 18 | 19 | #if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && !defined(_MSC_VER) && !(defined(__GNUC__) && !defined(__clang__)) 20 | 21 | namespace crcutil { 22 | 23 | template<> uint64 GenericCrc::CrcMultiwordI386Mmx( 24 | const void *data, size_t bytes, const uint64 &start) 25 | const GCC_OMIT_FRAME_POINTER; 26 | 27 | #if !defined(_MSC_VER) 28 | template<> uint64 GenericCrc::CrcMultiword( 29 | const void *data, 30 | size_t bytes, 31 | const uint64 &start) const { 32 | if (bytes <= 7) { 33 | const uint8 *src = static_cast(data); 34 | uint64 crc = start ^ Base().Canonize(); 35 | for (const uint8 *end = src + bytes; src < end; ++src) { 36 | CRC_BYTE(this, crc, *src); 37 | } 38 | return (crc ^ Base().Canonize()); 39 | } 40 | return CrcMultiwordI386Mmx(data, bytes, start); 41 | } 42 | #else 43 | #pragma warning(push) 44 | // CL: uninitialized local variable 'crc1' used 45 | // Wrong: crc1 = XOR(crc1, crc1) sets it to 0. 46 | #pragma warning(disable: 4700) 47 | 48 | #pragma warning(disable: 4619) // there is no warning number '592' 49 | 50 | // ICL: variable "crc1" is used before its value is set 51 | // Wrong: crc1 = XOR(crc1, crc1) sets it to 0. 
52 | #pragma warning(disable: 592) 53 | #endif // !defined(_MSC_VER) 54 | 55 | #define MM64(adr) reinterpret_cast(adr) 56 | #define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte]) 57 | 58 | #define CRC_WORD_MMX(this, crc, buf) do { \ 59 | buf = _mm_xor_si64(buf, crc); \ 60 | uint32 tmp = static_cast(_mm_cvtsi64_si32(buf)); \ 61 | buf = _mm_srli_si64(buf, 32); \ 62 | crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \ 63 | tmp >>= 8; \ 64 | crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \ 65 | tmp >>= 8; \ 66 | crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \ 67 | tmp >>= 8; \ 68 | crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \ 69 | tmp = static_cast(_mm_cvtsi64_si32(buf)); \ 70 | crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \ 71 | tmp >>= 8; \ 72 | crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \ 73 | tmp >>= 8; \ 74 | crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \ 75 | tmp >>= 8; \ 76 | crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \ 77 | } while (0) 78 | 79 | template<> uint64 GenericCrc::CrcMultiwordI386Mmx( 80 | const void *data, size_t bytes, const uint64 &start) const { 81 | const uint8 *src = static_cast(data); 82 | const uint8 *end = src + bytes; 83 | uint64 crc = start ^ Base().Canonize(); 84 | 85 | ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64); 86 | if (src >= end) { 87 | return (crc ^ Base().Canonize()); 88 | } 89 | 90 | // Process 4 registers of sizeof(uint64) bytes at once. 
91 | bytes = static_cast(end - src) & ~(4*8 - 1); 92 | if (bytes > 4*8) { 93 | const uint8 *stop = src + bytes - 4*8; 94 | union { 95 | __m64 m64; 96 | uint64 u64; 97 | } temp; 98 | __m64 crc0; 99 | __m64 crc1; 100 | __m64 crc2; 101 | __m64 crc3; 102 | __m64 buf0 = MM64(src)[0]; 103 | __m64 buf1 = MM64(src)[1]; 104 | __m64 buf2 = MM64(src)[2]; 105 | __m64 buf3 = MM64(src)[3]; 106 | 107 | temp.u64 = crc; 108 | crc0 = temp.m64; 109 | #if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4) 110 | // There is no way to suppress a warning in GCC; 111 | // generate extra assignments. 112 | temp.u64 = 0; 113 | crc1 = temp.m64; 114 | crc2 = temp.m64; 115 | crc3 = temp.m64; 116 | #else 117 | crc1 = _mm_xor_si64(crc1, crc1); 118 | crc2 = _mm_xor_si64(crc2, crc2); 119 | crc3 = _mm_xor_si64(crc3, crc3); 120 | #endif // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4) 121 | 122 | do { 123 | PREFETCH(src); 124 | src += 4*8; 125 | 126 | buf0 = _mm_xor_si64(buf0, crc0); 127 | buf1 = _mm_xor_si64(buf1, crc1); 128 | buf2 = _mm_xor_si64(buf2, crc2); 129 | buf3 = _mm_xor_si64(buf3, crc3); 130 | 131 | uint32 tmp0 = static_cast(_mm_cvtsi64_si32(buf0)); 132 | uint32 tmp1 = static_cast(_mm_cvtsi64_si32(buf1)); 133 | uint32 tmp2 = static_cast(_mm_cvtsi64_si32(buf2)); 134 | uint32 tmp3 = static_cast(_mm_cvtsi64_si32(buf3)); 135 | 136 | buf0 = _mm_srli_si64(buf0, 32); 137 | buf1 = _mm_srli_si64(buf1, 32); 138 | buf2 = _mm_srli_si64(buf2, 32); 139 | buf3 = _mm_srli_si64(buf3, 32); 140 | 141 | crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)]; 142 | tmp0 >>= 8; 143 | crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)]; 144 | tmp1 >>= 8; 145 | crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)]; 146 | tmp2 >>= 8; 147 | crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)]; 148 | tmp3 >>= 8; 149 | 150 | #define XOR(byte) do { \ 151 | crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \ 152 | tmp0 >>= 8; \ 153 | crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \ 154 | tmp1 >>= 8; \ 155 | crc2 = _mm_xor_si64(crc2, 
MM64_TABLE(byte)[TO_BYTE(tmp2)]); \ 156 | tmp2 >>= 8; \ 157 | crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \ 158 | tmp3 >>= 8; \ 159 | } while (0) 160 | 161 | XOR(1); 162 | XOR(2); 163 | 164 | crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]); 165 | tmp0 = static_cast(_mm_cvtsi64_si32(buf0)); 166 | crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]); 167 | tmp1 = static_cast(_mm_cvtsi64_si32(buf1)); 168 | crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]); 169 | tmp2 = static_cast(_mm_cvtsi64_si32(buf2)); 170 | crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]); 171 | tmp3 = static_cast(_mm_cvtsi64_si32(buf3)); 172 | 173 | XOR(4); 174 | XOR(5); 175 | XOR(6); 176 | 177 | #undef XOR 178 | 179 | crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]); 180 | buf0 = MM64(src)[0]; 181 | crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]); 182 | buf1 = MM64(src)[1]; 183 | crc2 = _mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]); 184 | buf2 = MM64(src)[2]; 185 | crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]); 186 | buf3 = MM64(src)[3]; 187 | } 188 | while (src < stop); 189 | 190 | CRC_WORD_MMX(this, crc0, buf0); 191 | buf1 = _mm_xor_si64(buf1, crc1); 192 | CRC_WORD_MMX(this, crc0, buf1); 193 | buf2 = _mm_xor_si64(buf2, crc2); 194 | CRC_WORD_MMX(this, crc0, buf2); 195 | buf3 = _mm_xor_si64(buf3, crc3); 196 | CRC_WORD_MMX(this, crc0, buf3); 197 | 198 | temp.m64 = crc0; 199 | crc = temp.u64; 200 | 201 | _mm_empty(); 202 | 203 | src += 4*8; 204 | } 205 | 206 | // Process sizeof(uint64) bytes at once. 
207 | bytes = static_cast(end - src) & ~(sizeof(uint64) - 1); 208 | if (bytes > 0) { 209 | union { 210 | __m64 m64; 211 | uint64 u64; 212 | } temp; 213 | __m64 crc0; 214 | 215 | temp.u64 = crc; 216 | crc0 = temp.m64; 217 | 218 | for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) { 219 | __m64 buf0 = MM64(src)[0]; 220 | CRC_WORD_MMX(this, crc0, buf0); 221 | } 222 | 223 | temp.m64 = crc0; 224 | crc = temp.u64; 225 | 226 | _mm_empty(); 227 | } 228 | 229 | // Compute CRC of remaining bytes. 230 | for (;src < end; ++src) { 231 | CRC_BYTE(this, crc, *src); 232 | } 233 | 234 | return (crc ^ Base().Canonize()); 235 | } 236 | 237 | #if defined(_MSC_VER) 238 | #pragma warning(pop) 239 | #endif // defined(_MSC_VER) 240 | 241 | } // namespace crcutil 242 | 243 | #endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX 244 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/platform.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Detects configuration and defines compiler-specific macros. 16 | // Also, sets user-defined CRCUTIL_USE_* macros to default values. 
17 | 18 | #ifndef CRCUTIL_PLATFORM_H_ 19 | #define CRCUTIL_PLATFORM_H_ 20 | 21 | // Permanently disable some annoying warnings generated 22 | // by Microsoft CL when compiling Microsoft's headers. 23 | #include "std_headers.h" 24 | 25 | // Use inline asm version of the code? 26 | #if !defined(CRCUTIL_USE_ASM) 27 | #define CRCUTIL_USE_ASM 1 28 | #endif // !defined(CRCUTIL_USE_ASM) 29 | 30 | 31 | #if !defined(HAVE_I386) 32 | #if defined(__i386__) || defined(_M_IX86) 33 | #define HAVE_I386 1 34 | #else 35 | #define HAVE_I386 0 36 | #endif // defined(__i386__) || defined(_M_IX86) 37 | #endif // !defined(HAVE_I386) 38 | 39 | 40 | #if !defined(HAVE_AMD64) 41 | #if defined(__amd64__) || defined(_M_AMD64) 42 | #define HAVE_AMD64 1 43 | #else 44 | #define HAVE_AMD64 0 45 | #endif // defined(__amd64__) || defined(_M_AMD64) 46 | #endif // !defined(HAVE_AMD64) 47 | 48 | 49 | #if HAVE_AMD64 || HAVE_I386 50 | #if defined(_MSC_VER) 51 | #pragma warning(push) 52 | // '_M_IX86' is not defined as a preprocessor macro 53 | #pragma warning(disable: 4668) 54 | #include 55 | #pragma warning(pop) 56 | #endif // defined(_MSC_VER) 57 | 58 | 59 | #if !defined(HAVE_MMX) 60 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) 61 | #define HAVE_MMX 1 62 | #else 63 | #define HAVE_MMX 0 64 | #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) 65 | #endif // !defined(HAVE_MMX) 66 | 67 | 68 | #if !defined(HAVE_SSE) 69 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) 70 | #include 71 | #define HAVE_SSE 1 72 | #else 73 | #define HAVE_SSE 0 74 | #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) 75 | #endif // !defined(HAVE_SSE) 76 | 77 | 78 | #if !defined(HAVE_SSE2) 79 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) 80 | #include 81 | #define HAVE_SSE2 1 82 | #else 83 | #define HAVE_SSE2 0 84 | #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) 85 | #endif // !defined(HAVE_SSE2) 86 | 87 | 
#else 88 | 89 | #if !defined(HAVE_MMX) 90 | #define HAVE_MMX 0 91 | #endif // !defined(HAVE_MMX) 92 | 93 | #if !defined(HAVE_SSE) 94 | #define HAVE_SSE 0 95 | #endif // !defined(HAVE_SSE) 96 | 97 | #if !defined(HAVE_SSE2) 98 | #define HAVE_SSE2 0 99 | #endif // !defined(HAVE_SSE2) 100 | 101 | #endif // HAVE_AMD64 || HAVE_I386 102 | 103 | // Error checking 104 | #if HAVE_SSE && !HAVE_MMX 105 | #error SSE is available but not MMX? 106 | #endif // HAVE_SSE && !HAVE_MMX 107 | 108 | #if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) 109 | #error SSE2 is available but not SSE or MMX? 110 | #endif // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) 111 | 112 | 113 | #if !defined(CRCUTIL_PREFETCH_WIDTH) 114 | // On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without 115 | // prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and 116 | // larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs 117 | // smaller than 8MB, 4-5% slower when length >= 8MB. 118 | // Tested with prefetch length 256, 512, and 4096. 119 | // 120 | // At this moment there is no compelling reason to use prefetching. 121 | // 122 | #define CRCUTIL_PREFETCH_WIDTH 0 123 | #endif // !defined(CRCUTIL_PREFETCH_WIDTH) 124 | 125 | 126 | #if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 127 | #define PREFETCH(src) \ 128 | _mm_prefetch(reinterpret_cast(src) + CRCUTIL_PREFETCH_WIDTH, \ 129 | _MM_HINT_T0) 130 | #else 131 | #define PREFETCH(src) 132 | #endif // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 133 | 134 | 135 | // If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data 136 | // before accessing it at word boundary. See generic_crc.cc, 137 | // ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro. 
138 | #if !defined(CRCUTIL_MIN_ALIGN_SIZE) 139 | #if HAVE_AMD64 || HAVE_I386 140 | #define CRCUTIL_MIN_ALIGN_SIZE (1024) 141 | #else 142 | #define CRCUTIL_MIN_ALIGN_SIZE 0 143 | #endif // HAVE_AMD64 || HAVE_I386 144 | #endif // !defined(CRCUTIL_MIN_ALIGN_SIZE) 145 | 146 | 147 | // Use _mm_crc32_u64/32/8 intrinsics? 148 | // If not, they will be implemented in software. 149 | #if !HAVE_I386 && !HAVE_AMD64 150 | 151 | #undef CRCUTIL_USE_MM_CRC32 152 | #define CRCUTIL_USE_MM_CRC32 0 153 | 154 | #else 155 | 156 | #if !defined(CRCUTIL_USE_MM_CRC32) 157 | #if defined(_MSC_VER) || defined(__GNUC__) 158 | #define CRCUTIL_USE_MM_CRC32 1 159 | #else 160 | #define CRCUTIL_USE_MM_CRC32 0 161 | #endif // defined(_MSC_VER) || defined(__GNUC__) 162 | #endif // !defined(CRCUTIL_USE_MM_CRC32) 163 | 164 | #endif // !HAVE_I386 && !HAVE_AMD64 165 | 166 | 167 | // Stringize -- always handy. 168 | #define TO_STRING_VALUE(arg) #arg 169 | #define TO_STRING(arg) TO_STRING_VALUE(arg) 170 | 171 | 172 | // Compilers give "right shift count >= width of type" warning even 173 | // though the shift happens only under appropriate "if". *_NO_WARNING shifts by 0 when "bits" is out of range; *_SAFE yields 0 instead. 174 | #define SHIFT_RIGHT_NO_WARNING(value, bits) \ 175 | ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0)) 176 | #define SHIFT_RIGHT_SAFE(value, bits) \ 177 | ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0) 178 | 179 | // The same for left shifts. 180 | #define SHIFT_LEFT_NO_WARNING(value, bits) \ 181 | ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0)) 182 | #define SHIFT_LEFT_SAFE(value, bits) \ 183 | ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0) 184 | 185 | // GCC-specific macros. 186 | // 187 | #define GCC_VERSION_AVAILABLE(major, minor) \ 188 | (defined(__GNUC__) && \ 189 | (__GNUC__ > (major) || \ 190 | (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) 191 | 192 | 193 | #if defined(__GNUC__) 194 | 195 | // The GenericCrc tables must be properly aligned. 196 | // Penalty for misalignment? 
50% performance degradation. 197 | // For 128-bit SSE2, the penalty is access violation. 198 | #define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n))) 199 | 200 | #if GCC_VERSION_AVAILABLE(4, 4) 201 | // If not marked as "omit frame pointer", 202 | // GCC won't be able to find enough registers. 203 | #define GCC_OMIT_FRAME_POINTER \ 204 | __attribute__((__optimize__(2, "omit-frame-pointer"))) 205 | #endif // GCC_VERSION_AVAILABLE(4, 4) 206 | 207 | #if !defined(__forceinline) 208 | #define __forceinline __attribute__((__always_inline__)) inline 209 | #endif // !defined(__forceinline) 210 | 211 | #if defined(__APPLE_CC__) 212 | // The version of GCC used by Mac OS X Xcode v 5664 does not understand 213 | // "movq xmm, r64" instruction and requires the use of "movd" (probably 214 | // because of a bug in GCC which treats "movq/movd xmm,r64 or r64,xmm" 215 | // the same). 216 | // 217 | // Leaving common sense aside, let's peek into Intel's instruction 218 | // reference manual. That's what description of MOVD command says: 219 | // MOVD xmm, r/m32 (opcode 66 0F 6E /r) 220 | // MOVD r/m32, xmm (opcode 66 0F 7E /r) 221 | // MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r) 222 | // MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r) 223 | #define SSE2_MOVQ "movd" 224 | #else 225 | #define SSE2_MOVQ "movq" 226 | #endif // defined(__APPLE_CC__) 227 | 228 | #endif // defined(__GNUC__) 229 | 230 | 231 | // Define compiler-specific macros that were not set yet. 
232 | #if !defined(_MSC_VER) && !defined(__forceinline) 233 | #define __forceinline inline 234 | #endif // !defined(_MSC_VER) && !defined(__forceinline) 235 | 236 | #if !defined(GCC_OMIT_FRAME_POINTER) 237 | #define GCC_OMIT_FRAME_POINTER 238 | #endif // !defined(GCC_OMIT_FRAME_POINTER) 239 | 240 | #if !defined(GCC_ALIGN_ATTRIBUTE) 241 | #define GCC_ALIGN_ATTRIBUTE(n) 242 | #endif // !defined(GCC_ALIGN_ATTRIBUTE) 243 | 244 | 245 | #endif // CRCUTIL_PLATFORM_H_ 246 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/protected_crc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Protects CRC tables with its own CRC. 16 | // CRC tables get corrupted too, and if corruption is 17 | // not caught, data poisoning becomes a reality. 18 | 19 | #ifndef CRCUTIL_PROTECTED_CRC_H_ 20 | #define CRCUTIL_PROTECTED_CRC_H_ 21 | 22 | namespace crcutil { 23 | 24 | #pragma pack(push, 16) 25 | 26 | // Class CrcImplementation should not have virtual functions: 27 | // vptr is stored as the very first field, vptr value is defined 28 | // at runtime, so it is impossible to CRC(*this) once and 29 | // guarantee that this value will not change from run to run. 
30 | // 31 | template<typename CrcImplementation> class ProtectedCrc 32 | : public CrcImplementation { 33 | public: 34 | typedef typename CrcImplementation::Crc Crc; 35 | 36 | // Returns check value that the caller should compare 37 | // against pre-computed, trusted constant. 38 | // 39 | // Computing SelfCheckValue() after CRC initialization, 40 | // storing it in memory, and periodically checking against 41 | // stored value may not work: if CRC tables were initialized 42 | // incorrectly and/or had been corrupted during initialization, 43 | // SelfCheckValue() will return garbage. Garbage in, garbage out. 44 | // Consecutive checks will not detect a problem, the application 45 | // will happily produce and save the data with corrupt CRC. 46 | // 47 | // The application should call SelfCheckValue() regularly: 48 | // 1. First and foremost, on every CRC mismatch. 49 | // 2. After CRC'ing the data but before sending it out or writing it. 50 | // 3. Worst case, every Nth CRC'ed byte or every Nth call to CRC. 51 | // 52 | Crc SelfCheckValue() const { 53 | return this->CrcDefault(this, sizeof(*this), 0); // "this->" is required: CrcDefault is a member of the dependent base class 54 | } 55 | } GCC_ALIGN_ATTRIBUTE(16); 56 | 57 | #pragma pack(pop) 58 | 59 | } // namespace crcutil 60 | 61 | #endif // CRCUTIL_PROTECTED_CRC_H_ 62 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/rolling_crc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Implements rolling CRC (e.g. for Rabin fingerprinting). 16 | 17 | #ifndef CRCUTIL_ROLLING_CRC_H_ 18 | #define CRCUTIL_ROLLING_CRC_H_ 19 | 20 | #include "base_types.h" // size_t, uint8 21 | #include "crc_casts.h" // TO_BYTE 22 | 23 | namespace crcutil { 24 | 25 | #pragma pack(push, 16) 26 | 27 | // CrcImplementation should provide: 28 | // - typename Crc 29 | // - typename TableEntry 30 | // - typename Word 31 | // - Crc CrcDefault(const void *data, size_t bytes, const Crc &start) 32 | // - const GfUtil<Crc> &Base() const 33 | template<typename CrcImplementation> class RollingCrc { 34 | public: 35 | typedef typename CrcImplementation::Crc Crc; 36 | typedef typename CrcImplementation::TableEntry TableEntry; 37 | typedef typename CrcImplementation::Word Word; 38 | 39 | RollingCrc() {} 40 | 41 | // Initializes internal data structures. 42 | // Retains reference to "crc" instance -- it is used by Start(). 43 | RollingCrc(const CrcImplementation &crc, 44 | size_t roll_window_bytes, 45 | const Crc &start_value) { 46 | Init(crc, roll_window_bytes, start_value); 47 | } 48 | 49 | // Computes crc of "roll_window_bytes" using 50 | // "start_value" of "crc" (see Init()). 51 | Crc Start(const void *data) const { 52 | return crc_->CrcDefault(data, roll_window_bytes_, start_value_); 53 | } 54 | 55 | // Computes CRC of "roll_window_bytes" starting in next position; "byte_out" and "byte_in" index 256-entry tables and must be < 256. 56 | Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { 57 | return (old_crc >> 8) ^ in_[TO_BYTE(old_crc) ^ byte_in] ^ out_[byte_out]; 58 | } 59 | 60 | // Initializes internal data structures. 61 | // Retains reference to "crc" instance -- it is used by Start(). 
62 | void Init(const CrcImplementation &crc, 63 | size_t roll_window_bytes, 64 | const Crc &start_value) { 65 | crc_ = &crc; 66 | roll_window_bytes_ = roll_window_bytes; 67 | start_value_ = start_value; 68 | 69 | Crc add = crc.Base().Canonize() ^ start_value; 70 | add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes)); 71 | add ^= crc.Base().Canonize(); 72 | Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1); 73 | add = crc.Base().Multiply(add, mul); 74 | 75 | mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree()); 76 | for (size_t i = 0; i < 256; ++i) { 77 | out_[i] = static_cast<TableEntry>( 78 | crc.Base().MultiplyUnnormalized( 79 | static_cast<Crc>(i), 8, mul) ^ add); 80 | } 81 | for (size_t i = 0; i < 256; ++i) { 82 | in_[i] = crc.crc_word_[sizeof(Word) - 1][i]; 83 | } 84 | } 85 | 86 | // Returns start value. 87 | Crc StartValue() const { return start_value_; } 88 | 89 | // Returns length of roll window. 90 | size_t WindowBytes() const { return roll_window_bytes_; } 91 | 92 | protected: 93 | TableEntry in_[256]; 94 | TableEntry out_[256]; 95 | 96 | // Used by Start() and by the StartValue()/WindowBytes() accessors. 97 | Crc start_value_; 98 | const CrcImplementation *crc_; 99 | size_t roll_window_bytes_; 100 | } GCC_ALIGN_ATTRIBUTE(16); 101 | 102 | #pragma pack(pop) 103 | 104 | } // namespace crcutil 105 | 106 | #endif // CRCUTIL_ROLLING_CRC_H_ 107 | -------------------------------------------------------------------------------- /src/crcutil-1.0/code/std_headers.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Includes some standard C headers for size_t, memset, etc. 16 | // 17 | // Also, permanently disables a number of warnings produced 18 | // by Microsoft's compiler when it includes standard headers 19 | // (surprisingly, also by Microsoft). 20 | 21 | #ifndef CRCUTIL_STD_HEADERS_H_ 22 | #define CRCUTIL_STD_HEADERS_H_ 23 | 24 | #if defined(_MSC_VER) 25 | // '4' bytes padding added after data member ... 26 | #pragma warning(disable:4820) 27 | 28 | // unreferenced inline function has been removed ... 29 | #pragma warning(disable:4514) 30 | 31 | // conditional expression is constant 32 | #pragma warning(disable: 4127) 33 | 34 | // function ... not inlined 35 | #pragma warning(disable: 4710) 36 | 37 | // function ... selected for automatic inline expansion 38 | #pragma warning(disable: 4711) 39 | 40 | #define _CRT_SECURE_NO_WARNINGS 41 | 42 | #endif // defined(_MSC_VER) 43 | 44 | // #define _CSTDLIB_ 45 | #include <stdio.h> // always handy 46 | #include <string.h> // memset 47 | #include <stdlib.h> // size_t, _rotl/_rotl64(MSC) 48 | #include <stddef.h> // ptrdiff_t (GNUC) 49 | #include <stdarg.h> // va_list 50 | 51 | #endif // CRCUTIL_STD_HEADERS_H_ 52 | -------------------------------------------------------------------------------- /src/crcutil-1.0/config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to 1 if you have the <inttypes.h> header file. */ 4 | #undef HAVE_INTTYPES_H 5 | 6 | /* Define to 1 if you have the <memory.h> header file. 
*/ 7 | #undef HAVE_MEMORY_H 8 | 9 | /* Define to 1 if you have the `memset' function. */ 10 | #undef HAVE_MEMSET 11 | 12 | /* Define to 1 if the system has the type `ptrdiff_t'. */ 13 | #undef HAVE_PTRDIFF_T 14 | 15 | /* Define to 1 if stdbool.h conforms to C99. */ 16 | #undef HAVE_STDBOOL_H 17 | 18 | /* Define to 1 if you have the header file. */ 19 | #undef HAVE_STDDEF_H 20 | 21 | /* Define to 1 if you have the header file. */ 22 | #undef HAVE_STDINT_H 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_STDLIB_H 26 | 27 | /* Define to 1 if you have the `strchr' function. */ 28 | #undef HAVE_STRCHR 29 | 30 | /* Define to 1 if you have the header file. */ 31 | #undef HAVE_STRINGS_H 32 | 33 | /* Define to 1 if you have the header file. */ 34 | #undef HAVE_STRING_H 35 | 36 | /* Define to 1 if you have the `strrchr' function. */ 37 | #undef HAVE_STRRCHR 38 | 39 | /* Define to 1 if you have the header file. */ 40 | #undef HAVE_SYS_STAT_H 41 | 42 | /* Define to 1 if you have the header file. */ 43 | #undef HAVE_SYS_TYPES_H 44 | 45 | /* Define to 1 if you have the header file. */ 46 | #undef HAVE_UNISTD_H 47 | 48 | /* Define to 1 if the system has the type `_Bool'. */ 49 | #undef HAVE__BOOL 50 | 51 | /* Name of package */ 52 | #undef PACKAGE 53 | 54 | /* Define to the address where bug reports for this package should be sent. */ 55 | #undef PACKAGE_BUGREPORT 56 | 57 | /* Define to the full name of this package. */ 58 | #undef PACKAGE_NAME 59 | 60 | /* Define to the full name and version of this package. */ 61 | #undef PACKAGE_STRING 62 | 63 | /* Define to the one symbol short name of this package. */ 64 | #undef PACKAGE_TARNAME 65 | 66 | /* Define to the home page for this package. */ 67 | #undef PACKAGE_URL 68 | 69 | /* Define to the version of this package. */ 70 | #undef PACKAGE_VERSION 71 | 72 | /* Define to 1 if you have the ANSI C header files. 
*/ 73 | #undef STDC_HEADERS 74 | 75 | /* Version number of package */ 76 | #undef VERSION 77 | 78 | /* Define to `__inline__' or `__inline' if that's what the C compiler 79 | calls it, or to nothing if 'inline' is not supported under any name. */ 80 | #ifndef __cplusplus 81 | #undef inline 82 | #endif 83 | 84 | /* Define to `unsigned int' if does not define. */ 85 | #undef size_t 86 | -------------------------------------------------------------------------------- /src/crcutil-1.0/configure.ac: -------------------------------------------------------------------------------- 1 | # -*- Autoconf -*- 2 | # Process this file with autoconf to produce a configure script. 3 | 4 | AC_PREREQ([2.65]) 5 | AC_INIT(crcutil, 1.0, crcutil@googlegroups.com) 6 | AM_INIT_AUTOMAKE(crcutil, 1.0) 7 | AC_CONFIG_FILES([Makefile]) 8 | AC_OUTPUT() 9 | AC_CONFIG_SRCDIR([tests/aligned_alloc.h]) 10 | AC_CONFIG_HEADERS([config.h]) 11 | 12 | # Checks for programs. 13 | AC_PROG_CXX 14 | AC_PROG_CC 15 | AC_PROG_INSTALL 16 | AC_PROG_MAKE_SET 17 | 18 | # Checks for libraries. 19 | 20 | # Checks for header files. 21 | AC_CHECK_HEADERS([stddef.h stdlib.h string.h]) 22 | 23 | # Checks for typedefs, structures, and compiler characteristics. 24 | AC_HEADER_STDBOOL 25 | AC_C_INLINE 26 | AC_TYPE_SIZE_T 27 | AC_CHECK_TYPES([ptrdiff_t]) 28 | 29 | # Checks for library functions. 30 | AC_CHECK_FUNCS([memset strchr strrchr]) 31 | 32 | AC_OUTPUT 33 | -------------------------------------------------------------------------------- /src/crcutil-1.0/examples/usage.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "std_headers.h" 16 | #include "interface.h" 17 | 18 | static const size_t kRollWindow = 4; 19 | static const unsigned char kTestData[] = "abcdefgh"; 20 | 21 | static const int kTestDataHead = 22 | static_cast<int>((sizeof(kTestData) - 1) / 4); 23 | static const int kTestDataTail = 24 | static_cast<int>(sizeof(kTestData) - 1 - kTestDataHead); 25 | 26 | typedef crcutil_interface::UINT64 uint64; 27 | 28 | // GCC -- up to 4.5.0 inclusively -- is not aware that the right format 29 | // to print "long long" is "%ll[oudx]". Such nonsense does not prevent 30 | // it from complaining about format mismatch, though. Here is the cure. 31 | void xprintf(const char *format, ...) { 32 | va_list va; 33 | va_start(va, format); 34 | vprintf(format, va); 35 | va_end(va); 36 | fflush(stdout); 37 | } 38 | 39 | // 40 | // Please notice that when working with 64-bit and smaller CRCs, 41 | // the use of "hi" part of CRC value is unnecessary. 42 | // 43 | void Show(const crcutil_interface::CRC *crc) { 44 | char buffer[sizeof(kTestData) + 32]; 45 | 46 | // 47 | // Access CRC properties. 48 | // 49 | uint64 lo; 50 | crc->GeneratingPolynomial(&lo); 51 | xprintf("Generating polynomial 0x%llx, degree %llu", 52 | lo, 53 | static_cast<uint64>(crc->Degree())); 54 | crc->CanonizeValue(&lo); 55 | xprintf(", canonize_value=0x%llx", lo); 56 | 57 | crc->RollStartValue(&lo); 58 | xprintf(", roll start value=0x%llx, roll window=%llu", 59 | lo, 60 | static_cast<uint64>(crc->RollWindowBytes())); 61 | 62 | // 63 | // Check integrity of CRC tables. 
64 | // 65 | crc->SelfCheckValue(&lo); 66 | xprintf(", self check value 0x%llx\n", lo); 67 | 68 | // 69 | // Compute CRC. 70 | // 71 | lo = 0; 72 | crc->Compute(kTestData, sizeof(kTestData) - 1, &lo); 73 | xprintf("CRC32C(\"%s\") = 0x%llx\n", kTestData, lo); 74 | 75 | // 76 | // Compute CRC (incrementally). 77 | // 78 | lo = 0; 79 | crc->Compute(kTestData, kTestDataHead, &lo); 80 | xprintf("CRC32C(\"%.*s\", 0) = 0x%llx, ", kTestDataHead, kTestData, lo); 81 | crc->Compute(kTestData + kTestDataHead, kTestDataTail, &lo); 82 | xprintf("CRC32C(\"%s\", CRC32(\"%.*s\", 0)) = 0x%llx = CRC32(\"%s\")\n", 83 | kTestData + kTestDataHead, kTestDataHead, kTestData, lo, kTestData); 84 | 85 | // 86 | // Compute CRC of a message filled with 0s. 87 | // 88 | lo = 1; 89 | crc->CrcOfZeroes(sizeof(buffer), &lo); 90 | 91 | uint64 lo1 = 1; 92 | memset(buffer, 0, sizeof(buffer)); 93 | crc->Compute(buffer, sizeof(buffer), &lo1); 94 | xprintf("CRC of %d zeroes = %llx, expected %llx\n", 95 | static_cast<int>(sizeof(buffer)), 96 | lo, 97 | lo1); 98 | 99 | 100 | // 101 | // Use rolling CRC. 102 | // 103 | xprintf("RollingCrc expected ="); 104 | for (size_t i = 0; i <= kRollWindow; ++i) { 105 | crc->RollStartValue(&lo); 106 | crc->Compute(kTestData + i, kRollWindow, &lo); 107 | xprintf(" 0x%llx", lo); 108 | } 109 | xprintf("\n"); 110 | 111 | crc->RollStart(kTestData, &lo, NULL); 112 | xprintf("RollingCrc actual = 0x%llx", lo); 113 | for (size_t i = 1; i <= kRollWindow; ++i) { 114 | crc->Roll(kTestData[i - 1], kTestData[i - 1 + kRollWindow], &lo, NULL); 115 | xprintf(" 0x%llx", lo); 116 | } 117 | xprintf("\n"); 118 | 119 | // 120 | // Change initial value. 
121 | // 122 | lo = 0; 123 | crc->Compute(kTestData, sizeof(kTestData) - 1, &lo); 124 | uint64 lo1_expected = 1; 125 | crc->Compute(kTestData, sizeof(kTestData) - 1, &lo1_expected); 126 | lo1 = lo; 127 | crc->ChangeStartValue(0, 0, // old start value 128 | 1, 0, // new start value 129 | sizeof(kTestData) - 1, 130 | &lo1); 131 | xprintf("CRC(\"%s\", 0) = 0x%llx, CRC(\"%s\", 1)=0x%llx, expected 0x%llx\n", 132 | kTestData, lo, kTestData, lo1, lo1_expected); 133 | 134 | // 135 | // Concatenate CRCs. 136 | // 137 | uint64 start_value = 1; 138 | lo = start_value; 139 | crc->Compute(kTestData, kTestDataHead, &lo); 140 | lo1 = 0; 141 | crc->Compute(kTestData + kTestDataHead, kTestDataTail, &lo1); 142 | 143 | uint64 lo2 = lo; 144 | crc->Concatenate(lo1, 0, kTestDataTail, &lo2); 145 | 146 | uint64 lo2_expected = start_value; 147 | crc->Compute(kTestData, sizeof(kTestData) - 1, &lo2_expected); 148 | 149 | xprintf("CRC(\"%.*s\", 1) = 0x%llx, CRC(\"%s\", 0)=0x%llx, " 150 | "CRC(\"%s\", 1) = 0x%llx, expected 0x%llx\n", 151 | kTestDataHead, kTestData, lo, 152 | kTestData + kTestDataHead, lo1, 153 | kTestData, lo2, 154 | lo2_expected); 155 | 156 | // 157 | // Store complementary CRC so that CRC of a message followed 158 | // by complementary CRC value produces predefined result (e.g. 0). 159 | // 160 | memcpy(buffer, kTestData, sizeof(kTestData) - 1); 161 | lo = 1; 162 | crc->Compute(buffer, sizeof(kTestData) - 1, &lo); 163 | size_t stored_crc_bytes = crc->StoreComplementaryCrc( 164 | buffer + sizeof(kTestData) - 1, 165 | lo, 0, 166 | 0); 167 | 168 | // Compute CRC of message + complementary CRC using the same start value 169 | // (start value could be changed via ChangeStartValue()). 
170 | lo1 = 1; 171 | crc->Compute(buffer, sizeof(kTestData) - 1 + stored_crc_bytes, &lo1); 172 | 173 | xprintf("Crc of message + complementary CRC = %llx, expected 0\n", lo1); 174 | 175 | // 176 | // Store CRC after the message and ensure that CRC of message + its 177 | // CRC produces constant result irrespective of message data. 178 | // 179 | memcpy(buffer, kTestData, sizeof(kTestData) - 1); 180 | lo = 1; 181 | crc->Compute(buffer, sizeof(kTestData) - 1, &lo); 182 | stored_crc_bytes = crc->StoreCrc(buffer + sizeof(kTestData) - 1, lo); 183 | 184 | // Compute CRC of message + its CRC using the same start value (1). 185 | lo1 = 1; 186 | crc->Compute(buffer, sizeof(kTestData) - 1 + stored_crc_bytes, &lo1); 187 | 188 | // Ensure that it matches "predicted" constant value, irrespective 189 | // of a message or CRC start value. 190 | crc->CrcOfCrc(&lo2); 191 | xprintf("CrcOfCrc=%llx, expected %llx\n", lo1, lo2); 192 | 193 | xprintf("\n"); 194 | } 195 | 196 | void ShowAndDelete(crcutil_interface::CRC *crc) { 197 | Show(crc); 198 | crc->Delete(); 199 | } 200 | 201 | int main() { 202 | ShowAndDelete(crcutil_interface::CRC::Create( 203 | 0xEB31D82E, 0, 32, true, 0x1111, 0, kRollWindow, 204 | crcutil_interface::CRC::IsSSE42Available(), NULL)); 205 | ShowAndDelete(crcutil_interface::CRC::Create( 206 | 0x82f63b78, 0, 32, true, 0x2222, 0, kRollWindow, 207 | crcutil_interface::CRC::IsSSE42Available(), NULL)); 208 | return 0; 209 | } 210 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/aligned_alloc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Poor man's platform-independent implementation of aligned memory allocator. 16 | 17 | #ifndef CRCUTIL_ALIGNED_ALLOC_H_ 18 | #define CRCUTIL_ALIGNED_ALLOC_H_ 19 | 20 | #include "std_headers.h" // size_t, ptrdiff_t 21 | 22 | namespace crcutil { 23 | 24 | // Allocates a block of memory of "size" bytes so that a field 25 | // at "field_offset" is aligned on "align" boundary. 26 | // 27 | // NB #1: "align" shall be exact power of two. 28 | // 29 | // NB #2: memory allocated by AlignedAlloc should be released by AlignedFree(). 30 | // 31 | inline void *AlignedAlloc(size_t size, 32 | size_t field_offset, 33 | size_t align, 34 | const void **allocated_mem) { 35 | if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) { 36 | align = sizeof(*allocated_mem); 37 | } 38 | size += align - 1 + sizeof(*allocated_mem); 39 | char *allocated_memory = new char[size]; 40 | char *aligned_memory = allocated_memory + sizeof(*allocated_mem); 41 | field_offset &= align - 1; 42 | size_t actual_alignment = 43 | reinterpret_cast<size_t>(aligned_memory + field_offset) & (align - 1); 44 | if (actual_alignment != 0) { 45 | aligned_memory += align - actual_alignment; 46 | } 47 | reinterpret_cast<char **>(aligned_memory)[-1] = allocated_memory; 48 | 49 | if (allocated_mem != NULL) { 50 | *allocated_mem = allocated_memory; 51 | } 52 | 53 | return aligned_memory; 54 | } 55 | 56 | // Frees memory allocated by AlignedAlloc(). 
57 | inline void AlignedFree(void *aligned_memory) { 58 | if (aligned_memory != NULL) { 59 | char *allocated_memory = reinterpret_cast<char **>(aligned_memory)[-1]; 60 | delete[] allocated_memory; 61 | } 62 | } 63 | 64 | } // namespace crcutil 65 | 66 | #endif // CRCUTIL_ALIGNED_ALLOC_H_ 67 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/bob_jenkins_rng.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Glorified C++ version of Bob Jenkins' random number generator. 16 | // See http://burtleburtle.net/bob/rand/smallprng.html for more details. 
17 | 18 | #ifndef CRCUTIL_BOB_JENKINS_RNG_H_ 19 | #define CRCUTIL_BOB_JENKINS_RNG_H_ 20 | 21 | #include "base_types.h" 22 | 23 | #if !defined(_MSC_VER) 24 | #define _rotl(value, bits) \ 25 | static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits)))) 26 | #define _rotl64(value, bits) \ 27 | static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits)))) 28 | #endif // !defined(_MSC_VER) 29 | 30 | namespace crcutil { 31 | 32 | #pragma pack(push, 8) 33 | 34 | template<typename T> class BobJenkinsRng; 35 | 36 | template<> class BobJenkinsRng<uint32> { 37 | public: 38 | typedef uint32 value; 39 | 40 | value Get() { 41 | value e = a_ - _rotl(b_, 23); 42 | a_ = b_ ^ _rotl(c_, 16); 43 | b_ = c_ + _rotl(d_, 11); 44 | c_ = d_ + e; 45 | d_ = e + a_; 46 | return (d_); 47 | } 48 | 49 | void Init(value seed) { 50 | a_ = 0xf1ea5eed; 51 | b_ = seed; 52 | c_ = seed; 53 | d_ = seed; 54 | for (size_t i = 0; i < 20; ++i) { 55 | (void) Get(); 56 | } 57 | } 58 | 59 | explicit BobJenkinsRng(value seed) { 60 | Init(seed); 61 | } 62 | 63 | BobJenkinsRng() { 64 | Init(0x1234567); 65 | } 66 | 67 | private: 68 | value a_; 69 | value b_; 70 | value c_; 71 | value d_; 72 | }; 73 | 74 | 75 | #if HAVE_UINT64 76 | 77 | template<> class BobJenkinsRng<uint64> { 78 | public: 79 | typedef uint64 value; 80 | 81 | value Get() { 82 | value e = a_ - _rotl64(b_, 7); 83 | a_ = b_ ^ _rotl64(c_, 13); 84 | b_ = c_ + _rotl64(d_, 37); 85 | c_ = d_ + e; 86 | d_ = e + a_; 87 | return d_; 88 | } 89 | 90 | void Init(value seed) { 91 | a_ = 0xf1ea5eed; 92 | b_ = seed; 93 | c_ = seed; 94 | d_ = seed; 95 | for (size_t i = 0; i < 20; ++i) { 96 | (void) Get(); 97 | } 98 | } 99 | 100 | explicit BobJenkinsRng(value seed) { 101 | Init(seed); 102 | } 103 | 104 | BobJenkinsRng() { 105 | Init(0x1234567); 106 | } 107 | 108 | private: 109 | value a_; 110 | value b_; 111 | value c_; 112 | value d_; 113 | }; 114 | 115 | #endif // HAVE_UINT64 116 | 117 | #if !defined(_MSC_VER) 118 | #undef _rotl 119 | #undef _rotl64 120 | #endif // !defined(_MSC_VER) 121 | 
122 | #pragma pack(pop) 123 | 124 | } // namespace crcutil 125 | 126 | #endif // CRCUTIL_BOB_JENKINS_RNG_H_ 127 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/rdtsc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Reads CPU cycle counter on AMD64 and I386 (for performance measurements). 16 | // Thanks to __rdtsc() intrinsic, it's easy with Microsoft and Intel 17 | // compilers, but real pain with GCC. 
18 | 19 | #ifndef CRCUTIL_RDTSC_H_ 20 | #define CRCUTIL_RDTSC_H_ 21 | 22 | #include "platform.h" 23 | 24 | namespace crcutil { 25 | 26 | struct Rdtsc { 27 | static inline uint64 Get() { 28 | #if defined(_MSC_VER) && (HAVE_AMD64 || HAVE_I386) 29 | return __rdtsc(); 30 | #elif defined(__GNUC__) && HAVE_AMD64 31 | int64 result; 32 | __asm__ volatile( 33 | "rdtsc\n" 34 | : "=a" (result)); 35 | return result; 36 | #elif defined(__GNUC__) && HAVE_I386 37 | // If "low" and "high" are defined as "uint64" to 38 | // avoid explicit cast to uint64, GCC 4.5.0 in "-m32" mode 39 | // fails with "impossible register constraint" error 40 | // (no, it is not because one cannot use 64-bit value as argument 41 | // for 32-bit register, but because its register allocator 42 | // could not resolve a conflict under high register pressure). 43 | uint32 low; 44 | uint32 high; 45 | __asm__ volatile( 46 | "rdtsc\n" 47 | : "=a" (low), "=d" (high)); 48 | return ((static_cast(high) << 32) | low); 49 | #else 50 | // It is hard to find low overhead timer with 51 | // sub-millisecond resolution and granularity. 52 | return 0; 53 | #endif 54 | } 55 | }; 56 | 57 | } // namespace crcutil 58 | 59 | #endif // CRCUTIL_RDTSC_H_ 60 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/set_hi_pri.c: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Raises priority of test process and main thread to reduce 16 | // timing variations caused by context switches. Windows only. 17 | 18 | #if defined(_MSC_VER) 19 | // Disable warnings generated by "windows.h" compiled with -Wall. 20 | 21 | // N bytes padding added after data member X 22 | #pragma warning(disable: 4820) 23 | 24 | // no function prototype given: converting '()' to '(void)' 25 | #pragma warning(disable: 4255) 26 | 27 | // '__midl' is not defined as a preprocessor macro, 28 | // replacing with '0' for '#if/#elif' 29 | #pragma warning(disable: 4668) 30 | 31 | #endif // defined(_MSC_VER) 32 | 33 | 34 | #if defined(_WIN32) 35 | #include 36 | #endif // defined(_WIN32) 37 | 38 | #ifdef __cplusplus 39 | extern "C" 40 | #endif // __cplusplus 41 | void SetHiPri(void) 42 | { 43 | #if defined(_WIN32) 44 | #if 1 45 | // These setting are extremely dangerous. E.g. if app hits infinite loop, 46 | // computer may turn unresponsive and will require a power cycle. 47 | // Use for final testing only. 48 | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); 49 | SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); 50 | #else 51 | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); 52 | SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS); 53 | #endif 54 | #endif // defined(_WIN32) 55 | } 56 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/unittest.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "unittest.h" 16 | 17 | extern "C" void SetHiPri(); 18 | 19 | using namespace crcutil; 20 | 21 | #if !defined(HAVE_INT128) 22 | #if defined(__GNUC__) && HAVE_AMD64 23 | #define HAVE_INT128 1 24 | #else 25 | #define HAVE_INT128 0 26 | #endif // defined(__GNUC__) && HAVE_AMD64 27 | #endif // defined(HAVE_INT128) 28 | 29 | #if HAVE_INT128 30 | typedef unsigned int uint128_t __attribute__((mode(TI))); 31 | #endif // HAVE_INT128 32 | 33 | int main(int argc, char **argv) { 34 | bool test_perf_main = true; 35 | bool test_perf_all = false; 36 | bool canonical = false; 37 | 38 | for (int i = 1; i < argc; ++i) { 39 | if (strcmp(argv[i], "--noperf") == 0) { 40 | test_perf_main = false; 41 | test_perf_all = false; 42 | } else if (strcmp(argv[i], "--perfall") == 0) { 43 | test_perf_all = true; 44 | } else if (strcmp(argv[i], "--canonical") == 0) { 45 | canonical = true; 46 | } else if (strcmp(argv[i], "help") == 0) { 47 | fprintf(stderr, "Usage: unittest {options}\n"); 48 | fprintf(stderr, "\n"); 49 | fprintf(stderr, "Options:\n"); 50 | fprintf(stderr, " --canonical - test canonical variant of CRC\n"); 51 | fprintf(stderr, " --noperf - do not test performance\n"); 52 | fprintf(stderr, " --perfall - test performance of all CRC width " 53 | "(not just 32, 64, and 128)\n"); 54 | fprintf(stderr, "\n"); 55 | return 1; 56 | } 57 | } 58 | 59 | 60 | SetHiPri(); 61 | 62 | CrcVerifier v; 63 | 64 | CreateTest( 65 | 64, 0, 0x9a6c9329ac4bc9b5ull, "u64/u64/u64", test_perf_main, &v); 66 | CreateTest( 67 | 64, 0, 
0x9a6c9329ac4bc9b5ull, "u64/u64/u32", test_perf_all, &v); 68 | 69 | CreateTest( 70 | 32, 0, 0x82f63b78, "u64/u64/u64", test_perf_main, &v); 71 | CreateTest( 72 | 32, 0, 0x82f63b78, "u32/u32/u32", test_perf_main, &v); 73 | 74 | CreateTest( 75 | 32, 0, 0x82f63b78, "u64/u32/u32", test_perf_all, &v); 76 | CreateTest( 77 | 32, 0, 0x82f63b78, "u64/u32/u64", test_perf_all, &v); 78 | 79 | CreateTest( 80 | 15, 0, 0x00004CD1, "u64/u64/u64", test_perf_all, &v); 81 | CreateTest( 82 | 15, 0, 0x00004CD1, "u32/u32/u32", test_perf_all, &v); 83 | 84 | CreateTest( 85 | 07, 0, 0x00000048, "u64/u64/u64", test_perf_all, &v); 86 | CreateTest( 87 | 07, 0, 0x00000048, "u32/u32/u32", test_perf_all, &v); 88 | 89 | #if HAVE_SSE2 90 | CreateTest( 91 | 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 92 | "sse2/sse2/u64", test_perf_main, &v); 93 | CreateTest( 94 | 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 95 | "sse2/sse2/u32", test_perf_main, &v); 96 | CreateTest( 97 | 64, 0, 0x9a6c9329ac4bc9b5ull, 98 | "sse2/sse2/u64", test_perf_main, &v); 99 | CreateTest( 100 | 64, 0, 0x9a6c9329ac4bc9b5ull, 101 | "sse2/sse2/u32", test_perf_main, &v); 102 | CreateTest( 103 | 32, 0, 0x82f63b78, 104 | "sse2/sse2/u64", test_perf_main, &v); 105 | CreateTest( 106 | 32, 0, 0x82f63b78, 107 | "sse2/sse2/u32", test_perf_main, &v); 108 | #endif // HAVE_SSE2 109 | #if HAVE_INT128 110 | CreateTest( 111 | 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 112 | "u128/u128/u64", test_perf_main, &v); 113 | CreateTest( 114 | 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 115 | "u128/u128/u32", test_perf_main, &v); 116 | #endif // HAVE_INT128 117 | 118 | v.add(new CrcVerifierFactory(canonical, 119 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/2", test_perf_main, true)); 120 | v.add(new CrcVerifierFactory(canonical, 121 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/3", test_perf_main, true)); 122 | v.add(new CrcVerifierFactory(canonical, 123 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/4", test_perf_main, 
true)); 124 | v.add(new CrcVerifierFactory(canonical, 125 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/5", test_perf_main, true)); 126 | v.add(new CrcVerifierFactory(canonical, 127 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/6", test_perf_main, true)); 128 | v.add(new CrcVerifierFactory(canonical, 129 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/7", test_perf_main, true)); 130 | v.add(new CrcVerifierFactory(canonical, 131 | 64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/8", test_perf_main, true)); 132 | 133 | #if HAVE_SSE2 134 | v.add(new CrcVerifierFactory( 135 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 136 | "CRC-128-sse2/size_t/2", test_perf_main, true)); 137 | v.add(new CrcVerifierFactory( 138 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 139 | "CRC-128-sse2/size_t/3", test_perf_main, true)); 140 | v.add(new CrcVerifierFactory( 141 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 142 | "CRC-128-sse2/size_t/4", test_perf_main, true)); 143 | v.add(new CrcVerifierFactory( 144 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 145 | "CRC-128-sse2/size_t/5", test_perf_main, true)); 146 | v.add(new CrcVerifierFactory( 147 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 148 | "CRC-128-sse2/size_t/6", test_perf_main, true)); 149 | v.add(new CrcVerifierFactory( 150 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 151 | "CRC-128-sse2/size_t/7", test_perf_main, true)); 152 | v.add(new CrcVerifierFactory( 153 | canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull, 154 | "CRC-128-sse2/size_t/8", test_perf_main, true)); 155 | #endif // HAVE_SSE2 156 | 157 | v.add(new CrcVerifierFactory(canonical, 158 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/2", test_perf_main, true)); 159 | v.add(new CrcVerifierFactory(canonical, 160 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/3", test_perf_main, true)); 161 | v.add(new CrcVerifierFactory(canonical, 162 | 32, 0, 0x82f63b78, 
"CRC-32-size_t/size_t/4", test_perf_main, true)); 163 | v.add(new CrcVerifierFactory(canonical, 164 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/5", test_perf_main, true)); 165 | v.add(new CrcVerifierFactory(canonical, 166 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/6", test_perf_main, true)); 167 | v.add(new CrcVerifierFactory(canonical, 168 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/7", test_perf_main, true)); 169 | v.add(new CrcVerifierFactory(canonical, 170 | 32, 0, 0x82f63b78, "CRC-32-size_t/size_t/8", test_perf_main, true)); 171 | 172 | v.TestFunctionality(); 173 | v.TestPerformance(); 174 | 175 | return (0); 176 | } 177 | -------------------------------------------------------------------------------- /src/crcutil-1.0/tests/unittest_helper.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // A set of useful macros for crcutil_unittest. 
16 | 17 | #ifndef CRCUTIL_UNITTEST_HELPER_H_ 18 | #define CRCUTIL_UNITTEST_HELPER_H_ 19 | 20 | #include "std_headers.h" // printf 21 | 22 | #if !defined(CHECK) 23 | 24 | #if defined(_MSC_VER) 25 | #define DEBUG_BREAK() __debugbreak() 26 | #else 27 | #define DEBUG_BREAK() exit(1) 28 | #endif // defined(_MSC_VER) 29 | 30 | #define CHECK(cond) do { \ 31 | if (!(cond)) { \ 32 | fprintf(stderr, "%s, %d: ASSERT(%s)\n", __FILE__, __LINE__, #cond); \ 33 | fflush(stderr); \ 34 | DEBUG_BREAK(); \ 35 | } \ 36 | } while (0) 37 | 38 | 39 | #define CHECK_GE(a, b) CHECK((a) >= (b)) 40 | #define CHECK_NE(a, b) CHECK((a) != (b)) 41 | #define CHECK_EQ(a, b) CHECK((a) == (b)) 42 | 43 | #endif // !defined(CHECK) 44 | 45 | #endif // CRCUTIL_UNITTEST_HELPER_H_ 46 | -------------------------------------------------------------------------------- /src/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/src/py.typed -------------------------------------------------------------------------------- /src/sabctools.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | */ 18 | 19 | #include "sabctools.h" 20 | #include "yenc.h" 21 | #include "unlocked_ssl.h" 22 | #include "crc32.h" 23 | #include "sparse.h" 24 | #include "utils.h" 25 | 26 | /* Function and exception declarations */ 27 | PyMODINIT_FUNC PyInit_sabctools(void); 28 | 29 | /* Python API requirements */ 30 | static PyMethodDef sabctools_methods[] = { 31 | { 32 | "yenc_decode", 33 | yenc_decode, 34 | METH_O, 35 | "yenc_decode(raw_data)" 36 | }, 37 | { 38 | "yenc_encode", 39 | yenc_encode, 40 | METH_O, 41 | "yenc_encode(input_string)" 42 | }, 43 | { 44 | "unlocked_ssl_recv_into", 45 | unlocked_ssl_recv_into, 46 | METH_VARARGS, 47 | "unlocked_ssl_recv_into(ssl_socket, buffer)" 48 | }, 49 | { 50 | "crc32_combine", 51 | crc32_combine, 52 | METH_VARARGS, 53 | "crc32_combine(crc1, crc2, length)" 54 | }, 55 | { 56 | "crc32_multiply", 57 | crc32_multiply, 58 | METH_VARARGS, 59 | "crc32_multiply(crc1, crc2)" 60 | }, 61 | { 62 | "crc32_zero_unpad", 63 | crc32_zero_unpad, 64 | METH_VARARGS, 65 | "crc32_zero_unpad(crc1, length)" 66 | }, 67 | { 68 | "crc32_xpown", 69 | crc32_xpown, 70 | METH_O, 71 | "crc32_xpown(n)" 72 | }, 73 | { 74 | "crc32_xpow8n", 75 | crc32_xpow8n, 76 | METH_O, 77 | "crc32_xpow8n(n)" 78 | }, 79 | { 80 | "sparse", 81 | sparse, 82 | METH_VARARGS, 83 | "sparse(handle, length)" 84 | }, 85 | { 86 | "bytearray_malloc", 87 | bytearray_malloc, 88 | METH_O, 89 | "bytearray_malloc(size)" 90 | }, 91 | {NULL, NULL, 0, NULL} 92 | }; 93 | 94 | static struct PyModuleDef sabctools_definition = { 95 | PyModuleDef_HEAD_INIT, 96 | "sabctools", 97 | "Utils written in C for use within SABnzbd.", 98 | -1, 99 | sabctools_methods 100 | }; 101 | 102 | static const char* simd_detected(void) { 103 | int level = RapidYenc::decode_isa_level(); 104 | #ifdef 
PLATFORM_X86 105 | if(level >= ISA_LEVEL_VBMI2) 106 | return "AVX512VL+VBMI2"; 107 | if(level >= ISA_LEVEL_AVX3) 108 | return "AVX512VL"; 109 | if(level >= ISA_LEVEL_AVX2) 110 | return "AVX2"; 111 | if(level >= ISA_LEVEL_AVX) 112 | return "AVX"; 113 | if(level >= ISA_LEVEL_SSE4_POPCNT) 114 | return "SSE4.1+POPCNT"; 115 | if(level >= ISA_LEVEL_SSE41) 116 | return "SSE4.1"; 117 | if(level >= ISA_LEVEL_SSSE3) 118 | return "SSSE3"; 119 | if(level >= (ISA_LEVEL_SSE2 | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT)) 120 | return "SSE2+ABM"; 121 | return "SSE2"; 122 | #endif 123 | #ifdef PLATFORM_ARM 124 | if(level >= ISA_LEVEL_NEON) { 125 | return "NEON"; 126 | } 127 | #endif 128 | #ifdef __riscv 129 | if(level >= ISA_LEVEL_RVV) { 130 | return "RVV"; 131 | } 132 | #endif 133 | return ""; 134 | } 135 | 136 | PyMODINIT_FUNC PyInit_sabctools(void) { 137 | // Initialize and add version / SIMD information 138 | Py_Initialize(); 139 | RapidYenc::encoder_init(); 140 | RapidYenc::decoder_init(); 141 | RapidYenc::crc32_init(); 142 | openssl_init(); 143 | sparse_init(); 144 | 145 | PyObject* m = PyModule_Create(&sabctools_definition); 146 | PyModule_AddStringConstant(m, "version", SABCTOOLS_VERSION); 147 | PyModule_AddStringConstant(m, "simd", simd_detected()); 148 | 149 | // Add status of linking OpenSSL function 150 | PyObject *openssl_linked_object = openssl_linked() ? 
Py_True : Py_False; 151 | Py_INCREF(openssl_linked_object); 152 | PyModule_AddObject(m, "openssl_linked", openssl_linked_object); 153 | 154 | return m; 155 | } 156 | 157 | 158 | -------------------------------------------------------------------------------- /src/sabctools.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | /* Version information */ 25 | #define SABCTOOLS_VERSION "8.2.5" 26 | 27 | PyMODINIT_FUNC PyInit_sabctools(void); 28 | -------------------------------------------------------------------------------- /src/sabctools.pyi: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, IO 2 | from ssl import SSLSocket 3 | 4 | __version__: str 5 | openssl_linked: bool 6 | simd: str 7 | 8 | def yenc_decode(raw_data: memoryview) -> Tuple[bytearray, str, int, int, int, Optional[int]]: ... 9 | def yenc_encode(input_string: bytes) -> Tuple[bytes, int]: ... 10 | def unlocked_ssl_recv_into(ssl_socket: SSLSocket, buffer: memoryview) -> int: ... 
11 | def crc32_combine(crc1: int, crc2: int, length: int) -> int: ... 12 | def crc32_multiply(crc1: int, crc2: int) -> int: ... 13 | def crc32_xpow8n(n: int) -> int: ... 14 | def crc32_xpown(n: int) -> int: ... 15 | def crc32_zero_unpad(crc1: int, length: int) -> int: ... 16 | def sparse(file: IO, length: int) -> None: ... 17 | def bytearray_malloc(size: int) -> bytearray: ... 18 | -------------------------------------------------------------------------------- /src/sparse.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #include "sparse.h" 20 | 21 | PyObject *Py_msvcrt_module = NULL; 22 | PyObject *get_osfhandle_string = NULL; 23 | 24 | void sparse_init() 25 | { 26 | #if defined(_WIN32) || defined(__CYGWIN__) 27 | Py_msvcrt_module = PyImport_ImportModule("msvcrt"); 28 | get_osfhandle_string = PyUnicode_FromString("get_osfhandle"); 29 | #endif 30 | } 31 | 32 | PyObject *sparse(PyObject *self, PyObject *args) 33 | { 34 | PyObject *Py_file; 35 | long long length; 36 | 37 | PyObject *Py_file_fileno = NULL; 38 | PyObject *Py_file_handle = NULL; 39 | PyObject *Py_file_truncate = NULL; 40 | 41 | if (!PyArg_ParseTuple(args, "OL:sparse", &Py_file, &length)) 42 | { 43 | return NULL; 44 | } 45 | 46 | #if defined(_WIN32) || defined(__CYGWIN__) 47 | // Get the windows file handle and set file attributes to sparse 48 | 49 | if (Py_msvcrt_module == NULL) 50 | { 51 | PyErr_SetString(PyExc_SystemError, "msvcrt module not loaded."); 52 | goto error; 53 | } 54 | 55 | if (!(Py_file_fileno = PyObject_CallMethod(Py_file, "fileno", NULL))) 56 | { 57 | PyErr_SetString(PyExc_SystemError, "Error calling fileno function."); 58 | goto error; 59 | } 60 | 61 | if (!(Py_file_handle = PyObject_CallMethodObjArgs(Py_msvcrt_module, get_osfhandle_string, Py_file_fileno, NULL))) 62 | { 63 | PyErr_SetString(PyExc_SystemError, "Failed calling get_osfhandle function."); 64 | goto error; 65 | } 66 | 67 | HANDLE handle = reinterpret_cast(PyLong_AsLongLong(Py_file_handle)); 68 | 69 | // Creating a sparse file may fail but that's OK 70 | DWORD bytesReturned; 71 | if (DeviceIoControl(handle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &bytesReturned, nullptr)) 72 | { 73 | // Increase the file length without writing any data and seek back to the original position 74 | LARGE_INTEGER li_size; 75 | li_size.QuadPart = length; 76 | LARGE_INTEGER li_start = {0}; 77 | if (!SetFilePointerEx(handle, {0}, &li_start, FILE_CURRENT) || !SetFilePointerEx(handle, li_size, nullptr, FILE_END) || !SetEndOfFile(handle) || 
!SetFilePointerEx(handle, li_start, nullptr, FILE_BEGIN)) 78 | { 79 | PyErr_SetFromWindowsErr(0); 80 | goto error; 81 | } 82 | } 83 | #else 84 | // Call file.truncate(length) 85 | 86 | if (!(Py_file_truncate = PyObject_CallMethod(Py_file, "truncate", "(L)", length))) 87 | { 88 | goto error; 89 | } 90 | #endif 91 | 92 | Py_XDECREF(Py_file_fileno); 93 | Py_XDECREF(Py_file_handle); 94 | Py_XDECREF(Py_file_truncate); 95 | Py_RETURN_NONE; 96 | 97 | error: 98 | Py_XDECREF(Py_file_fileno); 99 | Py_XDECREF(Py_file_handle); 100 | Py_XDECREF(Py_file_truncate); 101 | return NULL; 102 | } 103 | -------------------------------------------------------------------------------- /src/sparse.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #ifndef SABCTOOLS_SPARSE_H 20 | #define SABCTOOLS_SPARSE_H 21 | 22 | #include 23 | 24 | #if defined(_WIN32) || defined(__CYGWIN__) 25 | #include 26 | #endif 27 | 28 | void sparse_init(); 29 | PyObject *sparse(PyObject *, PyObject *); 30 | 31 | #endif //SABCTOOLS_SPARSE_H 32 | -------------------------------------------------------------------------------- /src/unlocked_ssl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #ifndef SABCTOOLS_UNLOCKED_SSL_H 20 | #define SABCTOOLS_UNLOCKED_SSL_H 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | /* OpenSSL link */ 28 | #if defined(_WIN32) || defined(__CYGWIN__) 29 | # define WIN32_LEAN_AND_MEAN 30 | # include 31 | # include 32 | #else 33 | # include 34 | #endif 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | /* Have to manually define this OpenSSL constant and hope it never changes */ 41 | # define SSL_RECEIVED_SHUTDOWN 2 42 | # define SSL_ERROR_WANT_READ 2 43 | # define SSL_ERROR_WANT_WRITE 3 44 | # define SSL_ERROR_ZERO_RETURN 6 45 | 46 | void openssl_init(); 47 | bool openssl_linked(); 48 | PyObject *unlocked_ssl_recv_into(PyObject *, PyObject*); 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | #endif -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #include "utils.h" 20 | 21 | PyObject* bytearray_malloc(PyObject* self, PyObject* Py_input_size) { 22 | if(!PyLong_Check(Py_input_size)) { 23 | PyErr_SetString(PyExc_TypeError, "Expected type 'int'."); 24 | return NULL; 25 | } 26 | return PyByteArray_FromStringAndSize(NULL, PyLong_AsSsize_t(Py_input_size)); 27 | } 28 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #ifndef SABCTOOLS_UTILS_H 20 | #define SABCTOOLS_UTILS_H 21 | 22 | #include 23 | 24 | PyObject* bytearray_malloc(PyObject *, PyObject*); 25 | 26 | #endif //SABCTOOLS_UTILS_H 27 | -------------------------------------------------------------------------------- /src/yenc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org) 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 2 7 | * of the License, or (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | */ 18 | 19 | #ifndef SABCTOOLS_YENC_H 20 | #define SABCTOOLS_YENC_H 21 | 22 | #include 23 | 24 | #include "yencode/common.h" 25 | #include "yencode/encoder.h" 26 | #include "yencode/decoder.h" 27 | #include "yencode/crc.h" 28 | 29 | /* Constants */ 30 | #define YENC_LINESIZE 128 31 | #define YENC_ZERO 0x00 32 | #define YENC_CR 0x0d 33 | #define YENC_LF 0x0a 34 | 35 | /* The =yend line cannot be crazy long */ 36 | #define YENC_MAX_TAIL_BYTES 256 37 | 38 | /* Prevent strange yEnc sizes */ 39 | #define YENC_MAX_PART_SIZE 10*1024*1024 40 | 41 | /* Functions */ 42 | PyObject* yenc_decode(PyObject *, PyObject*); 43 | PyObject* yenc_encode(PyObject *, PyObject*); 44 | 45 | #endif //SABCTOOLS_YENC_H 46 | -------------------------------------------------------------------------------- /src/yencode/crc.h: -------------------------------------------------------------------------------- 1 | #ifndef __YENC_CRC_H 2 | #define __YENC_CRC_H 3 | #include // for llabs 4 | 5 | #if !defined(__GNUC__) && defined(_MSC_VER) 6 | # include 7 | #endif 8 | 9 | namespace RapidYenc { 10 | 11 | 12 | typedef uint32_t (*crc_func)(const void*, size_t, uint32_t); 13 | extern crc_func _do_crc32_incremental; 14 | 15 | extern int _crc32_isa; 16 | static inline uint32_t crc32(const void* data, size_t length, uint32_t init) { 17 | return (*_do_crc32_incremental)(data, length, init); 18 | } 19 | static inline int crc32_isa_level() { 20 | return _crc32_isa; 21 | } 22 | 23 | 24 | // computes `n % 0xffffffff` (well, almost), using some bit-hacks 25 | static inline uint32_t crc32_powmod(uint64_t n) { 26 | #if defined(__GNUC__) && (__GNUC__ >= 5 || (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ > 3)))) 27 | unsigned res; 28 | unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res); 29 | res += carry; 30 | return res; 31 | #elif defined(_MSC_VER) && defined(PLATFORM_X86) 32 | unsigned res; 33 | unsigned char carry = _addcarry_u32(0, n >> 32, n, &res); 34 | 
_addcarry_u32(carry, res, 0, &res); 35 | return res; 36 | #else 37 | n = (n >> 32) + (n & 0xffffffff); 38 | n += n >> 32; 39 | return n; 40 | #endif 41 | } 42 | // computes `crc32_powmod(n*8)` avoiding overflow 43 | static inline uint32_t crc32_bytepow(uint64_t n) { 44 | #if defined(__GNUC__) || defined(_MSC_VER) 45 | unsigned res = crc32_powmod(n); 46 | # ifdef _MSC_VER 47 | return _rotl(res, 3); 48 | # else 49 | return (res << 3) | (res >> 29); 50 | # endif 51 | #else 52 | n = (n >> 32) + (n & 0xffffffff); 53 | n <<= 3; 54 | n += n >> 32; 55 | return n; 56 | #endif 57 | } 58 | 59 | typedef uint32_t (*crc_mul_func)(uint32_t, uint32_t); 60 | extern crc_mul_func _crc32_shift; 61 | extern crc_mul_func _crc32_multiply; 62 | static inline uint32_t crc32_shift(uint32_t a, uint32_t b) { 63 | return (*_crc32_shift)(a, b); 64 | } 65 | static inline uint32_t crc32_multiply(uint32_t a, uint32_t b) { 66 | return (*_crc32_multiply)(a, b); 67 | } 68 | 69 | static inline uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, uint64_t len2) { 70 | return crc32_shift(crc1, crc32_bytepow(len2)) ^ crc2; 71 | } 72 | static inline uint32_t crc32_zeros(uint32_t crc1, uint64_t len) { 73 | return ~crc32_shift(~crc1, crc32_bytepow(len)); 74 | } 75 | static inline uint32_t crc32_unzero(uint32_t crc1, uint64_t len) { 76 | return ~crc32_shift(~crc1, ~crc32_bytepow(len)); 77 | } 78 | static inline uint32_t crc32_2pow(int64_t n) { 79 | uint32_t sign = (uint32_t)(n >> 63); 80 | return crc32_shift(0x80000000, crc32_powmod(llabs(n)) ^ sign); 81 | } 82 | static inline uint32_t crc32_256pow(uint64_t n) { 83 | return crc32_shift(0x80000000, crc32_bytepow(n)); 84 | } 85 | 86 | void crc32_init(); 87 | 88 | 89 | 90 | } // namespace 91 | #endif // defined(__YENC_CRC_H) 92 | -------------------------------------------------------------------------------- /src/yencode/crc_arm.cc: -------------------------------------------------------------------------------- 1 | #include "crc_common.h" 2 | 3 | #if 
defined(PLATFORM_ARM) && defined(_MSC_VER) && defined(__clang__) && !defined(__ARM_FEATURE_CRC32) 4 | // I don't think GYP provides a nice way to detect whether MSVC or clang-cl is being used, but it doesn't use clang-cl by default, so a warning here is probably sufficient 5 | HEDLEY_WARNING("CRC32 acceleration is not been enabled under ARM clang-cl by default; add `-march=armv8-a+crc` to additional compiler arguments to enable"); 6 | #endif 7 | 8 | // disable CRC on GCC versions with broken arm_acle.h 9 | #if defined(__ARM_FEATURE_CRC32) && defined(HEDLEY_GCC_VERSION) 10 | # if !defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,1,1) 11 | # undef __ARM_FEATURE_CRC32 12 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 7.0 - 8.1 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81497]. If you need this feature, please use a different compiler or version of GCC"); 13 | # endif 14 | # if defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(9,4,0) && !HEDLEY_GCC_VERSION_CHECK(9,5,0) 15 | # undef __ARM_FEATURE_CRC32 16 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 9.4 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100985]. 
If you need this feature, please use a different compiler or version of GCC"); 17 | # endif 18 | #endif 19 | #if defined(__ARM_FEATURE_CRC32) && defined(__has_include) 20 | # if !__has_include() 21 | # undef __ARM_FEATURE_CRC32 22 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to missing arm_acle.h"); 23 | # endif 24 | #endif 25 | 26 | #if defined(__ARM_FEATURE_CRC32) || (defined(_M_ARM64) && !defined(__clang__)) // MSVC doesn't support CRC for ARM32 27 | 28 | /* ARMv8 accelerated CRC */ 29 | #if defined(_MSC_VER) && !defined(__clang__) 30 | #include 31 | #else 32 | #include 33 | #endif 34 | 35 | 36 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 37 | # ifdef __GNUC__ 38 | # define _LE16 __builtin_bswap16 39 | # define _LE32 __builtin_bswap32 40 | # define _LE64 __builtin_bswap64 41 | # else 42 | // currently not supported 43 | # error No endian swap intrinsic defined 44 | # endif 45 | #else 46 | # define _LE16(x) (x) 47 | # define _LE32(x) (x) 48 | # define _LE64(x) (x) 49 | #endif 50 | 51 | #ifdef __aarch64__ 52 | # define WORD_T uint64_t 53 | # define WORDSIZE_LOG 3 // sizeof(WORD_T) == 1<> 63); 71 | b64 += b64; 72 | a64 >>= 1; 73 | } 74 | // reduction via CRC 75 | res = __crc32w(0, res) ^ (res >> 32); 76 | return res; 77 | } 78 | #endif 79 | // regular multiply is probably better for AArch32 80 | 81 | 82 | // exploit CPU pipelining during CRC computation; unfortunately I haven't been able to measure any benefit 83 | // - Neoverse N1: no noticeable difference 84 | // - Cortex A53: actually runs a bit slower 85 | //#define ENABLE_PIPELINE_OPT 1 86 | 87 | #ifdef ENABLE_PIPELINE_OPT 88 | #ifndef __aarch64__ 89 | # define crc32_multiply_arm RapidYenc::crc32_multiply_generic 90 | #endif 91 | #endif 92 | 93 | 94 | 95 | // inspired/stolen off https://github.com/jocover/crc32_armv8/blob/master/crc32_armv8.c 96 | static uint32_t arm_crc_calc(uint32_t crc, const unsigned char *src, long len) { 97 | 98 | // initial alignment 99 | if (len >= 16) { // 16 is an 
arbitrary number; it just needs to be >=8 100 | if ((uintptr_t)src & sizeof(uint8_t)) { 101 | crc = __crc32b(crc, *src); 102 | src++; 103 | len--; 104 | } 105 | if ((uintptr_t)src & sizeof(uint16_t)) { 106 | crc = __crc32h(crc, _LE16(*((uint16_t *)src))); 107 | src += sizeof(uint16_t); 108 | len -= sizeof(uint16_t); 109 | } 110 | #ifdef __aarch64__ 111 | if ((uintptr_t)src & sizeof(uint32_t)) { 112 | crc = __crc32w(crc, _LE32(*((uint32_t *)src))); 113 | src += sizeof(uint32_t); 114 | len -= sizeof(uint32_t); 115 | } 116 | #endif 117 | } 118 | 119 | const WORD_T* srcW = (const WORD_T*)src; 120 | 121 | #ifdef ENABLE_PIPELINE_OPT 122 | // uses ideas from https://github.com/komrad36/crc#option-13-golden 123 | // (this is a slightly less efficient, but much simpler implementation of the idea) 124 | const unsigned SPLIT_WORDS_LOG = 10; // make sure it's at least 2 125 | const unsigned SPLIT_WORDS = 1<= (long)(sizeof(WORD_T)*SPLIT_WORDS*2)) { 128 | // compute 2x CRCs concurrently to leverage pipelining 129 | uint32_t crc2 = 0; 130 | for(unsigned i=0; i= 0) { 152 | crc = CRC_WORD(crc, *(srcW++)); 153 | crc = CRC_WORD(crc, *(srcW++)); 154 | crc = CRC_WORD(crc, *(srcW++)); 155 | crc = CRC_WORD(crc, *(srcW++)); 156 | crc = CRC_WORD(crc, *(srcW++)); 157 | crc = CRC_WORD(crc, *(srcW++)); 158 | crc = CRC_WORD(crc, *(srcW++)); 159 | crc = CRC_WORD(crc, *(srcW++)); 160 | } 161 | if (len & sizeof(WORD_T)*4) { 162 | crc = CRC_WORD(crc, *(srcW++)); 163 | crc = CRC_WORD(crc, *(srcW++)); 164 | crc = CRC_WORD(crc, *(srcW++)); 165 | crc = CRC_WORD(crc, *(srcW++)); 166 | } 167 | if (len & sizeof(WORD_T)*2) { 168 | crc = CRC_WORD(crc, *(srcW++)); 169 | crc = CRC_WORD(crc, *(srcW++)); 170 | } 171 | if (len & sizeof(WORD_T)) { 172 | crc = CRC_WORD(crc, *(srcW++)); 173 | } 174 | src = (const unsigned char*)srcW; 175 | 176 | #ifdef __aarch64__ 177 | if (len & sizeof(uint32_t)) { 178 | crc = __crc32w(crc, _LE32(*((uint32_t *)src))); 179 | src += sizeof(uint32_t); 180 | } 181 | #endif 182 | if 
(len & sizeof(uint16_t)) { 183 | crc = __crc32h(crc, _LE16(*((uint16_t *)src))); 184 | src += sizeof(uint16_t); 185 | } 186 | if (len & sizeof(uint8_t)) 187 | crc = __crc32b(crc, *src); 188 | 189 | return crc; 190 | } 191 | 192 | static uint32_t do_crc32_incremental_arm(const void* data, size_t length, uint32_t init) { 193 | return ~arm_crc_calc(~init, (const unsigned char*)data, (long)length); 194 | } 195 | 196 | 197 | #if defined(__aarch64__) && (defined(__GNUC__) || defined(_MSC_VER)) 198 | static uint32_t crc32_shift_arm(uint32_t crc1, uint32_t n) { 199 | uint32_t result = crc1; 200 | uint64_t prod = result; 201 | prod <<= 32 - (n&31); 202 | result = __crc32w(0, prod) ^ (prod >> 32); 203 | n &= ~31; 204 | 205 | while(n) { 206 | result = crc32_multiply_arm(result, RapidYenc::crc_power[ctz32(n)]); 207 | n &= n-1; 208 | } 209 | return result; 210 | } 211 | #endif 212 | 213 | 214 | void RapidYenc::crc_arm_set_funcs() { 215 | _do_crc32_incremental = &do_crc32_incremental_arm; 216 | #ifdef __aarch64__ 217 | _crc32_multiply = &crc32_multiply_arm; 218 | # if defined(__GNUC__) || defined(_MSC_VER) 219 | _crc32_shift = &crc32_shift_arm; 220 | # endif 221 | #endif 222 | _crc32_isa = ISA_FEATURE_CRC; 223 | } 224 | #else 225 | void RapidYenc::crc_arm_set_funcs() {} 226 | #endif 227 | -------------------------------------------------------------------------------- /src/yencode/crc_arm_pmull.cc: -------------------------------------------------------------------------------- 1 | #include "crc_common.h" 2 | 3 | // exclude broken/missing arm_acle.h 4 | #if defined(__ARM_FEATURE_CRYPTO) && defined(HEDLEY_GCC_VERSION) 5 | # if !defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,1,1) 6 | # undef __ARM_FEATURE_CRYPTO 7 | # endif 8 | # if defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(9,4,0) && !HEDLEY_GCC_VERSION_CHECK(9,5,0) 9 | # undef __ARM_FEATURE_CRYPTO 10 | # endif 11 | #endif 12 | #if defined(__ARM_FEATURE_CRYPTO) && 
defined(__has_include) 13 | # if !__has_include() 14 | # undef __ARM_FEATURE_CRYPTO 15 | # endif 16 | #endif 17 | 18 | // ARM's intrinsics guide seems to suggest that vmull_p64 is available on A32, but neither Clang/GCC seem to support it on AArch32 19 | #if (defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_CRC32) && defined(__aarch64__)) || (defined(_M_ARM64) && !defined(__clang__)) 20 | 21 | #include 22 | #if defined(_MSC_VER) && !defined(__clang__) 23 | # include 24 | 25 | # ifdef _M_ARM64 26 | // MSVC may detect this pattern: https://devblogs.microsoft.com/cppblog/a-tour-of-4-msvc-backend-improvements/#byteswap-identification 27 | static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) { 28 | x = _byteswap_uint64(x); 29 | x = (x & 0xaaaaaaaaaaaaaaaa) >> 1 | (x & 0x5555555555555555) << 1; 30 | x = (x & 0xcccccccccccccccc) >> 2 | (x & 0x3333333333333333) << 2; 31 | x = (x & 0xf0f0f0f0f0f0f0f0) >> 4 | (x & 0x0f0f0f0f0f0f0f0f) << 4; 32 | return x; 33 | } 34 | // ...whilst this seems to work best for 32-bit RBIT 35 | static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) { 36 | uint64_t r = rbit64(x); 37 | return r >> 32; 38 | } 39 | # else 40 | # define rbit32 _arm_rbit 41 | # endif 42 | #else 43 | # include 44 | // __rbit not present before GCC 11.4.0 or 12.2.0; for ARM32, requires GCC 14 45 | # if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(14,0,0) && (!defined(__aarch64__) || !HEDLEY_GCC_VERSION_CHECK(11,3,0) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && !HEDLEY_GCC_VERSION_CHECK(12,2,0))) 46 | # ifdef __aarch64__ 47 | static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) { 48 | uint64_t r; 49 | __asm__ ("rbit %0,%1\n" 50 | : "=r"(r) : "r"(x) 51 | : /* No clobbers */); 52 | return r; 53 | } 54 | # endif 55 | static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) { 56 | uint32_t r; 57 | __asm__ ( 58 | # ifdef __aarch64__ 59 | "rbit %w0,%w1\n" 60 | # else 61 | "rbit %0,%1\n" 62 | # endif 63 | : "=r"(r) : "r"(x) 64 | : /* No clobbers */); 65 | 
return r; 66 | } 67 | # else 68 | # define rbit32 __rbit 69 | # define rbit64 __rbitll 70 | # endif 71 | #endif 72 | 73 | 74 | // MSVC doesn't have poly64/poly128 types, so always use uint64 instead 75 | 76 | #ifdef __aarch64__ 77 | # if defined(__GNUC__) || defined(__clang__) 78 | static HEDLEY_ALWAYS_INLINE uint64x2_t pmull_low(uint64x1_t a, uint64x1_t b) { 79 | uint64x2_t result; 80 | __asm__ ("pmull %0.1q,%1.1d,%2.1d" 81 | : "=w"(result) 82 | : "w"(a), "w"(b) 83 | : /* No clobbers */); 84 | return result; 85 | } 86 | static HEDLEY_ALWAYS_INLINE uint64x2_t pmull_high(uint64x2_t a, uint64x2_t b) { 87 | uint64x2_t result; 88 | __asm__ ("pmull2 %0.1q,%1.2d,%2.2d" 89 | : "=w"(result) 90 | : "w"(a), "w"(b) 91 | : /* No clobbers */); 92 | return result; 93 | } 94 | # elif defined(_MSC_VER) && !defined(__clang__) 95 | # define pmull_low vmull_p64 96 | # define pmull_high vmull_high_p64 97 | # else 98 | # define pmull_low(x, y) vreinterpretq_u64_p128(vmull_p64(vreinterpret_p64_u64(x), vreinterpret_p64_u64(y))) 99 | # define pmull_high(x, y) vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_u64(x), vreinterpretq_p64_u64(y))) 100 | # endif 101 | #else 102 | # if defined(_MSC_VER) && !defined(__clang__) 103 | # define pmull_low vmull_p64 104 | # define pmull_high(x, y) vmull_p64(vget_high_u64(x), vget_high_u64(y)) 105 | # else 106 | # define pmull_low(x, y) vreinterpretq_u64_p128(vmull_p64(x, y)) 107 | # define pmull_high(x, y) vreinterpretq_u64_p128(vmull_p64(vget_high_p64(vreinterpretq_p64_u64(x)), vget_high_p64(vreinterpretq_p64_u64(y)))) 108 | # endif 109 | #endif 110 | 111 | 112 | static uint32_t crc32_multiply_pmull(uint32_t a, uint32_t b) { 113 | uint64x1_t prod = vget_low_u64(pmull_low( 114 | vreinterpret_u64_u32(vset_lane_u32(a, vdup_n_u32(0), 0)), 115 | vreinterpret_u64_u32(vset_lane_u32(b, vdup_n_u32(0), 0)) 116 | )); 117 | #ifdef __aarch64__ 118 | uint64_t p = vget_lane_u64(prod, 0); 119 | return __crc32w(0, p+p) ^ (p >> 31); 120 | #else 121 | prod = 
vadd_u64(prod, prod); 122 | uint32x2_t prod32 = vreinterpret_u32_u64(prod); 123 | return __crc32w(0, vget_lane_u32(prod32, 0)) ^ vget_lane_u32(prod32, 1); 124 | #endif 125 | } 126 | 127 | 128 | 129 | static const uint32_t crc_power_rev[32] = { // bit-reversed crc_power 130 | 0x00000002, 0x00000004, 0x00000010, 0x00000100, 0x00010000, 0x04c11db7, 0x490d678d, 0xe8a45605, 131 | 0x75be46b7, 0xe6228b11, 0x567fddeb, 0x88fe2237, 0x0e857e71, 0x7001e426, 0x075de2b2, 0xf12a7f90, 132 | 0xf0b4a1c1, 0x58f46c0c, 0xc3395ade, 0x96837f8c, 0x544037f9, 0x23b7b136, 0xb2e16ba8, 0x725e7bfa, 133 | 0xec709b5d, 0xf77a7274, 0x2845d572, 0x034e2515, 0x79695942, 0x540cb128, 0x0b65d023, 0x3c344723 134 | }; 135 | 136 | 137 | static HEDLEY_ALWAYS_INLINE uint64x1_t crc32_shift_pmull_mulred(uint64x1_t a, uint64x1_t b) { 138 | uint64x2_t r = pmull_low(a, b); 139 | uint64x2_t h = pmull_high(r, vdupq_n_u64(0x490d678d)); 140 | return veor_u64(vget_low_u64(r), vget_low_u64(h)); 141 | } 142 | 143 | 144 | static uint32_t crc32_shift_pmull(uint32_t crc1, uint32_t n) { 145 | crc1 = rbit32(crc1); 146 | 147 | uint64x1_t res; 148 | #ifdef __aarch64__ 149 | uint64_t crc = (uint64_t)crc1 << (n & 31); 150 | res = vset_lane_u64(crc, vdup_n_u64(0), 0); 151 | #else 152 | res = vreinterpret_u64_u32(vset_lane_u32(crc1, vdup_n_u32(0), 0)); 153 | res = vshl_u64(res, vdup_n_u64(n&31)); 154 | #endif 155 | n &= ~31; 156 | 157 | if(n) { 158 | #define LOAD_NEXT_POWER vreinterpret_u64_u32(vset_lane_u32(crc_power_rev[ctz32(n)], vdup_n_u32(0), 0)) 159 | uint64x1_t res2 = LOAD_NEXT_POWER; 160 | n &= n-1; 161 | 162 | if(n) { 163 | // first multiply doesn't need reduction 164 | res2 = vget_low_u64(pmull_low(res2, LOAD_NEXT_POWER)); 165 | n &= n-1; 166 | 167 | while(n) { 168 | res = crc32_shift_pmull_mulred(res, LOAD_NEXT_POWER); 169 | n &= n-1; 170 | 171 | if(n) { 172 | res2 = crc32_shift_pmull_mulred(res2, LOAD_NEXT_POWER); 173 | n &= n-1; 174 | } 175 | } 176 | } 177 | #undef LOAD_NEXT_POWER 178 | 179 | // merge two results 180 | 
uint64x2_t prod = pmull_low(res, res2); 181 | // weirdly, vrbitq_u8 is missing in ARM32 MSVC 182 | prod = vreinterpretq_u64_u8(vrev64q_u8(vrbitq_u8(vreinterpretq_u8_u64(prod)))); 183 | #ifdef __aarch64__ 184 | crc = __crc32d(0, vgetq_lane_u64(prod, 1)); 185 | uint64_t rem = vgetq_lane_u64(prod, 0); 186 | crc = __crc32w(rem, crc) ^ (rem >> 32); 187 | #else 188 | uint32x4_t prod32 = vreinterpretq_u32_u64(prod); 189 | uint32_t crc = __crc32w(0, vgetq_lane_u32(prod32, 2)); 190 | crc = __crc32w(vgetq_lane_u32(prod32, 3), crc); 191 | crc = __crc32w(vgetq_lane_u32(prod32, 0), crc) ^ vgetq_lane_u32(prod32, 1); 192 | #endif 193 | return crc; 194 | } else { 195 | #ifdef __aarch64__ 196 | crc = rbit64(crc); 197 | crc = __crc32w(0, crc) ^ (crc >> 32); 198 | return crc; 199 | #else 200 | uint32x2_t r = vreinterpret_u32_u64(res); 201 | return __crc32w(0, rbit32(vget_lane_u32(r, 1))) ^ rbit32(vget_lane_u32(r, 0)); 202 | #endif 203 | } 204 | } 205 | 206 | 207 | void RapidYenc::crc_pmull_set_funcs() { 208 | _crc32_multiply = &crc32_multiply_pmull; 209 | _crc32_shift = &crc32_shift_pmull; 210 | _crc32_isa |= ISA_FEATURE_PMULL; 211 | } 212 | 213 | #else 214 | void RapidYenc::crc_pmull_set_funcs() {} 215 | #endif /* defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_CRC32) */ 216 | -------------------------------------------------------------------------------- /src/yencode/crc_common.h: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include // for size_t 3 | #include "crc.h" 4 | 5 | #ifdef __GNUC__ 6 | # define ctz32 __builtin_ctz 7 | #elif defined(_MSC_VER) 8 | static HEDLEY_ALWAYS_INLINE unsigned ctz32(uint32_t n) { 9 | unsigned long r; 10 | _BitScanForward(&r, n); 11 | return r; 12 | } 13 | #endif 14 | 15 | namespace RapidYenc { 16 | void crc_clmul_set_funcs(); 17 | void crc_clmul256_set_funcs(); 18 | void crc_arm_set_funcs(); 19 | void crc_pmull_set_funcs(); 20 | void crc_riscv_set_funcs(); 21 | 22 | extern const 
uint32_t crc_power[32]; 23 | uint32_t crc32_multiply_generic(uint32_t a, uint32_t b); 24 | uint32_t crc32_shift_generic(uint32_t crc1, uint32_t n); 25 | 26 | } -------------------------------------------------------------------------------- /src/yencode/crc_folding_256.cc: -------------------------------------------------------------------------------- 1 | // 256-bit version of crc_folding 2 | 3 | #include "crc_common.h" 4 | 5 | #if !defined(YENC_DISABLE_AVX256) && ((defined(__VPCLMULQDQ__) && defined(__AVX2__) && defined(__PCLMUL__)) || (defined(_MSC_VER) && _MSC_VER >= 1920 && defined(PLATFORM_X86) && !defined(__clang__))) 6 | #include 7 | #include 8 | 9 | 10 | #if defined(__AVX512VL__) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 11 | # define ENABLE_AVX512 1 12 | #endif 13 | 14 | static __m256i do_one_fold(__m256i src, __m256i data) { 15 | const __m256i fold4 = _mm256_set_epi32( 16 | 0x00000001, 0x54442bd4, 17 | 0x00000001, 0xc6e41596, 18 | 0x00000001, 0x54442bd4, 19 | 0x00000001, 0xc6e41596 20 | ); 21 | #ifdef ENABLE_AVX512 22 | return _mm256_ternarylogic_epi32( 23 | _mm256_clmulepi64_epi128(src, fold4, 0x01), 24 | _mm256_clmulepi64_epi128(src, fold4, 0x10), 25 | data, 26 | 0x96 27 | ); 28 | #else 29 | return _mm256_xor_si256(_mm256_xor_si256( 30 | data, _mm256_clmulepi64_epi128(src, fold4, 0x01) 31 | ), _mm256_clmulepi64_epi128(src, fold4, 0x10)); 32 | #endif 33 | } 34 | 35 | ALIGN_TO(32, static const uint8_t pshufb_rot_table[]) = { 36 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, 37 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 38 | }; 39 | // _mm256_castsi128_si256, but upper is defined to be 0 40 | #if (defined(__clang__) && __clang_major__ >= 5 && (!defined(__APPLE__) || __clang_major__ >= 7)) || (defined(__GNUC__) && __GNUC__ >= 10) || (defined(_MSC_VER) && _MSC_VER >= 1910) 41 | // intrinsic unsupported in GCC 9 and MSVC < 2017 42 | # define zext128_256 _mm256_zextsi128_si256 43 | #else 44 | // technically a cast is incorrect, due to upper 128 
bits being undefined, but should usually work fine 45 | // alternative may be `_mm256_set_m128i(_mm_setzero_si128(), v)` but unsupported on GCC < 7, and most compilers generate a VINSERTF128 instruction for it 46 | # ifdef __OPTIMIZE__ 47 | # define zext128_256 _mm256_castsi128_si256 48 | # else 49 | # define zext128_256(x) _mm256_inserti128_si256(_mm256_setzero_si256(), x, 0) 50 | # endif 51 | #endif 52 | 53 | #ifdef ENABLE_AVX512 54 | # define MM256_BLENDV(a, b, m) _mm256_ternarylogic_epi32(a, b, m, 0xd8) 55 | # define MM_2XOR(a, b, c) _mm_ternarylogic_epi32(a, b, c, 0x96) 56 | #else 57 | # define MM256_BLENDV _mm256_blendv_epi8 58 | # define MM_2XOR(a, b, c) _mm_xor_si128(_mm_xor_si128(a, b), c) 59 | #endif 60 | 61 | static void partial_fold(const size_t len, __m256i *crc0, __m256i *crc1, __m256i crc_part) { 62 | __m256i shuf = _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)(pshufb_rot_table + (len&15)))); 63 | __m256i mask = _mm256_cmpgt_epi8(shuf, _mm256_set1_epi8(15)); 64 | 65 | *crc0 = _mm256_shuffle_epi8(*crc0, shuf); 66 | *crc1 = _mm256_shuffle_epi8(*crc1, shuf); 67 | crc_part = _mm256_shuffle_epi8(crc_part, shuf); 68 | 69 | __m256i crc_out = _mm256_permute2x128_si256(*crc0, *crc0, 0x08); // move bottom->top 70 | __m256i crc01, crc1p; 71 | if(len >= 16) { 72 | crc_out = MM256_BLENDV(crc_out, *crc0, mask); 73 | crc01 = *crc1; 74 | crc1p = crc_part; 75 | *crc0 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21); 76 | *crc1 = _mm256_permute2x128_si256(*crc1, crc_part, 0x21); 77 | crc_part = zext128_256(_mm256_extracti128_si256(crc_part, 1)); 78 | } else { 79 | crc_out = _mm256_and_si256(crc_out, mask); 80 | crc01 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21); 81 | crc1p = _mm256_permute2x128_si256(*crc1, crc_part, 0x21); 82 | } 83 | 84 | *crc0 = MM256_BLENDV(*crc0, crc01, mask); 85 | *crc1 = MM256_BLENDV(*crc1, crc1p, mask); 86 | 87 | *crc1 = do_one_fold(crc_out, *crc1); 88 | } 89 | 90 | 91 | ALIGN_TO(16, static const unsigned crc_k[]) = { 92 | 
0xccaa009e, 0x00000000, /* rk1 */ 93 | 0x751997d0, 0x00000001, /* rk2 */ 94 | 0xccaa009e, 0x00000000, /* rk5 */ 95 | 0x63cd6124, 0x00000001, /* rk6 */ 96 | 0xf7011641, 0x00000000, /* rk7 */ 97 | 0xdb710640, 0x00000001 /* rk8 */ 98 | }; 99 | 100 | 101 | static uint32_t crc_fold(const unsigned char *src, long len, uint32_t initial) { 102 | __m128i xmm_t0 = _mm_clmulepi64_si128( 103 | _mm_cvtsi32_si128(~initial), 104 | _mm_cvtsi32_si128(0xdfded7ec), 105 | 0 106 | ); 107 | 108 | __m256i crc0 = zext128_256(xmm_t0); 109 | __m256i crc1 = _mm256_setzero_si256(); 110 | 111 | if (len < 32) { 112 | if (len == 0) 113 | return initial; 114 | __m256i crc_part = _mm256_setzero_si256(); 115 | memcpy(&crc_part, src, len); 116 | partial_fold(len, &crc0, &crc1, crc_part); 117 | } else { 118 | uintptr_t algn_diff = (0 - (uintptr_t)src) & 0x1F; 119 | if (algn_diff) { 120 | partial_fold(algn_diff, &crc0, &crc1, _mm256_loadu_si256((__m256i *)src)); 121 | src += algn_diff; 122 | len -= algn_diff; 123 | } 124 | 125 | while (len >= 64) { 126 | crc0 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src)); 127 | crc1 = do_one_fold(crc1, _mm256_load_si256((__m256i*)src + 1)); 128 | src += 64; 129 | len -= 64; 130 | } 131 | 132 | if (len >= 32) { 133 | __m256i old = crc1; 134 | crc1 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src)); 135 | crc0 = old; 136 | 137 | len -= 32; 138 | src += 32; 139 | } 140 | 141 | if(len != 0) { 142 | partial_fold(len, &crc0, &crc1, _mm256_load_si256((__m256i *)src)); 143 | } 144 | } 145 | 146 | const __m128i xmm_mask = _mm_set_epi32(-1,-1,-1,0); 147 | __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; 148 | 149 | __m128i xmm_crc0 = _mm256_castsi256_si128(crc0); 150 | __m128i xmm_crc1 = _mm256_extracti128_si256(crc0, 1); 151 | __m128i xmm_crc2 = _mm256_castsi256_si128(crc1); 152 | __m128i xmm_crc3 = _mm256_extracti128_si256(crc1, 1); 153 | 154 | /* 155 | * k1 156 | */ 157 | crc_fold = _mm_load_si128((__m128i *)crc_k); 158 | 159 | x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, 
crc_fold, 0x10); 160 | xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); 161 | xmm_crc1 = MM_2XOR(xmm_crc1, x_tmp0, xmm_crc0); 162 | 163 | x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); 164 | xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); 165 | xmm_crc2 = MM_2XOR(xmm_crc2, x_tmp1, xmm_crc1); 166 | 167 | x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); 168 | xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); 169 | xmm_crc3 = MM_2XOR(xmm_crc3, x_tmp2, xmm_crc2); 170 | 171 | /* 172 | * k5 173 | */ 174 | crc_fold = _mm_load_si128((__m128i *)crc_k + 1); 175 | 176 | xmm_crc0 = xmm_crc3; 177 | xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); 178 | xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); 179 | xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); 180 | 181 | xmm_crc0 = xmm_crc3; 182 | xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); 183 | xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); 184 | #ifdef ENABLE_AVX512 185 | //xmm_crc3 = _mm_maskz_xor_epi32(14, xmm_crc3, xmm_crc0); 186 | xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc0, xmm_mask, 0x28); 187 | #else 188 | xmm_crc0 = _mm_and_si128(xmm_crc0, xmm_mask); 189 | xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); 190 | #endif 191 | 192 | /* 193 | * k7 194 | */ 195 | xmm_crc1 = xmm_crc3; 196 | crc_fold = _mm_load_si128((__m128i *)crc_k + 2); 197 | 198 | xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); 199 | xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); 200 | #ifdef ENABLE_AVX512 201 | xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc1, xmm_crc1, 0xC3); // NOT(xmm_crc3 ^ xmm_crc1) 202 | #else 203 | xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_mask); 204 | xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); 205 | #endif 206 | return _mm_extract_epi32(xmm_crc3, 2); 207 | } 208 | 209 | static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint32_t init) { 210 | return crc_fold((const unsigned char*)data, (long)length, init); 211 | } 212 | 213 | 
void RapidYenc::crc_clmul256_set_funcs() { 214 | crc_clmul_set_funcs(); // set multiply/shift function 215 | _do_crc32_incremental = &do_crc32_incremental_clmul; 216 | _crc32_isa = ISA_LEVEL_VPCLMUL; 217 | } 218 | #else 219 | void RapidYenc::crc_clmul256_set_funcs() { 220 | crc_clmul_set_funcs(); 221 | } 222 | #endif 223 | 224 | -------------------------------------------------------------------------------- /src/yencode/crc_riscv.cc: -------------------------------------------------------------------------------- 1 | #include "crc_common.h" 2 | 3 | #if defined(__riscv) && defined(__GNUC__) && (defined(__riscv_zbkc) || defined(__riscv_zbc)) 4 | 5 | #if __has_include() 6 | # include 7 | # if __riscv_xlen == 64 8 | # define rv_clmul __riscv_clmul_64 9 | # define rv_clmulh __riscv_clmulh_64 10 | # else 11 | # define rv_clmul __riscv_clmul_32 12 | # define rv_clmulh __riscv_clmulh_32 13 | # endif 14 | #else 15 | static HEDLEY_ALWAYS_INLINE uintptr_t rv_clmul(uintptr_t x, uintptr_t y) { 16 | uintptr_t r; 17 | __asm__("clmul %0, %1, %2\n" 18 | : "=r"(r) 19 | : "r"(x), "r"(y) 20 | :); 21 | return r; 22 | } 23 | static HEDLEY_ALWAYS_INLINE uintptr_t rv_clmulh(uintptr_t x, uintptr_t y) { 24 | uintptr_t r; 25 | __asm__("clmulh %0, %1, %2\n" 26 | : "=r"(r) 27 | : "r"(x), "r"(y) 28 | :); 29 | return r; 30 | } 31 | #endif 32 | 33 | // TODO: test big-endian 34 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 35 | # if __riscv_xlen == 64 36 | # define SWAP __builtin_bswap64 37 | # else 38 | # define SWAP __builtin_bswap32 39 | # endif 40 | #else 41 | # define SWAP(d) (d) 42 | #endif 43 | static HEDLEY_ALWAYS_INLINE uintptr_t read_partial(const void* p, unsigned sz) { 44 | uintptr_t data = 0; 45 | memcpy(&data, p, sz); 46 | return SWAP(data); 47 | } 48 | static HEDLEY_ALWAYS_INLINE uintptr_t read_full(const uintptr_t* p) { 49 | return SWAP(*p); 50 | } 51 | #undef SWAP 52 | 53 | static uint32_t rv_crc_calc(uint32_t crc, const unsigned char *src, long len) { 54 | uintptr_t accum[4] = 
{}; 55 | 56 | // note: constants here are bit-reflected and shifted left by 1 57 | // Zbc does also have clmulr to avoid the shift, but: 58 | // - there's no clmulhr, so for XLEN=64, just shift the constant instead to get the same result 59 | // - it's unavailable in Zbkc 60 | // - for XLEN=32, 2x constants is likely worth it to avoid the additional XORs in the loop 61 | 62 | #if __riscv_xlen == 64 63 | const uint64_t MUL_HI = 0x15a546366 /*2^224*/, MUL_LO = 0xf1da05aa /*2^288*/; 64 | #define CLMULL rv_clmul 65 | #define CLMULH rv_clmulh 66 | 67 | accum[3] = rv_clmul(crc, 0xb66b1fa6); // 2^-32 68 | #elif __riscv_xlen == 32 69 | const uint64_t MUL_HI = 0x140d44a2e /*2^128*/, MUL_LO = 0x1751997d0 /*2^160*/; 70 | #define CLMULL(x, k) rv_clmul(x, k & 0xffffffff) 71 | #define CLMULH(x, k) (rv_clmulh(x, k & 0xffffffff) ^ (k > 0xffffffffULL ? (x) : 0)) 72 | 73 | accum[2] = rv_clmul(crc, 0xb66b1fa6); 74 | accum[3] = rv_clmulh(crc, 0xb66b1fa6); 75 | #else 76 | #error "Unknown __riscv_xlen" 77 | #endif 78 | const size_t WS = sizeof(uintptr_t); 79 | 80 | // if src isn't word-aligned, process until it is so 81 | long initial_alignment = ((uintptr_t)src & (WS-1)); 82 | long initial_process = WS - initial_alignment; 83 | if(initial_alignment && len >= initial_process) { 84 | unsigned shl = initial_alignment * 8, shr = initial_process * 8; 85 | #if __riscv_xlen == 64 86 | accum[2] = accum[3] << shl; 87 | #else 88 | accum[1] = accum[2] << shl; 89 | accum[2] = (accum[3] << shl) | (accum[2] >> shr); 90 | #endif 91 | accum[3] = (read_partial(src, initial_process) << shl) | (accum[3] >> shr); 92 | src += initial_process; 93 | len -= initial_process; 94 | } 95 | 96 | // main processing loop 97 | const uintptr_t* srcW = (const uintptr_t*)src; 98 | while((len -= WS*4) >= 0) { 99 | uintptr_t tmpHi, tmpLo; 100 | tmpLo = CLMULL(accum[0], MUL_LO) ^ CLMULL(accum[1], MUL_HI); 101 | tmpHi = CLMULH(accum[0], MUL_LO) ^ CLMULH(accum[1], MUL_HI); 102 | accum[0] = tmpLo ^ read_full(srcW++); 103 | 
accum[1] = tmpHi ^ read_full(srcW++); 104 | 105 | tmpLo = CLMULL(accum[2], MUL_LO) ^ CLMULL(accum[3], MUL_HI); 106 | tmpHi = CLMULH(accum[2], MUL_LO) ^ CLMULH(accum[3], MUL_HI); 107 | accum[2] = tmpLo ^ read_full(srcW++); 108 | accum[3] = tmpHi ^ read_full(srcW++); 109 | } 110 | 111 | // process trailing bytes 112 | if(len & (WS*2)) { 113 | uintptr_t tmpLo = CLMULL(accum[0], MUL_LO) ^ CLMULL(accum[1], MUL_HI); 114 | uintptr_t tmpHi = CLMULH(accum[0], MUL_LO) ^ CLMULH(accum[1], MUL_HI); 115 | accum[0] = accum[2]; 116 | accum[1] = accum[3]; 117 | accum[2] = tmpLo ^ read_full(srcW++); 118 | accum[3] = tmpHi ^ read_full(srcW++); 119 | } 120 | if(len & WS) { 121 | uintptr_t tmpLo = CLMULL(accum[0], MUL_HI); 122 | uintptr_t tmpHi = CLMULH(accum[0], MUL_HI); 123 | accum[0] = accum[1]; 124 | accum[1] = accum[2]; 125 | accum[2] = accum[3] ^ tmpLo; 126 | accum[3] = tmpHi ^ read_full(srcW++); 127 | } 128 | 129 | size_t tail = len & (WS-1); 130 | if(tail) { 131 | unsigned shl = ((WS - tail) * 8), shr = tail * 8; 132 | uintptr_t tmp = accum[0] << shl; 133 | uintptr_t tmpLo = CLMULL(tmp, MUL_HI); 134 | uintptr_t tmpHi = CLMULH(tmp, MUL_HI); 135 | accum[0] = (accum[0] >> shr) | (accum[1] << shl); 136 | accum[1] = (accum[1] >> shr) | (accum[2] << shl); 137 | accum[2] = (accum[2] >> shr) | (accum[3] << shl); 138 | accum[3] = (accum[3] >> shr) | (read_partial(srcW, tail) << shl); 139 | accum[2] ^= tmpLo; 140 | accum[3] ^= tmpHi; 141 | } 142 | 143 | 144 | // done processing: fold everything down 145 | #if __riscv_xlen == 64 146 | // fold 0,1 -> 2,3 147 | accum[2] ^= rv_clmul(accum[0], 0x1751997d0) ^ rv_clmul(accum[1], 0xccaa009e); 148 | accum[3] ^= rv_clmulh(accum[0], 0x1751997d0) ^ rv_clmulh(accum[1], 0xccaa009e); 149 | 150 | // fold 2->3 151 | accum[0] = rv_clmulh(accum[2], 0xccaa009e); 152 | accum[3] ^= rv_clmul(accum[2], 0xccaa009e); 153 | 154 | // fold 64b->32b 155 | accum[1] = rv_clmul(accum[3] & 0xffffffff, 0x163cd6124); 156 | accum[0] ^= accum[1] >> 32; 157 | accum[3] = 
accum[1] ^ (accum[3] >> 32); 158 | accum[3] <<= 32; 159 | #else 160 | // fold 0,1 -> 2,3 161 | accum[2] ^= rv_clmul(accum[0], 0xccaa009e) ^ CLMULL(accum[1], 0x163cd6124); 162 | accum[3] ^= rv_clmulh(accum[0], 0xccaa009e) ^ CLMULH(accum[1], 0x163cd6124); 163 | 164 | // fold 2->3 165 | accum[0] = CLMULH(accum[2], 0x163cd6124); 166 | accum[3] ^= CLMULL(accum[2], 0x163cd6124); 167 | #endif 168 | 169 | // reduction 170 | accum[3] = CLMULL(accum[3], 0xf7011641); 171 | accum[3] = CLMULH(accum[3], 0x1db710640); // maybe consider clmulr for XLEN=32 172 | crc = accum[0] ^ accum[3]; 173 | return crc; 174 | #undef CLMULL 175 | #undef CLMULH 176 | } 177 | 178 | static uint32_t do_crc32_incremental_rv_zbc(const void* data, size_t length, uint32_t init) { 179 | return ~rv_crc_calc(~init, (const unsigned char*)data, (long)length); 180 | } 181 | 182 | 183 | #if __riscv_xlen == 64 184 | // note that prod is shifted by 1 place to the right, due to bit-reflection 185 | static uint32_t crc32_reduce_rv_zbc(uint64_t prod) { 186 | uint64_t t = rv_clmul(prod << 33, 0xf7011641); 187 | t = rv_clmulh(t, 0x1db710640); 188 | t ^= prod >> 31; 189 | return t; 190 | } 191 | #endif 192 | static uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) { 193 | #if __riscv_xlen == 64 194 | uint64_t t = crc32_reduce_rv_zbc(rv_clmul(a, b)); 195 | #else 196 | uint32_t prodLo = rv_clmul(a, b); 197 | uint32_t prodHi = rv_clmulh(a, b); 198 | 199 | // fix prodHi for bit-reflection (clmulr would be ideal here) 200 | prodHi += prodHi; 201 | prodHi |= prodLo >> 31; 202 | prodLo += prodLo; 203 | 204 | uint32_t t = rv_clmul(prodLo, 0xf7011641); 205 | t ^= rv_clmulh(t, 0xdb710640); 206 | t ^= prodHi; 207 | #endif 208 | return t; 209 | } 210 | 211 | #if defined(__GNUC__) || defined(_MSC_VER) 212 | static uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) { 213 | // TODO: require Zbb for ctz 214 | uint32_t result = crc1; 215 | #if __riscv_xlen == 64 216 | // for n<32, can shift directly 217 | uint64_t prod = 
result; 218 | prod <<= 31 ^ (n&31); 219 | n &= ~31; 220 | result = crc32_reduce_rv_zbc(prod); 221 | #endif 222 | if(!n) return result; 223 | 224 | uint32_t result2 = RapidYenc::crc_power[ctz32(n)]; 225 | n &= n-1; 226 | 227 | while(n) { 228 | result = crc32_multiply_rv_zbc(result, RapidYenc::crc_power[ctz32(n)]); 229 | n &= n-1; 230 | 231 | if(n) { 232 | result2 = crc32_multiply_rv_zbc(result2, RapidYenc::crc_power[ctz32(n)]); 233 | n &= n-1; 234 | } 235 | } 236 | return crc32_multiply_rv_zbc(result, result2); 237 | } 238 | #endif 239 | 240 | 241 | void RapidYenc::crc_riscv_set_funcs() { 242 | _do_crc32_incremental = &do_crc32_incremental_rv_zbc; 243 | _crc32_multiply = &crc32_multiply_rv_zbc; 244 | #if defined(__GNUC__) || defined(_MSC_VER) 245 | _crc32_shift = &crc32_shift_rv_zbc; 246 | #endif 247 | _crc32_isa = ISA_FEATURE_ZBC; 248 | } 249 | #else 250 | void RapidYenc::crc_riscv_set_funcs() {} 251 | #endif 252 | -------------------------------------------------------------------------------- /src/yencode/decoder.h: -------------------------------------------------------------------------------- 1 | #ifndef __YENC_DECODER_H 2 | #define __YENC_DECODER_H 3 | 4 | #include "hedley.h" 5 | 6 | namespace RapidYenc { 7 | 8 | 9 | // the last state that the decoder was in (i.e. last few characters processed) 10 | // the state is needed for incremental decoders as its behavior is affected by what it processed last 11 | // acronyms: CR = carriage return (\r), LF = line feed (\n), EQ = equals char, DT = dot char (.) 
12 | typedef enum { 13 | YDEC_STATE_CRLF, // default 14 | YDEC_STATE_EQ, 15 | YDEC_STATE_CR, 16 | YDEC_STATE_NONE, 17 | YDEC_STATE_CRLFDT, 18 | YDEC_STATE_CRLFDTCR, 19 | YDEC_STATE_CRLFEQ // may actually be "\r\n.=" in raw decoder 20 | } YencDecoderState; 21 | 22 | // end result for incremental processing (whether the end of the yEnc data was reached) 23 | typedef enum { 24 | YDEC_END_NONE, // end not reached 25 | YDEC_END_CONTROL, // \r\n=y sequence found, src points to byte after 'y' 26 | YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n' 27 | } YencDecoderEnd; 28 | 29 | 30 | extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); 31 | extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); 32 | extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); 33 | extern int _decode_isa; 34 | 35 | static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) { 36 | unsigned char* ds = (unsigned char*)dest; 37 | (*(isRaw ? 
_do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state); 38 | return ds - (unsigned char*)dest; 39 | } 40 | 41 | static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) { 42 | return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state); 43 | } 44 | 45 | void decoder_init(); 46 | 47 | static inline int decode_isa_level() { 48 | return _decode_isa; 49 | } 50 | 51 | 52 | } // namespace 53 | #endif // defined(__YENC_DECODER_H) 54 | -------------------------------------------------------------------------------- /src/yencode/decoder_avx.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "decoder_common.h" 4 | #if defined(__AVX__) && defined(__POPCNT__) 5 | #include "decoder_sse_base.h" 6 | void RapidYenc::decoder_set_avx_funcs() { 7 | decoder_sse_init(lookups); 8 | decoder_init_lut(lookups->compact); 9 | _do_decode = &do_decode_simd >; 10 | _do_decode_raw = &do_decode_simd >; 11 | _do_decode_end_raw = &do_decode_simd >; 12 | _decode_isa = ISA_LEVEL_AVX; 13 | } 14 | #else 15 | void RapidYenc::decoder_set_avx_funcs() { 16 | decoder_set_ssse3_funcs(); 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /src/yencode/decoder_avx2.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "decoder_common.h" 4 | #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) 5 | #include "decoder_avx2_base.h" 6 | void RapidYenc::decoder_set_avx2_funcs() { 7 | ALIGN_ALLOC(lookups, sizeof(*lookups), 16); 8 | decoder_init_lut(lookups->compact); 9 | RapidYenc::_do_decode = &do_decode_simd >; 10 | RapidYenc::_do_decode_raw = &do_decode_simd >; 11 | RapidYenc::_do_decode_end_raw = &do_decode_simd >; 12 | RapidYenc::_decode_isa = ISA_LEVEL_AVX2; 13 | } 14 | #else 15 | void 
RapidYenc::decoder_set_avx2_funcs() { 16 | decoder_set_avx_funcs(); 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /src/yencode/decoder_common.h: -------------------------------------------------------------------------------- 1 | #include "decoder.h" 2 | 3 | namespace RapidYenc { 4 | void decoder_set_sse2_funcs(); 5 | void decoder_set_ssse3_funcs(); 6 | void decoder_set_avx_funcs(); 7 | void decoder_set_avx2_funcs(); 8 | void decoder_set_vbmi2_funcs(); 9 | extern const bool decoder_has_avx10; 10 | void decoder_set_neon_funcs(); 11 | void decoder_set_rvv_funcs(); 12 | 13 | template 14 | YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state); 15 | } 16 | 17 | 18 | #if defined(PLATFORM_ARM) && !defined(__aarch64__) 19 | #define YENC_DEC_USE_THINTABLE 1 20 | #endif 21 | 22 | // TODO: need to support max output length somehow 23 | 24 | 25 | 26 | template 27 | static inline RapidYenc::YencDecoderEnd _do_decode_simd(size_t width, const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) { 28 | using namespace RapidYenc; 29 | 30 | if(len <= width*2) return do_decode_scalar(src, dest, len, state); 31 | 32 | YencDecoderState tState = YDEC_STATE_CRLF; 33 | YencDecoderState* pState = state ? 
state : &tState; 34 | if((uintptr_t)(*src) & ((width-1))) { 35 | // find source memory alignment 36 | unsigned char* aSrc = (unsigned char*)(((uintptr_t)(*src) + (width-1)) & ~(width-1)); 37 | int amount = (int)(aSrc - *src); 38 | len -= amount; 39 | YencDecoderEnd ended = do_decode_scalar(src, dest, amount, pState); 40 | if(ended) return ended; 41 | } 42 | 43 | size_t lenBuffer = width -1; 44 | if(searchEnd) lenBuffer += 3 + (isRaw?1:0); 45 | else if(isRaw) lenBuffer += 2; 46 | 47 | if(len > lenBuffer) { 48 | unsigned char *p = *dest; // destination pointer 49 | unsigned char escFirst = 0; // input character; first char needs escaping 50 | uint16_t nextMask = 0; 51 | // handle finicky case of special sequences straddled across initial boundary 52 | switch(*pState) { 53 | case YDEC_STATE_CRLF: 54 | if(isRaw && **src == '.') { 55 | nextMask = 1; 56 | if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('\r','\n')) { 57 | (*src) += 3; 58 | *pState = YDEC_STATE_CRLF; 59 | return YDEC_END_ARTICLE; 60 | } 61 | if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('=','y')) { 62 | (*src) += 3; 63 | *pState = YDEC_STATE_NONE; 64 | return YDEC_END_CONTROL; 65 | } 66 | } 67 | else if(searchEnd && *(uint16_t*)(*src) == UINT16_PACK('=','y')) { 68 | (*src) += 2; 69 | *pState = YDEC_STATE_NONE; 70 | return YDEC_END_CONTROL; 71 | } 72 | break; 73 | case YDEC_STATE_CR: 74 | if(isRaw && *(uint16_t*)(*src) == UINT16_PACK('\n','.')) { 75 | nextMask = 2; 76 | if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('\r','\n')) { 77 | (*src) += 4; 78 | *pState = YDEC_STATE_CRLF; 79 | return YDEC_END_ARTICLE; 80 | } 81 | if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('=','y')) { 82 | (*src) += 4; 83 | *pState = YDEC_STATE_NONE; 84 | return YDEC_END_CONTROL; 85 | } 86 | } 87 | else if(searchEnd && (*(uint32_t*)(*src) & 0xffffff) == UINT32_PACK('\n','=','y',0)) { 88 | (*src) += 3; 89 | *pState = YDEC_STATE_NONE; 90 | return YDEC_END_CONTROL; 91 | } 92 | break; 93 | case 
YDEC_STATE_CRLFDT: 94 | if(searchEnd && isRaw && *(uint16_t*)(*src) == UINT16_PACK('\r','\n')) { 95 | (*src) += 2; 96 | *pState = YDEC_STATE_CRLF; 97 | return YDEC_END_ARTICLE; 98 | } 99 | if(searchEnd && isRaw && *(uint16_t*)(*src) == UINT16_PACK('=','y')) { 100 | (*src) += 2; 101 | *pState = YDEC_STATE_NONE; 102 | return YDEC_END_CONTROL; 103 | } 104 | break; 105 | case YDEC_STATE_CRLFDTCR: 106 | if(searchEnd && isRaw && **src == '\n') { 107 | (*src) += 1; 108 | *pState = YDEC_STATE_CRLF; 109 | return YDEC_END_ARTICLE; 110 | } 111 | break; 112 | case YDEC_STATE_CRLFEQ: 113 | if(searchEnd && **src == 'y') { 114 | (*src) += 1; 115 | *pState = YDEC_STATE_NONE; 116 | return YDEC_END_CONTROL; 117 | } 118 | break; 119 | default: break; // silence compiler warning 120 | } 121 | escFirst = (*pState == YDEC_STATE_EQ || *pState == YDEC_STATE_CRLFEQ); 122 | 123 | // our algorithm may perform an aligned load on the next part, of which we consider 2 bytes (for \r\n. sequence checking) 124 | long dLen = (long)(len - lenBuffer); 125 | dLen = (dLen + (width-1)) & ~(width-1); 126 | 127 | kernel((const uint8_t*)(*src) + dLen, dLen, p, escFirst, nextMask); 128 | 129 | if(escFirst) *pState = YDEC_STATE_EQ; // escape next character 130 | else if(nextMask == 1) *pState = YDEC_STATE_CRLF; // next character is '.', where previous two were \r\n 131 | else if(nextMask == 2) *pState = YDEC_STATE_CR; // next characters are '\n.', previous is \r 132 | else *pState = YDEC_STATE_NONE; 133 | 134 | *src += dLen; 135 | len -= dLen; 136 | *dest = p; 137 | } 138 | 139 | // end alignment 140 | if(len) 141 | return do_decode_scalar(src, dest, len, pState); 142 | /** for debugging: ensure that the SIMD routine doesn't exit early 143 | if(len && !searchEnd) { 144 | const uint8_t* s = *src; 145 | unsigned char* p = *dest; 146 | int ended = do_decode_scalar(src, dest, len, pState); 147 | if(*src - s > width*2) { 148 | // this shouldn't happen, corrupt some data to fail the test 149 | while(p < *dest) 150 
| *p++ = 0; 151 | } 152 | return ended; 153 | } 154 | */ 155 | return YDEC_END_NONE; 156 | } 157 | 158 | template 159 | static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) { 160 | return _do_decode_simd(width, src, dest, len, state); 161 | } 162 | template 163 | static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) { 164 | return _do_decode_simd(getWidth(), src, dest, len, state); 165 | } 166 | 167 | 168 | #if defined(PLATFORM_X86) || defined(PLATFORM_ARM) 169 | namespace RapidYenc { 170 | void decoder_init_lut(void* compactLUT); 171 | } 172 | #endif 173 | 174 | template 175 | static inline void decoder_set_nextMask(const uint8_t* src, size_t len, uint16_t& nextMask) { 176 | if(isRaw) { 177 | if(len != 0) { // have to gone through at least one loop cycle 178 | if(src[-2] == '\r' && src[-1] == '\n' && src[0] == '.') 179 | nextMask = 1; 180 | else if(src[-1] == '\r' && src[0] == '\n' && src[1] == '.') 181 | nextMask = 2; 182 | else 183 | nextMask = 0; 184 | } 185 | } else 186 | nextMask = 0; 187 | } 188 | 189 | // without backtracking 190 | template 191 | static inline uint16_t decoder_set_nextMask(const uint8_t* src, unsigned mask) { 192 | if(isRaw) { 193 | if(src[0] == '.') 194 | return mask & 1; 195 | if(src[1] == '.') 196 | return mask & 2; 197 | } 198 | return 0; 199 | } 200 | 201 | // resolve invalid sequences of = to deal with cases like '====' 202 | // bit hack inspired from simdjson: https://youtu.be/wlvKAT7SZIQ?t=33m38s 203 | template 204 | static inline T fix_eqMask(T mask, T maskShift1) { 205 | // isolate the start of each consecutive bit group (e.g. 
01011101 -> 01000101) 206 | T start = mask & ~maskShift1; 207 | 208 | // this strategy works by firstly separating groups that start on even/odd bits 209 | // generally, it doesn't matter which one (even/odd) we pick, but clearing even groups specifically allows the escFirst bit in maskShift1 to work 210 | // (this is because the start of the escFirst group is at index -1, an odd bit, but we can't clear it due to being < 0, so we just retain all odd groups instead) 211 | 212 | const T even = (T)0x5555555555555555; // every even bit (01010101...) 213 | 214 | // obtain groups which start on an odd bit (clear groups that start on an even bit, but this leaves an unwanted trailing bit) 215 | T oddGroups = mask + (start & even); 216 | 217 | // clear even bits in odd groups, whilst conversely preserving even bits in even groups 218 | // the `& mask` also conveniently gets rid of unwanted trailing bits 219 | return (oddGroups ^ even) & mask; 220 | } 221 | -------------------------------------------------------------------------------- /src/yencode/decoder_sse2.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "decoder_common.h" 4 | #ifdef __SSE2__ 5 | #include "decoder_sse_base.h" 6 | 7 | void RapidYenc::decoder_sse_init(RapidYenc::SSELookups* HEDLEY_RESTRICT& lookups) { 8 | ALIGN_ALLOC(lookups, sizeof(SSELookups), 16); 9 | for(int i=0; i<256; i++) { 10 | lookups->BitsSetTable256inv[i] = 8 - ( 11 | (i & 1) + ((i>>1) & 1) + ((i>>2) & 1) + ((i>>3) & 1) + ((i>>4) & 1) + ((i>>5) & 1) + ((i>>6) & 1) + ((i>>7) & 1) 12 | ); 13 | 14 | #define _X(n, k) ((((n) & (1<eqAdd[i] = _X(i, 0) | _X(i, 1) | _X(i, 2) | _X(i, 3) | _X(i, 4) | _X(i, 5) | _X(i, 6) | _X(i, 7); 16 | #undef _X 17 | } 18 | for(int i=0; i<32; i++) { 19 | for(int j=0; j<16; j++) { 20 | if(i >= 16) // only used for LZCNT 21 | lookups->unshufMask[i*16 + j] = ((31-i)>j ? 
-1 : 0); 22 | else // only used for BSR 23 | lookups->unshufMask[i*16 + j] = (i>j ? -1 : 0); 24 | } 25 | } 26 | } 27 | 28 | void RapidYenc::decoder_set_sse2_funcs() { 29 | decoder_sse_init(lookups); 30 | decoder_init_lut(lookups->compact); 31 | _do_decode = &do_decode_simd >; 32 | _do_decode_raw = &do_decode_simd >; 33 | _do_decode_end_raw = &do_decode_simd >; 34 | _decode_isa = ISA_LEVEL_SSE2; 35 | } 36 | #else 37 | void RapidYenc::decoder_set_sse2_funcs() {} 38 | #endif 39 | -------------------------------------------------------------------------------- /src/yencode/decoder_ssse3.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "decoder_common.h" 4 | #ifdef __SSSE3__ 5 | #include "decoder_sse_base.h" 6 | void RapidYenc::decoder_set_ssse3_funcs() { 7 | decoder_sse_init(lookups); 8 | decoder_init_lut(lookups->compact); 9 | _do_decode = &do_decode_simd >; 10 | _do_decode_raw = &do_decode_simd >; 11 | _do_decode_end_raw = &do_decode_simd >; 12 | _decode_isa = ISA_LEVEL_SSSE3; 13 | } 14 | #else 15 | void RapidYenc::decoder_set_ssse3_funcs() { 16 | decoder_set_sse2_funcs(); 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /src/yencode/decoder_vbmi2.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | # include "decoder_common.h" 3 | 4 | #if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__) 5 | const bool RapidYenc::decoder_has_avx10 = true; 6 | #else 7 | const bool RapidYenc::decoder_has_avx10 = false; 8 | #endif 9 | 10 | #if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__) 11 | # ifndef YENC_DISABLE_AVX256 12 | # include "decoder_avx2_base.h" 13 | void RapidYenc::decoder_set_vbmi2_funcs() { 14 | _do_decode = &do_decode_simd >; 15 | _do_decode_raw = 
&do_decode_simd >; 16 | _do_decode_end_raw = &do_decode_simd >; 17 | _decode_isa = ISA_LEVEL_VBMI2; 18 | } 19 | # else 20 | # include "decoder_sse_base.h" 21 | void RapidYenc::decoder_set_vbmi2_funcs() { 22 | _do_decode = &do_decode_simd >; 23 | _do_decode_raw = &do_decode_simd >; 24 | _do_decode_end_raw = &do_decode_simd >; 25 | _decode_isa = ISA_LEVEL_VBMI2; 26 | } 27 | # endif 28 | #else 29 | void RapidYenc::decoder_set_vbmi2_funcs() { 30 | decoder_set_avx2_funcs(); 31 | } 32 | #endif 33 | -------------------------------------------------------------------------------- /src/yencode/encoder.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "encoder_common.h" 3 | #include "encoder.h" 4 | 5 | 6 | // lookup tables for scalar processing 7 | #define _B1(n) _B(n), _B(n+1), _B(n+2), _B(n+3) 8 | #define _B2(n) _B1(n), _B1(n+4), _B1(n+8), _B1(n+12) 9 | #define _B3(n) _B2(n), _B2(n+16), _B2(n+32), _B2(n+48) 10 | #define _BX _B3(0), _B3(64), _B3(128), _B3(192) 11 | 12 | const unsigned char RapidYenc::escapeLUT[256] = { // whether or not the character is critical 13 | #define _B(n) ((n == 214 || n == '\r'+214 || n == '\n'+214 || n == '='-42) ? 0 : (n+42) & 0xff) 14 | _BX 15 | #undef _B 16 | }; 17 | const uint16_t RapidYenc::escapedLUT[256] = { // escaped sequences for characters that need escaping 18 | #define _B(n) ((n == 214 || n == 214+'\r' || n == 214+'\n' || n == '='-42 || n == 214+'\t' || n == 214+' ' || n == '.'-42) ? 
UINT16_PACK('=', ((n+42+64)&0xff)) : 0) 19 | _BX 20 | #undef _B 21 | }; 22 | 23 | #undef _B1 24 | #undef _B2 25 | #undef _B3 26 | #undef _BX 27 | 28 | 29 | 30 | size_t RapidYenc::do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) { 31 | unsigned char* es = (unsigned char*)src + len; 32 | unsigned char *p = dest; // destination pointer 33 | long i = -(long)len; // input position 34 | unsigned char c, escaped; // input character; escaped input character 35 | int col = *colOffset; 36 | 37 | if (col == 0) { 38 | c = es[i++]; 39 | if (RapidYenc::escapedLUT[c]) { 40 | memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); 41 | p += 2; 42 | col = 2; 43 | } else { 44 | *(p++) = c + 42; 45 | col = 1; 46 | } 47 | } 48 | while(i < 0) { 49 | // main line 50 | unsigned char* sp = NULL; 51 | while (i < -1-8 && line_size-col-1 > 8) { 52 | // 8 cycle unrolled version 53 | sp = p; 54 | #define DO_THING(n) \ 55 | c = es[i+n], escaped = RapidYenc::escapeLUT[c]; \ 56 | if (escaped) \ 57 | *(p++) = escaped; \ 58 | else { \ 59 | memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); \ 60 | p += 2; \ 61 | } 62 | DO_THING(0); 63 | DO_THING(1); 64 | DO_THING(2); 65 | DO_THING(3); 66 | DO_THING(4); 67 | DO_THING(5); 68 | DO_THING(6); 69 | DO_THING(7); 70 | 71 | i += 8; 72 | col += (int)(p - sp); 73 | } 74 | if(sp && col >= line_size-1) { 75 | // TODO: consider revert optimisation from SIMD code 76 | // we overflowed - need to revert and use slower method :( 77 | col -= (int)(p - sp); 78 | p = sp; 79 | i -= 8; 80 | } 81 | // handle remaining chars 82 | while(col < line_size-1) { 83 | c = es[i++], escaped = RapidYenc::escapeLUT[c]; 84 | if (escaped) { 85 | *(p++) = escaped; 86 | col++; 87 | } 88 | else { 89 | memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); 90 | p += 2; 91 | col += 2; 92 | } 93 | /* experimental branchless version 94 | *p = '='; 95 | c = (es[i++] + 42) & 0xFF; 96 | int 
cond = (c=='\0' || c=='=' || c=='\r' || c=='\n'); 97 | *(p+cond) = c + (cond << 6); 98 | p += 1+cond; 99 | col += 1+cond; 100 | */ 101 | if (i >= 0) goto end; 102 | } 103 | 104 | // last line char 105 | if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases 106 | c = es[i++]; 107 | if (RapidYenc::escapedLUT[c] && c != '.'-42) { 108 | memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); 109 | p += 2; 110 | } else { 111 | *(p++) = c + 42; 112 | } 113 | } 114 | 115 | if (i >= 0) break; 116 | 117 | c = es[i++]; 118 | if (RapidYenc::escapedLUT[c]) { 119 | uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]); 120 | memcpy(p, &w, sizeof(w)); 121 | p += 4; 122 | col = 2; 123 | } else { 124 | // another option may be to just write the EOL and let the first char be handled by the faster methods above, but it appears that writing the extra byte here is generally faster... 
125 | uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); 126 | memcpy(p, &w, sizeof(w)); 127 | p += 3; 128 | col = 1; 129 | } 130 | } 131 | 132 | end: 133 | if(doEnd) { 134 | // special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line 135 | unsigned char lc = *(p-1); 136 | if(lc == '\t' || lc == ' ') { 137 | *(p-1) = '='; 138 | *p = lc+64; 139 | p++; 140 | col++; 141 | } 142 | } 143 | *colOffset = col; 144 | return p - dest; 145 | } 146 | 147 | 148 | namespace RapidYenc { 149 | size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic; 150 | int _encode_isa = ISA_GENERIC; 151 | } 152 | 153 | #if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 154 | # if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) 155 | # include "encoder_avx_base.h" 156 | static inline void encoder_native_init() { 157 | RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_avx2 >; 158 | encoder_avx2_lut(); 159 | RapidYenc::_encode_isa = ISA_NATIVE; 160 | } 161 | # else 162 | # include "encoder_sse_base.h" 163 | static inline void encoder_native_init() { 164 | RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_sse >; 165 | encoder_sse_lut(); 166 | RapidYenc::_encode_isa = ISA_NATIVE; 167 | } 168 | # endif 169 | #endif 170 | 171 | 172 | void RapidYenc::encoder_init() { 173 | #ifdef PLATFORM_X86 174 | # if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 175 | encoder_native_init(); 176 | # else 177 | int use_isa = cpu_supports_isa(); 178 | if(use_isa >= ISA_LEVEL_VBMI2 && (encoder_has_avx10 || (use_isa & ISA_FEATURE_EVEX512))) 179 | encoder_vbmi2_init(); 180 | else if(use_isa >= ISA_LEVEL_AVX2) 181 | encoder_avx2_init(); 182 | else if(use_isa >= ISA_LEVEL_AVX) 183 | encoder_avx_init(); 184 | else if(use_isa >= ISA_LEVEL_SSSE3) 185 | encoder_ssse3_init(); 186 | else 187 | encoder_sse2_init(); 188 | # 
endif
#endif
	// ARM: install the NEON encoder if the CPU reports NEON support
#ifdef PLATFORM_ARM
	if(cpu_supports_neon())
		encoder_neon_init();
#endif
	// RISC-V: install the RVV encoder if the CPU reports vector support
#ifdef __riscv
	if(cpu_supports_rvv())
		encoder_rvv_init();
#endif
}
--------------------------------------------------------------------------------
/src/yencode/encoder.h:
--------------------------------------------------------------------------------
#ifndef __YENC_ENCODER_H
#define __YENC_ENCODER_H

#include "hedley.h"

namespace RapidYenc {



	// encoder dispatch pointer and the ISA level of the implementation it points to;
	// both are defined in encoder.cc and reassigned by the encoder_*_init() routines
	extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);
	extern int _encode_isa;
	// Encodes `len` bytes from `src` into `dest` via the currently installed encoder.
	// All arguments are forwarded unchanged (pointers are only cast to unsigned char*);
	// returns whatever the installed encoder returns.
	static inline size_t encode(int line_size, int* colOffset, const void* HEDLEY_RESTRICT src, void* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
		return (*_do_encode)(line_size, colOffset, (const unsigned char* HEDLEY_RESTRICT)src, (unsigned char*)dest, len, doEnd);
	}
	void encoder_init();
	// Returns the ISA level recorded for the currently installed encoder.
	static inline int encode_isa_level() {
		return _encode_isa;
	}



}
#endif
--------------------------------------------------------------------------------
/src/yencode/encoder_avx.cc:
--------------------------------------------------------------------------------
#include "common.h"
#include "encoder_common.h"

#if defined(__AVX__) && defined(__POPCNT__)
#include "encoder_sse_base.h"

// Compiled with AVX and POPCNT: installs the SSE-based SIMD encoder,
// initialises its lookup tables and records ISA_LEVEL_AVX.
void RapidYenc::encoder_avx_init() {
	_do_encode = &do_encode_simd< do_encode_sse >;
	encoder_sse_lut();
	_encode_isa = ISA_LEVEL_AVX;
}
#else
// Compiled without AVX/POPCNT: defer to the SSSE3 initialiser.
void RapidYenc::encoder_avx_init() {
	encoder_ssse3_init();
}
#endif


--------------------------------------------------------------------------------
/src/yencode/encoder_avx2.cc:
--------------------------------------------------------------------------------
#include "common.h"
#include "encoder_common.h"

#if defined(__AVX2__) &&
!defined(YENC_DISABLE_AVX256) 5 | #include "encoder_avx_base.h" 6 | 7 | void RapidYenc::encoder_avx2_init() { 8 | _do_encode = &do_encode_simd< do_encode_avx2 >; 9 | encoder_avx2_lut(); 10 | _encode_isa = ISA_LEVEL_AVX2; 11 | } 12 | #else 13 | void RapidYenc::encoder_avx2_init() { 14 | encoder_avx_init(); 15 | } 16 | #endif 17 | 18 | -------------------------------------------------------------------------------- /src/yencode/encoder_common.h: -------------------------------------------------------------------------------- 1 | #ifndef __YENC_ENCODER_COMMON 2 | #define __YENC_ENCODER_COMMON 3 | 4 | namespace RapidYenc { 5 | void encoder_sse2_init(); 6 | void encoder_ssse3_init(); 7 | void encoder_avx_init(); 8 | void encoder_avx2_init(); 9 | void encoder_vbmi2_init(); 10 | extern const bool encoder_has_avx10; 11 | void encoder_neon_init(); 12 | void encoder_rvv_init(); 13 | 14 | // lookup tables for scalar processing 15 | extern const unsigned char escapeLUT[256]; 16 | extern const uint16_t escapedLUT[256]; 17 | 18 | size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd); 19 | } 20 | 21 | 22 | 23 | template 24 | static size_t do_encode_simd(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) { 25 | if(len < 1) return 0; 26 | if(line_size < 12) { // short lines probably not worth processing in a SIMD way 27 | // we assume at least the first and last char exist in the line, and since the first char could be escaped, and SIMD encoder assumes at least one non-first/last char, assumption means that line size has to be >= 4 28 | return RapidYenc::do_encode_generic(line_size, colOffset, src, dest, len, doEnd); 29 | } 30 | 31 | const uint8_t* es = src + len; 32 | uint8_t* p = dest; 33 | 34 | if(*colOffset < 0) *colOffset = 0; // sanity check 35 | 36 | kernel(line_size, colOffset, es, p, len); 
37 | 38 | // scalar loop to process remaining 39 | long i = -(long)len; 40 | if(*colOffset == 0 && i < 0) { 41 | uint8_t c = es[i++]; 42 | if (LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c] != 0)) { 43 | memcpy(p, RapidYenc::escapedLUT + c, 2); 44 | p += 2; 45 | *colOffset = 2; 46 | } else { 47 | *(p++) = c + 42; 48 | *colOffset = 1; 49 | } 50 | } 51 | while(i < 0) { 52 | uint8_t c = es[i++]; 53 | if(*colOffset < line_size-1) { 54 | if(!RapidYenc::escapeLUT[c]) { 55 | p[0] = '='; 56 | p[1] = c+42+64; 57 | p += 2; 58 | (*colOffset) += 2; 59 | } else { 60 | *(p++) = RapidYenc::escapeLUT[c]; 61 | (*colOffset) += 1; 62 | } 63 | } else { 64 | if(*colOffset < line_size) { 65 | if (RapidYenc::escapedLUT[c] && c != '.'-42) { 66 | memcpy(p, RapidYenc::escapedLUT + c, 2); 67 | p += 2; 68 | } else { 69 | *(p++) = c + 42; 70 | } 71 | if(i == 0) break; 72 | c = es[i++]; 73 | } 74 | 75 | // handle EOL 76 | if (RapidYenc::escapedLUT[c]) { 77 | uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]); 78 | memcpy(p, &w, sizeof(w)); 79 | p += 4; 80 | *colOffset = 2; 81 | } else { 82 | uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); 83 | memcpy(p, &w, sizeof(w)); 84 | p += 3; 85 | *colOffset = 1; 86 | } 87 | } 88 | } 89 | 90 | if(doEnd) { 91 | // special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line 92 | unsigned char lc = *(p-1); 93 | if(lc == '\t' || lc == ' ') { 94 | p[-1] = '='; 95 | *p = lc+64; 96 | p++; 97 | (*colOffset)++; 98 | } 99 | } 100 | return p - dest; 101 | } 102 | 103 | #endif /* __YENC_ENCODER_COMMON */ 104 | -------------------------------------------------------------------------------- /src/yencode/encoder_rvv.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "encoder_common.h" 3 | 4 | #ifdef __riscv_vector 5 | #include "encoder.h" 6 | 7 | 8 | static HEDLEY_ALWAYS_INLINE void 
encode_eol_handle_pre(const uint8_t* HEDLEY_RESTRICT _src, long& inpos, uint8_t*& outp, long& col, long lineSizeOffset) { 9 | // TODO: vectorize 10 | uint8_t c = _src[inpos++]; 11 | if(HEDLEY_UNLIKELY(RapidYenc::escapedLUT[c] && c != '.'-42)) { 12 | memcpy(outp, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); 13 | outp += 2; 14 | } else { 15 | *(outp++) = c + 42; 16 | } 17 | 18 | c = _src[inpos++]; 19 | if(LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c]!=0)) { 20 | uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]); 21 | memcpy(outp, &w, sizeof(w)); 22 | outp += 4; 23 | col = lineSizeOffset + 2; 24 | } else { 25 | uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); 26 | memcpy(outp, &w, sizeof(w)); 27 | outp += 3; 28 | col = lineSizeOffset + 1; 29 | } 30 | } 31 | 32 | namespace RapidYenc { 33 | 34 | HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) { 35 | size_t vl2 = RV(vsetvlmax_e8m2)(); // TODO: limit to line length 36 | // TODO: have a LMUL=1 variant if line_size < vl 37 | 38 | // offset position to enable simpler loop condition checking 39 | const int INPUT_OFFSET = vl2*2 -1; // extra chars for EOL handling, -1 to change <= to < 40 | if((intptr_t)len <= INPUT_OFFSET || line_size < (int)vl2*2) return; 41 | 42 | uint8_t *outp = dest; 43 | long inpos = -(long)len; 44 | long lineSizeOffset = -line_size +1; 45 | long col = *colOffset - line_size +1; 46 | 47 | inpos += INPUT_OFFSET; 48 | const uint8_t* _src = srcEnd - INPUT_OFFSET; 49 | 50 | if (HEDLEY_LIKELY(col == -line_size+1)) { 51 | uint8_t c = _src[inpos++]; 52 | if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) { 53 | memcpy(outp, escapedLUT + c, 2); 54 | outp += 2; 55 | col += 2; 56 | } else { 57 | *(outp++) = c + 42; 58 | col += 1; 59 | } 60 | } 61 | if(HEDLEY_UNLIKELY(col >= 0)) { 62 | if(col == 0) 63 | encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset); 64 | 
else { 65 | uint8_t c = _src[inpos++]; 66 | if(LIKELIHOOD(0.0273, escapedLUT[c]!=0)) { 67 | uint32_t v = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); 68 | memcpy(outp, &v, sizeof(v)); 69 | outp += 4; 70 | col = 2-line_size + 1; 71 | } else { 72 | uint32_t v = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); 73 | memcpy(outp, &v, sizeof(v)); 74 | outp += 3; 75 | col = 2-line_size; 76 | } 77 | } 78 | } 79 | 80 | // vector constants 81 | const vuint8mf2_t ALT_SHIFT = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vmv_v_x_u16mf2)(4, vl2)); 82 | const uint8_t _MASK_EXPAND[] = {0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 0xFA, 0xFB, 0xFE, 0xFF}; 83 | const vuint8m1_t MASK_EXPAND = RV(vle8_v_u8m1)(_MASK_EXPAND, 16); 84 | 85 | 86 | // TODO: consider exploiting partial vector capability 87 | while(inpos < 0) { 88 | vuint8m2_t data = RV(vle8_v_u8m2)(_src + inpos, vl2); 89 | inpos += vl2; 90 | 91 | // search for special chars 92 | // TODO: vrgather strat 93 | 94 | vuint8m2_t tmpData = RV(vsub_vx_u8m2)(data, -42, vl2); 95 | vbool4_t cmp = RV(vmor_mm_b4)( 96 | RV(vmor_mm_b4)( 97 | RV(vmseq_vx_u8m2_b4)(data, -42, vl2), 98 | RV(vmseq_vx_u8m2_b4)(tmpData, '=', vl2), 99 | vl2 100 | ), 101 | RV(vmor_mm_b4)( 102 | RV(vmseq_vx_u8m2_b4)(data, '\r'-42, vl2), 103 | RV(vmseq_vx_u8m2_b4)(data, '\n'-42, vl2), 104 | vl2 105 | ), 106 | vl2 107 | ); 108 | 109 | #ifdef __riscv_v_intrinsic 110 | data = RV(vor_vx_u8m2_mu)(cmp, tmpData, tmpData, 64, vl2); 111 | #else 112 | data = RV(vor_vx_u8m2_m)(cmp, tmpData, tmpData, 64, vl2); 113 | #endif 114 | 115 | int idx; 116 | size_t count = RV(vcpop_m_b4)(cmp, vl2); 117 | if(count > 1) { 118 | // widen mask: 4b->8b 119 | vuint8mf4_t vcmp = RV_VEC_U8MF4_CAST(cmp); 120 | // TODO: use vwsll instead if available 121 | // - is clmul useful here? 
122 | vuint8mf2_t xcmp = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vwmulu_vx_u16mf2)(vcmp, 16, vl2)); 123 | xcmp = RV(vsrl_vv_u8mf2)(xcmp, ALT_SHIFT, vl2); 124 | 125 | // expand mask by inserting '1' between each bit (0000abcd -> 1a1b1c1d) 126 | vuint8m1_t xcmpTmp = RV(vrgather_vv_u8m1)(MASK_EXPAND, RV(vlmul_ext_v_u8mf2_u8m1)(xcmp), vl2); 127 | vbool2_t cmpmask = RV_MASK_CAST(2, 8, xcmpTmp); 128 | 129 | // expand data and insert = 130 | // TODO: use vwsll instead if available 131 | vuint16m4_t data2 = RV(vzext_vf2_u16m4)(data, vl2); 132 | data2 = RV(vsll_vx_u16m4)(data2, 8, vl2); 133 | data2 = RV(vor_vx_u16m4)(data2, '=', vl2); 134 | 135 | // prune unneeded = 136 | vuint8m4_t dataTmp = RV(vreinterpret_v_u16m4_u8m4)(data2); 137 | vuint8m4_t final_data = RV(vcompress_vm_u8m4)( 138 | #ifdef __riscv_v_intrinsic 139 | dataTmp, cmpmask, vl2*2 140 | #else 141 | cmpmask, dataTmp, dataTmp, vl2*2 142 | #endif 143 | ); 144 | 145 | RV(vse8_v_u8m4)(outp, final_data, vl2*2); 146 | outp += vl2 + count; 147 | col += vl2 + count; 148 | 149 | if(col >= 0) { 150 | // we overflowed - find correct position to revert back to 151 | // TODO: stick with u8 type for vlmax <= 2048 (need to check if ok if vlmax == 2048) 152 | // - considering that it's rare for colWidth > 128, maybe just don't support vectors that long 153 | vuint16m8_t xidx = RV(viota_m_u16m8)(cmpmask, vl2*2); 154 | vbool2_t discardmask = RV(vmsgeu_vx_u16m8_b2)(xidx, vl2 + count - col, vl2*2); 155 | long idx_revert = RV(vcpop_m_b2)(discardmask, vl2*2); 156 | 157 | outp -= col + (idx_revert & 1); 158 | inpos -= ((idx_revert+1) >> 1); 159 | 160 | goto _encode_eol_handle_pre; 161 | } 162 | } else { 163 | // 0 or 1 special characters 164 | { 165 | vbool4_t mask = RV(vmsbf_m_b4)(cmp, vl2); 166 | // TODO: is it better to shuffle this into two stores, instead of three? 
167 | RV(vse8_v_u8m2_m)(mask, outp, data, vl2); 168 | idx = RV(vcpop_m_b4)(mask, vl2); 169 | outp[idx] = '='; 170 | RV(vse8_v_u8m2_m)(RV(vmnot_m_b4)(mask, vl2), outp+1, data, vl2); 171 | 172 | outp += vl2 + count; 173 | col += vl2 + count; 174 | } 175 | 176 | if(col >= 0) { 177 | if(count > 0) { 178 | idx = vl2 - idx; 179 | if(HEDLEY_UNLIKELY(col == idx)) { 180 | // this is an escape character, so line will need to overflow 181 | outp--; 182 | } else { 183 | inpos += (col > idx); 184 | } 185 | } 186 | outp -= col; 187 | inpos -= col; 188 | 189 | _encode_eol_handle_pre: 190 | encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset); 191 | } 192 | } 193 | } 194 | 195 | *colOffset = col + line_size -1; 196 | dest = outp; 197 | len = -(inpos - INPUT_OFFSET); 198 | } 199 | } // namespace 200 | 201 | void RapidYenc::encoder_rvv_init() { 202 | _do_encode = &do_encode_simd; 203 | _encode_isa = ISA_LEVEL_RVV; 204 | } 205 | #else 206 | void RapidYenc::encoder_rvv_init() {} 207 | #endif /* defined(__riscv_vector) */ 208 | -------------------------------------------------------------------------------- /src/yencode/encoder_sse2.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "encoder_common.h" 3 | 4 | #ifdef __SSE2__ 5 | #include "encoder_sse_base.h" 6 | 7 | void RapidYenc::encoder_sse2_init() { 8 | _do_encode = &do_encode_simd< do_encode_sse >; 9 | encoder_sse_lut(); 10 | _encode_isa = ISA_LEVEL_SSE2; 11 | } 12 | #else 13 | void RapidYenc::encoder_sse2_init() {} 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/yencode/encoder_ssse3.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "encoder_common.h" 3 | 4 | // slightly faster version which improves the worst case scenario significantly; since worst case doesn't happen often, overall speedup is relatively minor 5 | // 
requires PSHUFB (SSSE3) instruction, but will use POPCNT (SSE4.2 (or AMD's ABM, but Phenom doesn't support SSSE3 so doesn't matter)) if available (these only seem to give minor speedups, so considered optional) 6 | #ifdef __SSSE3__ 7 | #include "encoder_sse_base.h" 8 | 9 | void RapidYenc::encoder_ssse3_init() { 10 | _do_encode = &do_encode_simd< do_encode_sse >; 11 | encoder_sse_lut(); 12 | _encode_isa = ISA_LEVEL_SSSE3; 13 | } 14 | #else 15 | void RapidYenc::encoder_ssse3_init() { 16 | encoder_sse2_init(); 17 | } 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /src/yencode/encoder_vbmi2.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "encoder_common.h" 3 | 4 | #if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__) 5 | const bool RapidYenc::encoder_has_avx10 = true; 6 | #else 7 | const bool RapidYenc::encoder_has_avx10 = false; 8 | #endif 9 | 10 | #if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__) 11 | # ifndef YENC_DISABLE_AVX256 12 | # include "encoder_avx_base.h" 13 | 14 | void RapidYenc::encoder_vbmi2_init() { 15 | _do_encode = &do_encode_simd< do_encode_avx2 >; 16 | encoder_avx2_lut(); 17 | _encode_isa = ISA_LEVEL_VBMI2; 18 | } 19 | # else 20 | # include "encoder_sse_base.h" 21 | void RapidYenc::encoder_vbmi2_init() { 22 | _do_encode = &do_encode_simd< do_encode_sse >; 23 | encoder_sse_lut(); 24 | _encode_isa = ISA_LEVEL_VBMI2; 25 | } 26 | # endif 27 | #else 28 | void RapidYenc::encoder_vbmi2_init() { 29 | encoder_avx2_init(); 30 | } 31 | #endif 32 | -------------------------------------------------------------------------------- /src/yencode/platform.cc: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #ifdef PLATFORM_ARM 3 | # ifdef __ANDROID__ 4 | 
# include 5 | # elif defined(_WIN32) 6 | # define WIN32_LEAN_AND_MEAN 7 | # define NOMINMAX 8 | # include 9 | # elif defined(__APPLE__) 10 | # include 11 | # include 12 | # elif defined(__has_include) 13 | # if __has_include() 14 | # include 15 | # if __has_include() 16 | # include 17 | # endif 18 | # endif 19 | # endif 20 | bool RapidYenc::cpu_supports_neon() { 21 | # if defined(AT_HWCAP) 22 | # ifdef __FreeBSD__ 23 | unsigned long supported; 24 | elf_aux_info(AT_HWCAP, &supported, sizeof(supported)); 25 | # ifdef __aarch64__ 26 | return supported & HWCAP_ASIMD; 27 | # else 28 | return supported & HWCAP_NEON; 29 | # endif 30 | # else 31 | # ifdef __aarch64__ 32 | return getauxval(AT_HWCAP) & HWCAP_ASIMD; 33 | # else 34 | return getauxval(AT_HWCAP) & HWCAP_NEON; 35 | # endif 36 | # endif 37 | # elif defined(ANDROID_CPU_FAMILY_ARM) 38 | # ifdef __aarch64__ 39 | return android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD; 40 | # else 41 | return android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON; 42 | # endif 43 | # elif defined(_WIN32) 44 | return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); 45 | # elif defined(__APPLE__) 46 | int supported = 0; 47 | size_t len = sizeof(supported); 48 | if(sysctlbyname("hw.optional.neon", &supported, &len, NULL, 0)) 49 | return false; 50 | return (bool)supported; 51 | # endif 52 | # ifdef __aarch64__ 53 | return true; // assume NEON support on AArch64 54 | # else 55 | return false; 56 | # endif 57 | } 58 | #endif 59 | 60 | 61 | #ifdef PLATFORM_X86 62 | #ifdef _MSC_VER 63 | # define _cpuid1(ar) __cpuid(ar, 1) 64 | # define _cpuid1x(ar) __cpuid(ar, 0x80000001) 65 | # if _MSC_VER >= 1600 66 | # define _cpuidX __cpuidex 67 | # include 68 | # define _GET_XCR() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) 69 | # else 70 | // not supported 71 | # define _cpuidX(ar, eax, ecx) ar[0]=0, ar[1]=0, ar[2]=0, ar[3]=0 72 | # define _GET_XCR() 0 73 | # endif 74 | #else 75 | # include 76 | # define _cpuid1(ar) __cpuid(1, ar[0], 
ar[1], ar[2], ar[3]) 77 | # define _cpuid1x(ar) __cpuid(0x80000001, ar[0], ar[1], ar[2], ar[3]) 78 | # define _cpuidX(ar, eax, ecx) __cpuid_count(eax, ecx, ar[0], ar[1], ar[2], ar[3]) 79 | static inline int _GET_XCR() { 80 | int xcr0; 81 | __asm__ __volatile__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); 82 | return xcr0; 83 | } 84 | #endif 85 | // checks if CPU has 128-bit AVX units; currently not used as AVX2 is beneficial even on Zen1 86 | // static bool cpu_has_slow_avx(cpuid1flag0) { 87 | // int family = ((cpuid1flag0>>8) & 0xf) + ((cpuid1flag0>>16) & 0xff0), 88 | // model = ((cpuid1flag0>>4) & 0xf) + ((cpuid1flag0>>12) & 0xf0); 89 | // return ( 90 | // family == 0x6f // AMD Bulldozer family 91 | // || family == 0x7f // AMD Jaguar/Puma family 92 | // || (family == 0x8f && (model == 0 /*Summit Ridge ES*/ || model == 1 /*Zen*/ || model == 8 /*Zen+*/ || model == 0x11 /*Zen APU*/ || model == 0x18 /*Zen+ APU*/ || model == 0x50 /*Subor Z+*/)) // AMD Zen1 family 93 | // || (family == 6 && model == 0xf) // Centaur/Zhaoxin; overlaps with Intel Core 2, but they don't support AVX 94 | // ); 95 | // } 96 | 97 | 98 | int RapidYenc::cpu_supports_isa() { 99 | int flags[4]; 100 | _cpuid1(flags); 101 | int ret = 0; 102 | 103 | if(flags[2] & 0x800000) 104 | ret |= ISA_FEATURE_POPCNT; 105 | int flags2[4]; 106 | _cpuid1x(flags2); 107 | if(flags2[2] & 0x20) // ABM 108 | ret |= ISA_FEATURE_LZCNT | ISA_FEATURE_POPCNT; 109 | 110 | int family = ((flags[0]>>8) & 0xf) + ((flags[0]>>16) & 0xff0); 111 | int model = ((flags[0]>>4) & 0xf) + ((flags[0]>>12) & 0xf0); 112 | 113 | if(family == 6 && ( 114 | model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x37 || model == 0x4A || model == 0x4C || model == 0x4D || model == 0x5A || model == 0x5D 115 | )) 116 | // Intel Bonnell/Silvermont CPU with very slow PSHUFB and PBLENDVB - pretend SSSE3 doesn't exist 117 | return ret | ISA_LEVEL_SSE2; 118 | 119 | if(family == 0x5f && (model == 0 || model == 1 || 
model == 2)) 120 | // AMD Bobcat with slow SSSE3 instructions - pretend it doesn't exist 121 | return ret | ISA_LEVEL_SSE2; 122 | 123 | if((flags[2] & 0x200) == 0x200) { // SSSE3 124 | if(family == 6 && (model == 0x5c || model == 0x5f || model == 0x7a || model == 0x9c)) 125 | // Intel Goldmont/plus / Tremont with slow PBLENDVB 126 | return ret | ISA_LEVEL_SSSE3; 127 | 128 | if(flags[2] & 0x80000) { // SSE4.1 129 | if((flags[2] & 0x1C800000) == 0x1C800000) { // POPCNT + OSXSAVE + XSAVE + AVX 130 | int xcr = _GET_XCR() & 0xff; // ignore unused bits 131 | if((xcr & 6) == 6) { // AVX enabled 132 | int cpuInfo[4]; 133 | _cpuidX(cpuInfo, 7, 0); 134 | if((cpuInfo[1] & 0x128) == 0x128 && (ret & ISA_FEATURE_LZCNT)) { // BMI2 + AVX2 + BMI1 135 | if((xcr & 0xE0) == 0xE0) { // AVX512 XSTATE (also applies to AVX10) 136 | // check AVX10 137 | int cpuInfo2[4]; 138 | _cpuidX(cpuInfo2, 7, 1); 139 | if(cpuInfo2[3] & 0x80000) { 140 | _cpuidX(cpuInfo2, 0x24, 0); 141 | if((cpuInfo2[1] & 0xff) >= 1 && ( // minimum AVX10.1 142 | #ifdef YENC_DISABLE_AVX256 143 | cpuInfo2[1] & 0x10000 // AVX10/128 144 | #else 145 | cpuInfo2[1] & 0x20000 // AVX10/256 146 | #endif 147 | )) { 148 | if(cpuInfo2[1] & 0x40000) ret |= ISA_FEATURE_EVEX512; 149 | return ret | ISA_LEVEL_VBMI2; 150 | } 151 | } 152 | 153 | if((cpuInfo[1] & 0xC0010000) == 0xC0010000) { // AVX512BW + AVX512VL + AVX512F 154 | ret |= ISA_FEATURE_EVEX512; 155 | if(cpuInfo[2] & 0x40) 156 | return ret | ISA_LEVEL_VBMI2; 157 | return ret | ISA_LEVEL_AVX3; 158 | } 159 | } 160 | // AVX2 is beneficial even on Zen1 161 | return ret | ISA_LEVEL_AVX2; 162 | } 163 | return ret | ISA_LEVEL_AVX; 164 | } 165 | } 166 | return ret | ISA_LEVEL_SSE41; 167 | } 168 | return ret | ISA_LEVEL_SSSE3; 169 | } 170 | return ret | ISA_LEVEL_SSE2; 171 | } 172 | 173 | int RapidYenc::cpu_supports_crc_isa() { 174 | int flags[4]; 175 | _cpuid1(flags); 176 | 177 | if((flags[2] & 0x80202) == 0x80202) { // SSE4.1 + SSSE3 + CLMUL 178 | if((flags[2] & 0x1C000000) == 
0x1C000000) { // AVX + OSXSAVE + XSAVE 179 | int xcr = _GET_XCR() & 0xff; // ignore unused bits 180 | if((xcr & 6) == 6) { // AVX enabled 181 | int cpuInfo[4]; 182 | _cpuidX(cpuInfo, 7, 0); 183 | if((cpuInfo[1] & 0x20) == 0x20 && (cpuInfo[2] & 0x400) == 0x400) { // AVX2 + VPCLMULQDQ 184 | return 2; 185 | } 186 | } 187 | } 188 | return 1; 189 | } 190 | return 0; 191 | } 192 | 193 | #endif // PLATFORM_X86 194 | 195 | #ifdef __riscv 196 | # if defined(__has_include) 197 | # if __has_include() 198 | # include 199 | # if __has_include() 200 | # include 201 | # endif 202 | # endif 203 | # endif 204 | bool RapidYenc::cpu_supports_rvv() { 205 | # if defined(AT_HWCAP) 206 | unsigned long ret; 207 | # ifdef __FreeBSD__ 208 | elf_aux_info(AT_HWCAP, &ret, sizeof(ret)); 209 | # else 210 | ret = getauxval(AT_HWCAP); 211 | # endif 212 | return (ret & 0x20112D) == 0x20112D; // IMAFDCV; TODO: how to detect Z* features of 'G'? 213 | # endif 214 | return false; 215 | } 216 | #endif 217 | 218 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | chardet 3 | jaraco.functools 4 | portend -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | import time 2 | import ssl 3 | import sabctools 4 | import socket 5 | import time 6 | 7 | 8 | hostname = "eunews.frugalusenet.com" 9 | context = ssl.create_default_context() 10 | 11 | print(sabctools.openssl_linked) 12 | 13 | buffer = bytearray(100) 14 | bufferview = memoryview(buffer) 15 | 16 | with socket.create_connection((hostname, 563)) as sock: 17 | with context.wrap_socket(sock, 
server_hostname=hostname) as ssock: 18 | ssock.setblocking(False) 19 | time.sleep(1) 20 | print(ssock.version()) 21 | print(sabctools.unlocked_ssl_recv_into(ssock._sslobj, bufferview[99:])) 22 | print(buffer) 23 | -------------------------------------------------------------------------------- /tests/test_crc32.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sabctools 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "crc1,crc2,len2,expected", 7 | [ 8 | (0, 0, 0, 0), 9 | (4294967295, 0, 0, 4294967295), 10 | (0, 4294967295, 0, 4294967295), 11 | (4294967295, 4294967295, 0, 0), 12 | (4, 16, 256, 2385497022), 13 | (100, 200, 300, 1009376567), 14 | (0, 0, 18446744073709551615, 0), 15 | (4294967295, 4294967295, 18446744073709551615, 0), 16 | (0, 100, 1234567890123, 100), 17 | (100, 0, 1234567890123, 1829446317), 18 | ], 19 | ) 20 | def test_crc32_combine_expected(crc1, crc2, len2, expected): 21 | assert sabctools.crc32_combine(crc1, crc2, len2) == expected 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "crc1,crc2,expected", 26 | [ 27 | (0, 0, 0), 28 | (4294967295, 0, 0), 29 | (0, 4294967295, 0), 30 | (4294967295, 4294967295, 1048090088), 31 | (100, 200, 4155012749), 32 | ], 33 | ) 34 | def test_crc32_multiply_expected(crc1, crc2, expected): 35 | assert sabctools.crc32_multiply(crc1, crc2) == expected 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "crc1,zeroes,expected", 40 | [ 41 | (0, 0, 0), 42 | (4294967295, 0, 4294967295), 43 | (4294967295, 4294967295, 4294967295), 44 | (100, 200, 1523530880), 45 | (0, 18446744073709551615, 0), 46 | (4294967295, 18446744073709551615, 4294967295), 47 | (100, 1234567890123, 980217485), 48 | ], 49 | ) 50 | def test_crc32_zero_unpad_expected(crc1, zeroes, expected): 51 | assert sabctools.crc32_zero_unpad(crc1, zeroes) == expected 52 | 53 | 54 | @pytest.mark.parametrize( 55 | "n,expected", 56 | [ 57 | (0, 2147483648), 58 | (1, 1073741824), 59 | (8, 8388608), 60 | (30, 2), 61 | (31, 
1), 62 | (4294967295, 2147483648), 63 | (4294967296, 1073741824), # 1 64 | ], 65 | ) 66 | def test_crc32_xpown_expected(n, expected): 67 | assert sabctools.crc32_xpown(n) == expected 68 | 69 | 70 | @pytest.mark.parametrize( 71 | "n,expected", 72 | [ 73 | (0, 2147483648), 74 | (1, 8388608), 75 | (4294967295, 2147483648), 76 | (4294967296, 8388608), # 1 77 | ], 78 | ) 79 | def test_crc32_xpow8n_expected(n, expected): 80 | assert sabctools.crc32_xpow8n(n) == expected 81 | -------------------------------------------------------------------------------- /tests/test_decoder.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pytest 3 | import glob 4 | from tests.testsupport import * 5 | 6 | 7 | def test_regular(): 8 | data_plain = read_plain_yenc_file("test_regular.yenc") 9 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 10 | data_plain = read_plain_yenc_file("test_regular_2.yenc") 11 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 12 | 13 | 14 | def test_bytes_compat(): 15 | data_plain = read_plain_yenc_file("test_regular.yenc") 16 | assert python_yenc(data_plain) == sabctools.yenc_decode(memoryview(bytes(data_plain))) 17 | 18 | 19 | def test_partial(): 20 | data_plain = read_plain_yenc_file("test_partial.yenc") 21 | decoded_data, filename, filesize, begin, size, crc_correct = sabctools_yenc_wrapper(data_plain) 22 | assert filename == "90E2Sdvsmds0801dvsmds90E.part06.rar" 23 | assert filesize == 49152000 24 | assert begin == 15360000 25 | assert size == 384000 26 | assert crc_correct is None 27 | assert len(decoded_data) == 549 28 | 29 | 30 | def test_special_chars(): 31 | data_plain = read_plain_yenc_file("test_special_chars.yenc") 32 | # We only compare the data and the filename 33 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 34 | 35 | data_plain = read_plain_yenc_file("test_special_utf8_chars.yenc") 36 | # We only compare the data and the 
filename 37 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 38 | 39 | 40 | def test_bad_crc(): 41 | data_plain = read_plain_yenc_file("test_bad_crc.yenc") 42 | # We only compare the data and the filename 43 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 44 | 45 | 46 | def test_bad_crc_end(): 47 | data_plain = read_plain_yenc_file("test_bad_crc_end.yenc") 48 | with pytest.raises(ValueError) as excinfo: 49 | sabctools_yenc_wrapper(data_plain) 50 | assert "Invalid CRC in footer" in str(excinfo.value) 51 | 52 | 53 | def test_no_filename(): 54 | data_plain = read_plain_yenc_file("test_no_name.yenc") 55 | with pytest.raises(ValueError) as excinfo: 56 | sabctools_yenc_wrapper(data_plain) 57 | assert "Could not find yEnc filename" in str(excinfo.value) 58 | 59 | 60 | def test_padded_crc(): 61 | data_plain = read_plain_yenc_file("test_padded_crc.yenc") 62 | assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain) 63 | 64 | 65 | def test_end_after_filename(): 66 | data_plain = read_plain_yenc_file("test_end_after_filename.yenc") 67 | with pytest.raises(ValueError): 68 | sabctools_yenc_wrapper(data_plain) 69 | 70 | 71 | def test_empty(): 72 | with pytest.raises(ValueError) as excinfo: 73 | sabctools.yenc_decode(memoryview(bytearray(b""))) 74 | assert "Invalid data length" in str(excinfo.value) 75 | 76 | 77 | def test_ref_counts(): 78 | """Note that sys.getrefcount itself adds another reference!""" 79 | # Test regular case 80 | data_plain = read_plain_yenc_file("test_regular.yenc") 81 | data_out, filename, filesize, begin, end, crc_correct = sabctools_yenc_wrapper(data_plain) 82 | 83 | assert sys.getrefcount(data_plain) == 2 84 | assert sys.getrefcount(data_out) == 2 85 | assert sys.getrefcount(filename) == 2 86 | assert sys.getrefcount(begin) == 2 87 | assert sys.getrefcount(end) == 2 88 | assert sys.getrefcount(crc_correct) == 2 89 | 90 | # Test simple error case 91 | fake_inp = memoryview(bytearray(b"1234")) 92 | 
def test_crc_pickles():
    """Decode every pickled crc_* article with both decoders and compare results."""
    all_crc_fails = glob.glob("tests/yencfiles/crc_*")
    # The glob is relative to the working directory; without this check an
    # empty match would make the loop below pass without testing anything.
    assert all_crc_fails, "no crc_* fixtures found under tests/yencfiles"
    for fname in all_crc_fails:
        data_plain = read_pickle(fname)
        assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)


def test_small_file_pickles():
    """Decode every pickled small_file* article with both decoders and compare results."""
    all_pickles = glob.glob("tests/yencfiles/small_file*")
    # Same guard as above: fail loudly if the fixtures cannot be located.
    assert all_pickles, "no small_file* fixtures found under tests/yencfiles"
    for fname in all_pickles:
        data_plain = read_pickle(fname)
        assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
def is_sparse(file: IO) -> bool:
    """Report whether the file behind an open file object is sparse.

    On Windows the file object is closed first and the sparse flag is queried
    with ``fsutil``. On every other platform the file counts as sparse when
    the space implied by ``st_blocks`` (times 512) is smaller than its size.
    """
    path = file.name

    if sys.platform != "win32":
        allocated_bytes = os.stat(path).st_blocks * 512
        return allocated_bytes < os.path.getsize(path)

    # Windows: this closes the caller's file object before querying
    file.close()
    query = subprocess.run(
        ["fsutil", "sparse", "queryflag", path],
        capture_output=True
    )
    return b"This file is set as sparse" in query.stdout
def correct_unknown_encoding(str_or_bytes_in):
    """Turn a filename of unknown encoding into a ``str``.

    Files created on Windows but unpacked/repaired on Linux can end up with
    invalid filenames. The value is converted to bytes (if it is not already)
    and decoded as UTF-8, then as ISO-8859-1, with chardet as the last resort.

    :param str_or_bytes_in: ``str``, ``bytes`` or ``bytearray`` to normalise
    :return: the decoded ``str``
    """
    if isinstance(str_or_bytes_in, bytearray):
        # bytearray has no .encode(); treat it like bytes
        str_or_bytes_in = bytes(str_or_bytes_in)
    elif not isinstance(str_or_bytes_in, bytes):
        # If already string, back to bytes
        str_or_bytes_in = str_or_bytes_in.encode("utf-8", "surrogateescape")

    # Try simple bytes-to-string
    try:
        return str_or_bytes_in.decode("utf-8")
    except UnicodeDecodeError:
        try:
            # Try using 8-bit ASCII, if came from Windows
            return str_or_bytes_in.decode("ISO-8859-1")
        except ValueError:
            # Last resort we use the slow chardet package.
            # NOTE(review): ISO-8859-1 maps every byte value, so this branch
            # looks unreachable - confirm before relying on chardet here.
            return str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"])


def read_plain_yenc_file(filename: str) -> bytearray:
    """Read ``tests/yencfiles/<filename>`` (relative to the working directory)."""
    with open("tests/yencfiles/%s" % filename, "rb") as yencfile:
        return bytearray(yencfile.read())


def read_pickle(filename):
    """Load a pickled article and return its raw bytes as one ``bytearray``.

    The pickles hold a tuple whose first element is the list of received
    chunks; it is followed by either one or two further fields, which are
    ignored here.

    :param filename: path of the pickle file
    :return: all chunks joined into a ``bytearray``
    """
    with open(filename, "rb") as yencfile:
        # SECURITY: pickle must only be used on trusted data; this helper is
        # meant for the fixtures shipped with the test-suite.
        payload = pickle.load(yencfile, encoding="bytes")
    # Index instead of unpacking so both tuple layouts work with a single load
    # and no exception needs to be swallowed.
    data_chunks = payload[0]
    return bytearray(b"".join(data_chunks))


def sabctools_yenc_wrapper(data: bytearray) -> Tuple[bytearray, str, int, int, int, Optional[int]]:
    """Decode with ``sabctools.yenc_decode`` and normalise the filename to ``str``."""
    decoded_data, filename, filesize, begin, size, crc_correct = sabctools.yenc_decode(memoryview(data))
    return decoded_data, correct_unknown_encoding(filename), filesize, begin, size, crc_correct


def python_yenc(data_plain):
    """Use the older decoder to verify the new one.

    :param data_plain: raw article bytes with CRLF line endings
    :return: ``(decoded_data, name, size_from_ybegin, begin, part_size, crc32)``
        where ``begin`` is zero-based and ``part_size`` is ``end - begin + 1``
        from the ``=ypart`` line (both 0 when not available)
    """
    data = []

    # Remove the NNTP-double-dot style
    new_lines = data_plain.split(b"\r\n")
    for i in range(len(new_lines)):
        if new_lines[i][:2] == b"..":
            new_lines[i] = new_lines[i][1:]
    if new_lines[-1] == b".":
        new_lines = new_lines[1:-1]
    data.extend(new_lines)

    # Parse the yEnc headers
    yenc, data = parse_yenc_data(data)
    ybegin, ypart, yend = yenc

    # Now we get the true flat data
    flat_yenc_data = b"".join(data)

    # Remove the escaped-chars
    for i in (0, 9, 10, 13, 27, 32, 46, 61):
        j = b"=%c" % (i + 64)
        flat_yenc_data = flat_yenc_data.replace(j, b"%c" % i)

    # Use the much faster translate function to do fast-subtract of 42
    from_bytes = bytes(range(256))
    to_bytes = bytes((i + 256 - 42) % 256 for i in range(256))
    translate_table = bytes.maketrans(from_bytes, to_bytes)
    decoded_data = flat_yenc_data.translate(translate_table)

    # Detect begin and ending; a missing field stays 0 instead of becoming None
    begin = end = size = 0
    if ypart:
        begin = int(ypart.get("begin") or 0)
        end = int(ypart.get("end") or 0)
        if end and begin:
            size = end - begin + 1
            begin -= 1

    return decoded_data, ybegin["name"], int(ybegin["size"]), begin, size, binascii.crc32(decoded_data)


def parse_yenc_data(data):
    """Split a list of article lines into yEnc headers and payload lines.

    Looks for ``=ybegin`` (optionally followed by ``=ypart``) in the first 40
    lines and for ``=yend`` in the last 10 lines.

    :param data: list of lines (bytes-like)
    :return: ``((ybegin, ypart, yend), payload_lines)``; a header that was not
        found is ``None``
    """
    ybegin = None
    ypart = None
    yend = None

    # Check head
    for i in range(min(40, len(data))):
        try:
            if data[i].startswith(b"=ybegin "):
                # line=, size= and name= are always split off; the name comes
                # last and may itself contain "=", hence the split limit
                splits = 3
                if data[i].find(b" part=") > 0:
                    splits += 1
                if data[i].find(b" total=") > 0:
                    splits += 1

                ybegin = get_yenc_data(data[i], splits)

                if data[i + 1].startswith(b"=ypart "):
                    ypart = get_yenc_data(data[i + 1])
                    data = data[i + 2 :]
                    break
                else:
                    data = data[i + 1 :]
                    break
        except IndexError:
            break

    # Check tail
    for i in range(-1, -11, -1):
        try:
            if data[i].startswith(b"=yend "):
                yend = get_yenc_data(data[i])
                data = data[:i]
                break
        except IndexError:
            break

    return ((ybegin, ypart, yend), data)


def get_yenc_data(line, splits=None):
    """Parse the ``key=value`` fields of a yEnc header line into a dict.

    :param line: header line, e.g. ``=ybegin part=1 line=128 size=123 name=...``
    :param splits: maximum number of ``key=`` separators to honour; everything
        after the last one belongs to the final value
    :return: dict of ``str`` keys and values (empty when the line does not
        split into key/value pairs)
    """
    # Example: =ybegin part=1 line=128 size=123 name=-=DUMMY=- abc.par
    YSPLIT_RE = re.compile(b"([a-zA-Z0-9]+)=")

    fields = {}

    if splits:
        parts = YSPLIT_RE.split(line, splits)[1:]
    else:
        parts = YSPLIT_RE.split(line)[1:]

    if len(parts) % 2:
        return fields

    for i in range(0, len(parts), 2):
        key, value = parts[i], parts[i + 1]
        fields[correct_unknown_encoding(key)] = correct_unknown_encoding(value.strip())

    return fields


def yenc_subtract(char, subtract):
    """Wrap-around for below 0"""
    char_diff = char - subtract
    if char_diff < 0:
        return 256 + char_diff
    return char_diff
10 | -------------------------------------------------------------------------------- /tests/yencfiles/small_file_2.pickle: -------------------------------------------------------------------------------- 1 | ((lp0 2 | S'222 0 \r\n=ybegin part=1 total=1 line=128 size=30 name=RARBG.txt\r\n=ypart begin=1 end=30\r\n~\x99\x9c\x9c\x8f\x98\x9eJ\x8e\x99\xa1\x98\x96\x99\x8b\x8e\x8f\x8eJ\x90\x9c\x99\x97J|k|lq4\r\n=yend size=30 part=1 pcrc32=b6327787 crc32=b6327787\r\n' 3 | p1 4 | aS'.\r\n' 5 | p2 6 | aI167 7 | (lp3 8 | tp4 9 | . 10 | -------------------------------------------------------------------------------- /tests/yencfiles/test_bad_crc.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_bad_crc.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_bad_crc_end.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_bad_crc_end.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_end_after_filename.yenc: -------------------------------------------------------------------------------- 1 | =ybegin part=41 line=128 size=49152000 name=90E2Sdvsmds080 -------------------------------------------------------------------------------- /tests/yencfiles/test_no_name.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_no_name.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_padded_crc.yenc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_padded_crc.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_partial.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_partial.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_regular.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_regular.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_regular_2.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_regular_2.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_special_chars.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_special_chars.yenc -------------------------------------------------------------------------------- /tests/yencfiles/test_special_utf8_chars.yenc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_special_utf8_chars.yenc --------------------------------------------------------------------------------