├── .gitattributes
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── ci_and_build_release.yml
├── .gitignore
├── LICENSE.md
├── MANIFEST.in
├── README.md
├── doc
    └── yenc-draft.1.3.txt
├── pyproject.toml
├── pytest.ini
├── setup.py
├── src
    ├── __init__.py
    ├── crc32.cc
    ├── crc32.h
    ├── crcutil-1.0
    │   ├── AUTHORS
    │   ├── COPYING
    │   ├── ChangeLog
    │   ├── INSTALL
    │   ├── LICENSE
    │   ├── Makefile
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── Makefile.win
    │   ├── NEWS
    │   ├── README
    │   ├── aclocal.m4
    │   ├── autogen.sh
    │   ├── code
    │   │   ├── base_types.h
    │   │   ├── crc32c_sse4.cc
    │   │   ├── crc32c_sse4.h
    │   │   ├── crc32c_sse4_intrin.h
    │   │   ├── crc_casts.h
    │   │   ├── generic_crc.h
    │   │   ├── gf_util.h
    │   │   ├── multiword_128_64_gcc_amd64_sse2.cc
    │   │   ├── multiword_64_64_cl_i386_mmx.cc
    │   │   ├── multiword_64_64_gcc_amd64_asm.cc
    │   │   ├── multiword_64_64_gcc_i386_mmx.cc
    │   │   ├── multiword_64_64_intrinsic_i386_mmx.cc
    │   │   ├── platform.h
    │   │   ├── protected_crc.h
    │   │   ├── rolling_crc.h
    │   │   ├── std_headers.h
    │   │   └── uint128_sse2.h
    │   ├── config.h.in
    │   ├── configure
    │   ├── configure.ac
    │   ├── depcomp
    │   ├── examples
    │   │   ├── interface.cc
    │   │   ├── interface.h
    │   │   └── usage.cc
    │   ├── install-sh
    │   ├── missing
    │   └── tests
    │   │   ├── aligned_alloc.h
    │   │   ├── bob_jenkins_rng.h
    │   │   ├── rdtsc.h
    │   │   ├── set_hi_pri.c
    │   │   ├── unittest.cc
    │   │   ├── unittest.h
    │   │   └── unittest_helper.h
    ├── py.typed
    ├── sabctools.cc
    ├── sabctools.h
    ├── sabctools.pyi
    ├── sparse.cc
    ├── sparse.h
    ├── unlocked_ssl.cc
    ├── unlocked_ssl.h
    ├── utils.cc
    ├── utils.h
    ├── yenc.cc
    ├── yenc.h
    └── yencode
    │   ├── common.h
    │   ├── crc.cc
    │   ├── crc.h
    │   ├── crc_arm.cc
    │   ├── crc_arm_pmull.cc
    │   ├── crc_common.h
    │   ├── crc_folding.cc
    │   ├── crc_folding_256.cc
    │   ├── crc_riscv.cc
    │   ├── decoder.cc
    │   ├── decoder.h
    │   ├── decoder_avx.cc
    │   ├── decoder_avx2.cc
    │   ├── decoder_avx2_base.h
    │   ├── decoder_common.h
    │   ├── decoder_neon.cc
    │   ├── decoder_neon64.cc
    │   ├── decoder_rvv.cc
    │   ├── decoder_sse2.cc
    │   ├── decoder_sse_base.h
    │   ├── decoder_ssse3.cc
    │   ├── decoder_vbmi2.cc
    │   ├── encoder.cc
    │   ├── encoder.h
    │   ├── encoder_avx.cc
    │   ├── encoder_avx2.cc
    │   ├── encoder_avx_base.h
    │   ├── encoder_common.h
    │   ├── encoder_neon.cc
    │   ├── encoder_rvv.cc
    │   ├── encoder_sse2.cc
    │   ├── encoder_sse_base.h
    │   ├── encoder_ssse3.cc
    │   ├── encoder_vbmi2.cc
    │   ├── hedley.h
    │   ├── platform.cc
    │   └── stdint.h
└── tests
    ├── __init__.py
    ├── requirements.txt
    ├── test.py
    ├── test_crc32.py
    ├── test_decoder.py
    ├── test_encoder.py
    ├── test_sparse.py
    ├── test_unlocked_ssl.py
    ├── test_utils.py
    ├── testsupport.py
    └── yencfiles
        ├── crc_1.pickle
        ├── crc_10.pickle
        ├── crc_11.pickle
        ├── crc_2.pickle
        ├── crc_3.pickle
        ├── crc_4.pickle
        ├── crc_5.pickle
        ├── crc_6.pickle
        ├── crc_7.pickle
        ├── crc_8.pickle
        ├── crc_9.pickle
        ├── small_file.pickle
        ├── small_file_2.pickle
        ├── test_bad_crc.yenc
        ├── test_bad_crc_end.yenc
        ├── test_end_after_filename.yenc
        ├── test_no_name.yenc
        ├── test_padded_crc.yenc
        ├── test_partial.yenc
        ├── test_regular.yenc
        ├── test_regular_2.yenc
        ├── test_special_chars.yenc
        └── test_special_utf8_chars.yenc


/.gitattributes:
--------------------------------------------------------------------------------
1 | # These should not be touched
2 | *.yenc binary
3 | *.pickle binary


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 |   directory: "/"
5 |   schedule:
6 |     interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/.github/workflows/ci_and_build_release.yml:
--------------------------------------------------------------------------------
  1 | name: Tests and build wheels
  2 | 
  3 | on: [push, pull_request]
  4 | 
  5 | jobs:
  6 |   ci:
  7 |     name: Tests ${{ matrix.os }} ${{ matrix.python-architecture }} - Python ${{ matrix.python-version }}
  8 |     runs-on: ${{ matrix.os }}
  9 |     strategy:
 10 |       fail-fast: false
 11 |       matrix:
 12 |         os: [ ubuntu-latest, windows-latest, macos-latest ]
 13 |         python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ]
 14 |         python-architecture: [ '' ]
 15 |         include:
 16 |           - os: windows-latest
 17 |             python-architecture: 'x86'
 18 |             python-version: '3.8'
 19 | 
 20 |     steps:
 21 |       - uses: actions/checkout@v4
 22 |       - uses: actions/setup-python@v5
 23 |         name: Install Python
 24 |         with:
 25 |           python-version: ${{ matrix.python-version }}
 26 |           architecture:  ${{ matrix.python-architecture }}
 27 |       - name: Build module and run pytest
 28 |         run: |
 29 |           pip install wheel setuptools
 30 |           pip install -r tests/requirements.txt
 31 |           pip install . -v
 32 |           python -c "import sabctools; print('Version:', sabctools.__version__);"
 33 |           python -c "import sabctools; print('SIMD:', sabctools.simd);"
 34 |           python -c "import sabctools; print('OpenSSL linked:', sabctools.openssl_linked);"
 35 |           pytest
 36 | 
 37 |   build_wheels:
 38 |     name: Build wheels on ${{ matrix.os }} ${{ matrix.linux_arch }}
 39 |     needs: ci
 40 |     runs-on: ${{ matrix.os }}
 41 |     strategy:
 42 |       fail-fast: false
 43 |       matrix:
 44 |         include:
 45 |           - os: windows-latest
 46 |           - os: macos-latest
 47 |           - os: ubuntu-latest
 48 |             linux_arch: x86_64
 49 |           - os: ubuntu-latest
 50 |             linux_arch: aarch64
 51 | 
 52 |     steps:
 53 |       - uses: actions/checkout@v4
 54 |       - uses: actions/setup-python@v5
 55 |         name: Install Python
 56 |         with:
 57 |           python-version: '3.11'
 58 |       - name: Set up QEMU
 59 |         if: runner.os == 'Linux' && matrix.linux_arch == 'aarch64'
 60 |         uses: docker/setup-qemu-action@v3
 61 |         with:
 62 |           platforms: all
 63 | 
 64 |       - name: Build wheels
 65 |         uses: pypa/cibuildwheel@v2.23.3
 66 |         env:
 67 |           CIBW_SKIP: pp* *i686* cp36* cp37*
 68 |           CIBW_ARCHS_LINUX: ${{ matrix.linux_arch }}
 69 |           CIBW_ARCHS_MACOS: universal2
 70 |           CIBW_ARCHS_WINDOWS: all
 71 |           CIBW_BUILD_VERBOSITY: 1
 72 |           CIBW_TEST_SKIP: '*'
 73 |       - name: Upload wheel artifacts
 74 |         uses: actions/upload-artifact@v4
 75 |         with:
 76 |           path: ./wheelhouse/*.whl
 77 |           name: Wheels ${{ matrix.os }} ${{ matrix.linux_arch }}
 78 |       # actions/upload-artifact requires unique artifact names within a run, so the sdist is uploaded by one job only
 79 |       - name: Build source distribution
 80 |         run: python setup.py sdist
 81 |       - name: Upload source distribution artifact
 82 |         uses: actions/upload-artifact@v4
 83 |         if: runner.os == 'Linux' && matrix.linux_arch == 'aarch64'
 84 |         with:
 85 |           path: ./dist/*.tar.gz
 86 |           name: Source distribution
 87 |       - name: Publish to PyPI
 88 |         env:
 89 |           TWINE_USERNAME: '__token__'
 90 |           TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }}
 91 |         if: env.TWINE_PASSWORD && startsWith(github.ref, 'refs/tags/')
 92 |         run: |
 93 |           pip install twine
 94 |           twine upload --skip-existing ./dist/*.tar.gz ./wheelhouse/*.whl
 95 |       - name: Publish release to GitHub
 96 |         uses: softprops/action-gh-release@v2
 97 |         if: startsWith(github.ref, 'refs/tags/') && runner.os == 'Linux' && matrix.linux_arch == 'aarch64'
 98 |         with:
 99 |           token: ${{ secrets.GITHUB_TOKEN }}
100 |           prerelease: false
101 |           files: ./dist/*.tar.gz
102 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 | 
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 | 
49 | # Building
50 | Build
51 | *.egg-info
52 | build
53 | test
54 | dist
55 | *.whl
56 | 
57 | # Visual studio
58 | cmake*/
59 | *.sln
60 | *.vcxproj*
61 | .vs/
62 | 
63 | # Compiled python
64 | *.py[cod]
65 | __pycache__
66 | 
67 | # PyCharm project files
68 | .idea/
69 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include src *
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | SABCTools - C implementations of functions for use within SABnzbd  
 3 | ===============================  
 4 |   
 5 | This module implements four main sets of C implementations that are used within SABnzbd:   
 6 | * yEnc decoding and encoding using SIMD routines  
 7 | * CRC32 calculations  
 8 | * Non-blocking SSL-socket reading  
 9 | * Marking files as sparse
10 | 
11 | Of course, they can also be used in any other application.
12 | 
13 | ## yEnc decoding and encoding using SIMD routines
14 | yEnc decoding and encoding performed by using [yencode](https://github.com/animetosho/node-yencode) from animetosho,   
15 | which utilizes x86/ARM SIMD optimised routines if such CPU features are available.  
16 |   
17 | ## CRC32 calculations
18 | We used the `crcutil` library for very fast CRC calculations.
19 | 
20 | ## Non-blocking SSL-socket reading  
21 | When Python reads data from a non-blocking SSL socket, it is limited to receiving 16K of data at once. This module implements a patched version that can read as much data as is available at once.
22 | For more details, see the [cpython pull request](https://github.com/python/cpython/pull/31492).
23 | 
24 | ## Marking files as sparse
25 | Uses Windows specific system calls to mark files as sparse and set the desired size.
26 | On other platforms the same is achieved by calling `truncate`.
27 | 
28 | ## Utility functions
29 | Use `sabctools.bytearray_malloc(size)` to get a `bytearray` that is uninitialized (not set to `0`'s). 
30 | This is much faster than the built-in `bytearray(size)` because the data inside the new `bytearray` will be whatever is present in the memory block.
31 |   
32 | # Installing  
33 |   
34 | As simple as running:  
35 | ```  
36 | pip install sabctools --upgrade  
37 | ```  
38 | When you want to compile from sources, you can run in the `sabctools` directory:  
39 | ```  
40 | pip install .  
41 | ```  
42 | 
43 | ## SIMD detection  
44 | 
45 | To see which SIMD set was detected on your system, run:  
46 | ```  
47 | python -c "import sabctools; print(sabctools.simd);"  
48 | ```  
49 |   
50 | ## OpenSSL detection  
51 | 
52 | To see if we could link to OpenSSL library on your system, run:  
53 | ```  
54 | python -c "import sabctools; print(sabctools.openssl_linked);"  
55 | ```  
56 | 
57 | # Testing  
58 |   
59 | For testing we use `pytest` (install via `pip install -r tests/requirements.txt`) and the tests can simply be executed by browsing to the `sabctools` directory and running:  
60 | ```  
61 | pytest  
62 | ```  
63 | Note that tests can fail if `git` modified the line endings of data files when checking out the repository!  


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | norecursedirs = yencfiles
3 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # C-extension is placed as submodule to allow typing
2 | from sabctools.sabctools import *
3 | __version__ = version


--------------------------------------------------------------------------------
/src/crc32.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #include "crc32.h"
20 | #include "yencode/crc.h"
21 | 
22 | PyObject* crc32_combine(PyObject *self, PyObject *args) {  // Python binding: (crc1, crc2, length) -> int computed by RapidYenc::crc32_combine
23 |     unsigned long crc1, crc2;
24 |     unsigned long long length;
25 | 
26 |     if(!PyArg_ParseTuple(args, "kkK:crc32_combine", &crc1, &crc2, &length)) {  // "k" = unsigned long, "K" = unsigned long long; neither format unit is overflow-checked
27 |         return NULL;  // PyArg_ParseTuple has already set the Python exception
28 |     }
29 | 
30 |     crc1 = RapidYenc::crc32_combine(crc1, crc2, length);  // crc1 is reused to hold the result
31 | 
32 |     return PyLong_FromUnsignedLong(crc1);
33 | }
34 | 
35 | PyObject* crc32_multiply(PyObject *self, PyObject *args) {  // Python binding: (crc1, crc2) -> int computed by RapidYenc::crc32_multiply
36 |     unsigned long crc1, crc2;
37 | 
38 |     if(!PyArg_ParseTuple(args, "kk:crc32_multiply", &crc1, &crc2)) {  // "k" = unsigned long, converted without overflow checking
39 |         return NULL;  // PyArg_ParseTuple has already set the Python exception
40 |     }
41 | 
42 |     crc1 = RapidYenc::crc32_multiply(crc1, crc2);  // crc1 is reused to hold the result
43 | 
44 |     return PyLong_FromUnsignedLong(crc1);
45 | }
46 | 
47 | PyObject* crc32_zero_unpad(PyObject *self, PyObject *args) {  // Python binding: (crc1, length) -> int computed by RapidYenc::crc32_unzero
48 |     unsigned long crc1;
49 |     unsigned long long length;
50 | 
51 |     if(!PyArg_ParseTuple(args, "kK:crc32_zero_unpad", &crc1, &length)) {  // "k" = unsigned long, "K" = unsigned long long; neither format unit is overflow-checked
52 |         return NULL;  // PyArg_ParseTuple has already set the Python exception
53 |     }
54 | 
55 |     crc1 = RapidYenc::crc32_unzero(crc1, length);  // crc1 is reused to hold the result
56 | 
57 |     return PyLong_FromUnsignedLong(crc1);
58 | }
59 | 
60 | PyObject* crc32_xpown(PyObject* self, PyObject* arg) {  // Python binding: single integer n -> int computed by RapidYenc::crc32_2pow(n)
61 |     long long n = PyLong_AsLongLong(arg);  // signed conversion; returns -1 with an exception set on failure
62 | 
63 |     if (PyErr_Occurred()) {  // a pending exception is what distinguishes a conversion error from a real -1
64 |         return NULL;
65 |     }
66 | 
67 |     unsigned long result = RapidYenc::crc32_2pow(n);
68 | 
69 |     return PyLong_FromUnsignedLong(result);
70 | }
71 | 
72 | PyObject* crc32_xpow8n(PyObject* self, PyObject* arg) {  // Python binding: single integer n -> int computed by RapidYenc::crc32_256pow(n)
73 |     unsigned long long n = PyLong_AsUnsignedLongLong(arg);  // unsigned conversion; returns (unsigned long long)-1 with an exception set on failure
74 | 
75 |     if (PyErr_Occurred()) {  // a pending exception is what distinguishes a conversion error from a real maximum value
76 |         return NULL;
77 |     }
78 | 
79 |     unsigned long result = RapidYenc::crc32_256pow(n);
80 | 
81 |     return PyLong_FromUnsignedLong(result);
82 | }
83 | 


--------------------------------------------------------------------------------
/src/crc32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #ifndef SABCTOOLS_CRC32_H
20 | #define SABCTOOLS_CRC32_H
21 | 
22 | #include <Python.h>
23 | 
24 | PyObject* crc32_combine(PyObject *, PyObject*);
25 | PyObject* crc32_multiply(PyObject *, PyObject*);
26 | PyObject* crc32_zero_unpad(PyObject *, PyObject*);
27 | PyObject* crc32_xpown(PyObject *, PyObject*);
28 | PyObject* crc32_xpow8n(PyObject *, PyObject*);
29 | 
30 | #endif //SABCTOOLS_CRC32_H
31 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/AUTHORS:
--------------------------------------------------------------------------------
1 | Andrew Kadatch
2 | Bob Jenkins
3 | 
4 | [The end of the document]
5 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/ChangeLog:
--------------------------------------------------------------------------------
1 | 10 Dec 2010:
2 |   - Version 1.0.0
3 | 
4 | [The end of the document]
5 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/INSTALL:
--------------------------------------------------------------------------------
 1 | Since crcutil is a library, and rather small, template library,
 2 | it is better to compile it directly into your code. Or you could
 3 | build a library for interface.cc and use it.
 4 | 
 5 | 
 6 | Building and running the tests (Linux/GCC, MacOSX/GCC)
 7 | ------------------------------------------------------
 8 | 
 9 | Run
10 |   ./autogen.sh <target> <compiler_flags>
11 | or
12 |   ./autogen.sh <target> "-m32 -march=i686 <compiler_flags>"
13 | to build and run 64-bit and 32-bit <target> with GCC.
14 | 
15 | Typically, <compiler_flags> is not required.
16 | 
17 | Useful values for <target> are:
18 |   clean
19 |   configure
20 |   check
21 | 
22 | E.g.
23 |   ./autogen.sh check
24 | will build and run 64-bit unittest, whilst
25 |   ./autogen.sh clean
26 | will clean everything up.
27 | 
28 | 
29 | Why ./autogen.sh? Two reasons:
30 | 
31 | 1. Automake is well beyond my comprehension, and I am unable to create
32 |    "Makefile.am" consistently. So autogen.sh has that piece of black magic.
33 | 
34 | 2. autogen.sh detects version of GCC and provides different compile options
35 |    to work around differences between compiler versions that cannot
36 |    be detected at compile time.
37 | 
38 | You still can do
39 |   ./configure CXXFLAGS="-O3" CFLAGS="-O3"
40 | (if you use GCC before 4.5.0) or
41 |   ./configure CXXFLAGS="-O3 -mcrc32" CFLAGS="-O3"
42 | and then run
43 |   make check
44 | but the use of "./autogen.sh" is the preferred way to go.
45 | 
46 | 
47 | Building and running the tests (Windows, CL or ICL compiler)
48 | ------------------------------------------------------------
49 | 
50 | Run
51 |   nmake -f Makefile.win cl64
52 | or
53 |   nmake -f Makefile.win cl32
54 | or
55 |   nmake -f Makefile.win icl64
56 | or
57 |   nmake -f Makefile.win icl32
58 | to build the 64-bit and 32-bit unit tests using
59 | Microsoft CL and Intel's ICL compilers respectively.
60 | 
61 | Run
62 |   nmake -f Makefile.win clean
63 | to clean everything up.
64 | 
65 | [The end of the document]
66 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/Makefile.am:
--------------------------------------------------------------------------------
 1 | AM_CXXFLAGS=-DCRCUTIL_USE_MM_CRC32=1 -Wall -msse2 -Icode
 2 | AM_CFLAGS=$(AM_CXXFLAGS)
 3 | check_PROGRAMS=crcutil_ut
 4 | TESTS=crcutil_ut
 5 | crcutil_ut_SOURCES=code/base_types.h code/crc32c_sse4.cc code/crc32c_sse4.h code/crc32c_sse4_intrin.h code/crc_casts.h code/generic_crc.h code/gf_util.h code/multiword_128_64_gcc_amd64_sse2.cc code/multiword_64_64_cl_i386_mmx.cc code/multiword_64_64_gcc_amd64_asm.cc code/multiword_64_64_gcc_i386_mmx.cc code/platform.h code/protected_crc.h code/rolling_crc.h code/std_headers.h code/uint128_sse2.h tests/aligned_alloc.h tests/bob_jenkins_rng.h tests/rdtsc.h tests/set_hi_pri.c tests/unittest.cc tests/unittest.h tests/unittest_helper.h 
 6 | tmpdir=/tmp
 7 | tmp_PROGRAMS=usage
 8 | usage_CXXFLAGS=$(AM_CXXFLAGS) -Itests
 9 | usage_SOURCES=code/base_types.h code/crc32c_sse4.cc code/crc32c_sse4.h code/crc32c_sse4_intrin.h code/crc_casts.h code/generic_crc.h code/gf_util.h code/multiword_128_64_gcc_amd64_sse2.cc code/multiword_64_64_cl_i386_mmx.cc code/multiword_64_64_gcc_amd64_asm.cc code/multiword_64_64_gcc_i386_mmx.cc code/platform.h code/protected_crc.h code/rolling_crc.h code/std_headers.h code/uint128_sse2.h examples/interface.cc examples/interface.h examples/usage.cc tests/aligned_alloc.h 
10 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/Makefile.win:
--------------------------------------------------------------------------------
 1 | Makefile=Makefile.win
 2 | 
 3 | CL_FLAGS=-Wall -O2 -nologo -DCRCUTIL_USE_MM_CRC32=0
 4 | ICL_FLAGS=-Wall -O3 -Qdiag-disable:181 -Qdiag-disable:185 -Qdiag-disable:442 -Qdiag-disable:vec -DCRCUTIL_USE_MM_CRC32=0
 5 | INCLUDES=-Icode
 6 | 
 7 | all:
 8 |   @echo Please run "nmake target" where "target" is one of:
 9 |   @echo cl64  - 64-bit Microsoft compiler
10 |   @echo cl32  - 32-bit Microsoft compiler
11 |   @echo icl64 - 64-bit Intel compiler
12 |   @echo icl32 - 32-bit Intel compiler
13 | 
14 | 
15 | cl64:
16 |   @call "%VCINSTALLDIR%\bin\amd64\vcvarsamd64.bat" && nmake -nologo -f $(Makefile) CC_FLAGS="$(CL_FLAGS) -Icode -Itests" CC=cl.exe configured
17 | 
18 | cl32:
19 |   @call "%VCINSTALLDIR%\bin\vcvars32.bat" && nmake -nologo -f $(Makefile) CC_FLAGS="$(CL_FLAGS) -Icode -Itests" CC=cl.exe configured
20 | 
21 | icl64:
22 |   @call "%ICPP_COMPILER11%bin\iclvars.bat" intel64 && nmake -nologo -f $(Makefile) CC_FLAGS="$(ICL_FLAGS) -Icode -Itests" CC=icl.exe configured
23 | 
24 | icl32:
25 |   @call "%ICPP_COMPILER11%bin\iclvars.bat" ia32 && nmake -nologo -f $(Makefile) CC_FLAGS="$(ICL_FLAGS) -Icode -Itests" CC=icl.exe configured
26 | 
27 | 
28 | clean:
29 |   del /q *.obj *.asm *.exe *.pdb *.suo
30 | 
31 | 
32 | TARGETS=unittest.exe example.exe
33 | 
34 | COMMON_CODE=\
35 |   code/crc32c_sse4.cc \
36 |   code/multiword_64_64_cl_i386_mmx.cc
37 | 
38 | COMMON_HEADERS=\
39 |   code/base_types.h \
40 |   code/crc32c_sse4.h \
41 |   code/crc32c_sse4_intrin.h \
42 |   code/crc_casts.h \
43 |   code/generic_crc.h \
44 |   code/gf_util.h \
45 |   code/platform.h \
46 |   code/protected_crc.h \
47 |   code/rolling_crc.h \
48 |   code/std_headers.h \
49 |   code/uint128_sse2.h
50 | 
51 | UNITTEST_CODE=\
52 |   tests/unittest.cc \
53 |   tests/set_hi_pri.c \
54 |   $(COMMON_CODE)
55 | 
56 | UNITTEST_HEADERS=\
57 |   tests/aligned_alloc.h \
58 |   tests/bob_jenkins_rng.h \
59 |   tests/rdtsc.h \
60 |   tests/unittest.h \
61 |   tests/unittest_helper.h \
62 |   $(COMMON_HEADERS)
63 | 
64 | EXAMPLE_CODE=\
65 |   examples/usage.cc \
66 |   examples/interface.cc \
67 |   $(COMMON_CODE)
68 | 
69 | EXAMPLE_HEADERS=\
70 |   examples/interface.h \
71 |   $(COMMON_HEADERS)
72 | 
73 | configured: $(TARGETS)
74 | 
75 | unittest.exe: $(Makefile) $(UNITTEST_CODE) $(UNITTEST_HEADERS)
76 |   $(CC) $(CC_FLAGS) $(UNITTEST_CODE)
77 | 
78 | example.exe: $(Makefile) $(EXAMPLE_CODE) $(EXAMPLE_HEADERS)
79 |   $(CC) $(CC_FLAGS) -Iexamples $(EXAMPLE_CODE)
80 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/NEWS:
--------------------------------------------------------------------------------
1 | 10 Jan 2010: automake is working
2 | 
3 | [The end of the document]
4 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/README:
--------------------------------------------------------------------------------
  1 | Goals
  2 | -----
  3 | 
  4 | 1. Performance. In distributed systems the data is CRC'ed
  5 |    on every breath in and out, and often multiple times.
  6 |    Having entire cluster spend 10% of all CPU computing
  7 |    CRCs is not something unheard of.
  8 | 
  9 | 2. Functionality. Computing CRC is not enough. Oftentimes,
 10 |    distributed systems need to perform various operations
 11 |    using known CRC values (concatenation, data replacement,
 12 |    etc.) without touching the actual data.
 13 | 
 14 | 3. Functionality verification: ability to catch even the most
 15 |    subtle bugs in CRC implementation.
 16 | 
 17 | 4. Performance benchmarking: ability to evaluate performance
 18 |    of known CRC algorithms and choose the right one for given
 19 |    architecture and/or compiler.
 20 | 
 21 | 5. Support most popular and most advanced CPUs [typically
 22 |    used in distributed environments]. That is, AMD64 and X86.
 23 | 
 24 | 6. Support most popular compilers used to compile code running
 25 |    in distributed environments. That is, Microsoft's CL, GCC,
 26 |    and Intel's ICL.
 27 | 
 28 | 7. Ability to easily (at run-time) create CRCs for arbitrary
 29 |    generating polynomials. Many complex projects have to deal
 30 |    with multiple CRC generating polynomials. Adding support for
 31 |    yet another one should be a 1-line change, not a 2-week journey.
 32 | 
 33 | 
 34 | Caveats
 35 | -------
 36 | 
 37 | 1. Only little-endian CPUs are supported. Reason: all the
 38 |    optimizations make sense only when the CPU has multiple ALUs
 39 |    and may execute multiple instructions in parallel. I cannot
 40 |    easily recall big-endian CPUs like that (probably PPC and
 41 |    IBM's Z-series?) -- and, unless CPU is powerful enough,
 42 |    trivial byte-by-byte Sarwate algorithm is hard to beat.
 43 | 
 44 | 2. The only CPUs the code was tested on are the AMD64 and X86 family.
 45 |    I do not have access to Itanium. I tried to do my best to
 46 |    allow the code to work on Itanium as is, but I will not
 47 |    be very surprised if I overlooked something.
 48 | 
 49 | 
 50 | How it all works
 51 | ----------------
 52 | 
 53 | Please read crc.pdf in "docs" directory -- it explains, slowly,
 54 | step-by-step, how it all works, and provides small listings
 55 | of respective algorithms that (hopefully, clearly) demonstrate
 56 | how specific algorithm is implemented -- actual implementation
 57 | is heavily optimized, a lot of loops are unrolled, and comments
 58 | explain only the most subtle details of implementation.
 59 | 
 60 | 
 61 | Usage
 62 | -----
 63 | 
 64 | "unittest.cc" is standalone unit test which perform extensive
 65 | functionality validation and also tests performance of key scenarios.
 66 | Please keep in mind that it takes almost a minute for GCC to compile
 67 | it. Full performance test takes a couple of hours.
 68 | 
 69 | "generic_crc.h" provides a set of implementations of generic CRCs.
 70 | "crc32c*" set of files implements CRC using Intel's CRC32 instruction.
 71 | "multiword*" set of files implements specialized -- and heavily
 72 | optimized -- versions of multiword CRC.
 73 | 
 74 | However, including these files directly into your project may be
 75 | a bad idea -- there is a lot of quite heavy-weight template code
 76 | that you probably do not want to see included into every file that
 77 | uses CRCs.
 78 | 
 79 | Instead, use "interface.h" which hides all the details of the
 80 | implementation. It declares one namespace, two types in that
 81 | namespace, and brings in a couple of standard ANSI C headers.
 82 | 
 83 | Another advantage of using "interface.h" is that actual
 84 | implementation will pick the most efficient implementation
 85 | of CRC for specific platform and compiler (applies to
 86 | AMD64 and X86 platform and CL, ICL, and GCC compilers only).
 87 | 
 88 | Please see "usage.cc" which provides an example how to use
 89 | crcutil_interface::CRC class.
 90 | 
 91 | 
 92 | Compiler optimization settings
 93 | ------------------------------
 94 | 
 95 | Recommended compiler flags:
 96 | CL: -O2 -Wall
 97 | ICL: -O3 -Wall -Qdiag-disable:181 -Qdiag-disable:185 -Qdiag-disable:442 -Qdiag-disable:vec
 98 | GCC 4.5+: -O3 -Wall -msse2 -mcrc32
 99 | GCC 4.4-, AMD64: -O3 -Wall -msse2 -DCRCUTIL_USE_MM_CRC32=1
100 | GCC 4.4-, I386: -O3 -Wall -msse2 -DCRCUTIL_USE_MM_CRC32=1 -fomit-frame-pointer
101 | 
102 | 
103 | Compile-time constants
104 | ----------------------
105 | 
106 | CRCUTIL_USE_ASM
107 |     Allows the use of inline ASM for GCC on AMD64 and I386 platforms,
108 |     32-bit Intel and Microsoft compilers on Windows.
109 | 
110 |     See multiword*.cc files.
111 | 
112 |     By default, turned on.
113 | 
114 | 
115 | HAVE_MMX
116 |     MMX and respective intrinsics are available. When MMX is available, it
117 |     will be used on I386 platform to speed up computation of up to 64-bit
118 |     CRCs (1.3 CPU cycles/byte, see *i386_mmx.cc files).
119 | 
120 |     By default, enabled on AMD64 and I386 platforms, disabled otherwise.
121 | 
122 | 
123 | HAVE_SSE
124 |     By default, enabled on AMD64 and I386 platforms, disabled otherwise.
125 | 
126 | 
127 | HAVE_SSE2
128 |     By default, enabled on AMD64 and I386 platforms, disabled otherwise.
129 | 
130 |     Allows the use of SSE2 instructions to compute 128-bit CRCs efficiently
131 |     (see uint128_sse2.h, multiword_128_64_gcc_amd64_sse2.cc).
132 | 
133 | 
134 | CRCUTIL_PREFETCH_WIDTH
135 |     Prefetch width (default is 0 -- read platform.h to see why).
136 | 
137 |     When CRCUTIL_PREFETCH_WIDTH > 0 and HAVE_SSE, the code will try to
138 |     prefetch CRCUTIL_PREFETCH_WIDTH bytes ahead.
139 | 
140 | 
141 | CRCUTIL_MIN_ALIGN_SIZE
142 |     Align input pointer on word boundary when input length exceeds
143 |     CRCUTIL_MIN_ALIGN_SIZE bytes and when CRC implementation will read
144 |     input data by words.
145 | 
146 |     Non-AMD64/I386 do not allow misaligned reads, so default value of
147 |     CRCUTIL_MIN_ALIGN_SIZE is 0.
148 | 
149 |     On AMD64 and I386 platforms, default value is 1KB. Even though AMD64
150 |     and I386 allow non-aligned reads, crossing cache line boundary is not
151 |     free, and it makes sense to align large inputs first before processing
152 |     them (see generic_crc.h for more details).
153 | 
154 | 
155 | CRCUTIL_USE_MM_CRC32
156 |     Allows the use of the SSE4.2 crc32 instruction when computing CRC32C (0.13 CPU
157 |     cycles per byte, see crc32c_sse4* files).
158 | 
159 |     If set to false (i.e. 0), _mm_crc32_u*() intrinsics will be simulated
160 |     (useful for debugging crc32c_sse4 code on machines that do not support
161 |     SSE 4.2).
162 | 
163 |     Hardware-assisted CRC32C is supported on AMD64 and I386 platforms only.
164 | 
165 |     By default, enabled for Windows and for "g++ -msse4".
166 | 
167 |     With GCC 4.5, it is possible to compile the code using "-msse2 -mcrc32
168 |     -DCRCUTIL_USE_MM_CRC32=1" flags.
169 | 
170 |     GCC 4.4 and earlier do not support "-mcrc32" flag, but it is still
171 |     possible to use crc32c_sse4 code by compiling the code using "-msse2
172 |     -DCRCUTIL_USE_MM_CRC32=1" flags. In this case, inline asm code will be
173 |     used (see crc32c_sse4_intrin.h).
174 | 
175 | 
176 | CRCUTIL_FORCE_ASM_CRC32C
177 |     GCC 4.4 and earlier versions do not have -mcrc32 flag, so
178 |     _mm_crc32_u64/32/8 intrinsics there are not available from standard
179 |     headers. They are replaced by inline asm code (see
180 |     crc32c_sse4_intrin.h). To test backward compatibility using GCC 4.5+,
181 |     use "-Wall -O3 -msse2 -DCRCUTIL_USE_MM_CRC32=1
182 |     -DCRCUTIL_FORCE_ASM_CRC32C=1".
183 | 
184 | 
185 | [The end of the document]
186 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/autogen.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # See http://mij.oltrelinux.com/devel/autoconf-automake/
  4 | 
  5 | if [ -f "Makefile" ] && [ -f "Makefile.am" ] && [ -f "Makefile.in" ] && [ -d ".deps" ] ; then
  6 |   make clean
  7 | fi
  8 | 
  9 | echo "Removing old garbage"
 10 | if [ "${1}" != "clean" ] || [ "${2}" == "clean" ]; then
 11 | # "./mk.sh clean" leave all the files needed for "./configure && make".
 12 | # "./mk.sh clean clean" deletes them as well.
 13 | # Full clean build starts from removing all generated files.
 14 |   rm -f Makefile
 15 |   rm -f Makefile.am
 16 |   rm -f Makefile.in
 17 |   rm -f aclocal.m4
 18 |   rm -f config.h.in
 19 |   rm -f configure
 20 |   rm -f configure.ac
 21 |   rm -f depcomp
 22 |   rm -f install-sh
 23 |   rm -f missing
 24 | fi
 25 | 
 26 | rm -f autoscan.log
 27 | rm -f config.h
 28 | rm -f config.log
 29 | rm -f config.status
 30 | rm -f stamp-h1
 31 | if [ -d "autom4te.cache" ]; then
 32 |   rm -r autom4te.cache
 33 | fi
 34 | if [ -d ".deps" ]; then
 35 |   rm -r .deps
 36 | fi
 37 | 
 38 | if [ "${1}" == "clean" ]; then
 39 |   exit
 40 | fi
 41 | 
 42 | echo "Generating preliminary configure.ac"
 43 | autoscan
 44 | 
 45 | sed 's/^AC_INIT(.*$/AC_INIT(crcutil, 1.0, crcutil@googlegroups.com)\
 46 | AM_INIT_AUTOMAKE(crcutil, 1.0)\
 47 | AC_CONFIG_FILES([Makefile]) \
 48 | AC_OUTPUT()/' configure.scan >configure.ac
 49 | 
 50 | # AC_OUTPUT(Makefile)/' configure.scan >configure.ac
 51 | rm -f configure.scan
 52 | 
 53 | echo "Generating final configure.ac"
 54 | aclocal
 55 | autoconf
 56 | 
 57 | echo "Generating config.h.in"
 58 | autoheader
 59 | 
 60 | target=./Makefile.am
 61 | echo "Generating ${target}"
 62 | echo>${target} "AUTOMAKE_OPTIONS=foreign"
 63 | # Every later write must append (>>) so the line above is not truncated.
 64 | # --pedantic -std=c99?
 65 | crcutil_flags="-DCRCUTIL_USE_MM_CRC32=1 -Wall -msse2 -Icode"
 66 | echo>>${target} "AM_CXXFLAGS=${crcutil_flags}"
 67 | if [ "$(uname -a | grep ^Darwin)" == "" ] && [[ "$(c++ -dumpversion)" > "4.4.9" ]]; then
 68 |   # Static linking is not supported on Mac OS X. On Linux it avoids an
 69 |   # obscure GCC 4.5.0 linker warning (the code works nevertheless).
 70 |   # NOTE(review): ">" compares strings, so e.g. "10" sorts below "4.4.9".
 71 |   echo>>${target} "AM_LDFLAGS=-static"
 72 | fi
 73 | echo>>${target} 'AM_CFLAGS=$(AM_CXXFLAGS)'
 74 | echo>>${target} "check_PROGRAMS=crcutil_ut"
 75 | echo>>${target} "TESTS=crcutil_ut"
 76 | sources=$(ls tests/*.cc tests/*.c tests/*.h code/*.cc code/*.h | grep -v intrinsic | tr "\n" " ")
 77 | echo>>${target} "crcutil_ut_SOURCES=${sources}"
 78 | 
 79 | echo>>${target} "tmpdir=/tmp"
 80 | echo>>${target} "tmp_PROGRAMS=usage"
 81 | echo>>${target} 'usage_CXXFLAGS=$(AM_CXXFLAGS) -Itests'
 82 | sources=$(ls examples/*.cc examples/*.h code/*.cc code/*.h tests/aligned_alloc.h | grep -v intrinsic | tr "\n" " ")
 83 | echo>>${target} "usage_SOURCES=${sources}"
 84 | 
 85 | echo "Creating Makefile.in"
 86 | aclocal
 87 | automake --add-missing
 88 | autoconf
 89 | 
 90 | cflags="-O3"
 91 | if [[ "$(c++ -dumpversion)" > "4.4.9" ]]; then
 92 |   cflags="${cflags} -mcrc32"
 93 | fi
 94 | 
 95 | cflags="${cflags} $2"
 96 | 
 97 | ./configure CXXFLAGS="${cflags}" CFLAGS="${cflags}"
 98 | 
 99 | echo ""
100 | echo "Configured the library. Compiler flags:"
101 | echo "  ${cflags}"
102 | echo "Library configuration flags:"
103 | echo "  ${crcutil_flags}"
104 | echo ""
105 | 
106 | if [ "${1}" == "configure" ]; then
107 |   exit
108 | fi
109 | 
110 | make $1
111 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/base_types.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Defines 8/16/32/64-bit integer types.
16 | //
17 | // Either uint64 or uint32 will map to size_t.
18 | // This way, specialized variants of CRC implementation
19 | // parameterized by "size_t" will be reused when
20 | // parameterized by "uint64" or "uint32".
21 | // In their turn, specialized versions are parameterized
22 | // by "size_t" so that one version of the code is optimal
23 | // both on 32-bit and 64-bit platforms.
24 | 
25 | #ifndef CRCUTIL_BASE_TYPES_H_
26 | #define CRCUTIL_BASE_TYPES_H_
27 | 
28 | #include "std_headers.h"    // size_t, ptrdiff_t
29 | 
30 | namespace crcutil {
31 | 
32 | template<typename A, typename B> class ChooseFirstIfSame {
33 |  public:
34 |   template<bool same_size, typename AA, typename BB> class ChooseFirstIfTrue {
35 |    public:
36 |     typedef AA Type;
37 |   };
38 |   template<typename AA, typename BB> class ChooseFirstIfTrue<false, AA, BB> {
39 |    public:
40 |     typedef BB Type;
41 |   };
42 | 
43 |   typedef typename ChooseFirstIfTrue<sizeof(A) == sizeof(B), A, B>::Type Type;
44 | };
45 | 
46 | typedef unsigned char uint8;
47 | typedef signed char int8;
48 | 
49 | typedef unsigned short uint16;
50 | typedef short int16;
51 | 
52 | typedef ChooseFirstIfSame<size_t, unsigned int>::Type uint32;
53 | typedef ChooseFirstIfSame<ptrdiff_t, int>::Type int32;
54 | 
55 | #if defined(_MSC_VER)
56 | typedef ChooseFirstIfSame<size_t, unsigned __int64>::Type uint64;
57 | typedef ChooseFirstIfSame<ptrdiff_t, __int64>::Type int64;
58 | #define HAVE_UINT64 1
59 | #elif defined(__GNUC__)
60 | typedef ChooseFirstIfSame<size_t, unsigned long long>::Type uint64;
61 | typedef ChooseFirstIfSame<ptrdiff_t, long long>::Type int64;
62 | #define HAVE_UINT64 1
63 | #else
64 | // TODO: ensure that everything compiles and works when HAVE_UINT64 is false.
65 | // TODO: remove HAVE_UINT64 and use sizeof(uint64) instead?
66 | #define HAVE_UINT64 0
67 | typedef uint32 uint64;
68 | typedef int32 int64;
69 | #endif
70 | 
71 | }  // namespace crcutil
72 | 
73 | #endif  // CRCUTIL_BASE_TYPES_H_
74 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/crc32c_sse4.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Implements CRC32C using Intel's SSE4 crc32 instruction.
 16 | // Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
 17 | // emulates intrinsics via CRC_WORD/CRC_BYTE otherwise.
 18 | 
 19 | #ifndef CRCUTIL_CRC32C_SSE4_H_
 20 | #define CRCUTIL_CRC32C_SSE4_H_
 21 | 
 22 | #include "gf_util.h"              // base types, gf_util class, etc.
 23 | #include "crc32c_sse4_intrin.h"   // _mm_crc32_u* intrinsics
 24 | 
 25 | #if HAVE_I386 || HAVE_AMD64
 26 | 
 27 | #if CRCUTIL_USE_MM_CRC32
 28 | 
 29 | #if HAVE_I386
 30 | #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
 31 | #else
 32 | #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
 33 | #endif  // HAVE_I386
 34 | 
 35 | #define CRC_UPDATE_BYTE(crc, value) \
 36 |     (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
 37 | 
 38 | #else
 39 | 
 40 | #include "generic_crc.h"
 41 | 
 42 | #define CRC_UPDATE_WORD(crc, value) do { \
 43 |   size_t buf = (value); \
 44 |   CRC_WORD(this, crc, buf); \
 45 | } while (0)
 46 | #define CRC_UPDATE_BYTE(crc, value) do { \
 47 |   CRC_BYTE(this, crc, (value)); \
 48 | } while (0)
 49 | 
 50 | #endif  // CRCUTIL_USE_MM_CRC32
 51 | 
 52 | namespace crcutil {
 53 | 
 54 | #pragma pack(push, 16)
 55 | 
 56 | // Since the same pieces should be parameterized in many different places
 57 | // and we do not want to introduce a mistake which is rather hard to find,
 58 | // use a macro to enumerate all block sizes.
 59 | //
 60 | // Block sizes and number of stripes were tuned for best performance.
 61 | //
 62 | // All constants should be literal constants (too lazy to fix the macro).
 63 | //
 64 | // The use of different "macro_first", "macro", and "macro_last"
 65 | // allows generation of different code for smallest, in between,
 66 | // and largest block sizes.
 67 | //
 68 | // This macro shall be kept in sync with
 69 | // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
 70 | // Failure to do so will cause compile-time error.
 71 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
 72 |     macro_smallest, macro, macro_largest) \
 73 |   macro_smallest(512, 3); \
 74 |   macro(1024, 3); \
 75 |   macro(4096, 3); \
 76 |   macro_largest(32768, 3)
 77 | 
 78 | // This macro shall be kept in sync with
 79 | // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
 80 | // Failure to do so will cause compile-time error.
 81 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
 82 |     macro_smallest, macro, macro_largest) \
 83 |   macro_largest(32768, 3); \
 84 |   macro(4096, 3); \
 85 |   macro(1024, 3); \
 86 |   macro_smallest(512, 3)
 87 | 
 88 | // Enumerates all block sizes.
 89 | #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
 90 |   CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
 91 | 
 92 | #define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
 93 |   (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
 94 | 
 95 | #define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
 96 |   (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
 97 | // Name of the multiplication table member for a (block size, stripes) pair.
 98 | #define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
 99 |   mul_table_##block_size##_##num_stripes##_
100 | 
101 | class RollingCrc32cSSE4;
102 | 
103 | class Crc32cSSE4 {
104 |  public:
105 |   // Exports Crc, TableEntry, and Word (needed by RollingCrc).
106 |   typedef size_t Crc;
107 |   typedef Crc Word;
108 |   typedef Crc TableEntry;
109 | 
110 |   Crc32cSSE4() {}
111 | 
112 |   // Initializes the tables given generating polynomial of degree (degree).
113 |   // If "canonical" is true, crc value will be XOR'ed with (-1) before and
114 |   // after actual CRC computation.
115 |   explicit Crc32cSSE4(bool canonical) {
116 |     Init(canonical);
117 |   }
118 |   void Init(bool canonical);
119 | 
120 |   // Initializes the tables given generating polynomial of degree.
121 |   // If "canonical" is true, crc value will be XOR'ed with (-1) before and
122 |   // after actual CRC computation.
123 |   // Provided for compatibility with GenericCrc.
124 |   Crc32cSSE4(const Crc &generating_polynomial,
125 |             size_t degree,
126 |             bool canonical) {
127 |     Init(generating_polynomial, degree, canonical);
128 |   }
129 |   void Init(const Crc &generating_polynomial,
130 |             size_t degree,
131 |             bool canonical) {
132 |     if (generating_polynomial == FixedGeneratingPolynomial() &&
133 |         degree == FixedDegree()) {
134 |       Init(canonical);
135 |     }
136 |   }
137 | 
138 |   // Returns fixed generating polynomial the class implements.
139 |   static Crc FixedGeneratingPolynomial() {
140 |     return 0x82f63b78;
141 |   }
142 | 
143 |   // Returns degree of fixed generating polynomial the class implements.
144 |   static Crc FixedDegree() {
145 |     return 32;
146 |   }
147 | 
148 |   // Returns base class.
149 |   const GfUtil<Crc> &Base() const { return base_; }
150 | 
151 |   // Computes CRC32.
152 |   size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
153 |     return Crc32c(data, bytes, crc);
154 |   }
155 | 
156 |   // Returns true iff crc32 instruction is available.
157 |   static bool IsSSE42Available();
158 | 
159 |  protected:
160 |   // Actual implementation.
161 |   size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
162 | 
163 |   enum {
164 |     kTableEntryBits = 8,
165 |     kTableEntries = 1 << kTableEntryBits,
166 |     kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
167 |     kNumTablesHalfLo = kNumTables / 2,
168 |     kNumTablesHalfHi = (kNumTables + 1) / 2,
169 | 
170 |     kUnrolledLoopCount = 8,
171 |     kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
172 |   };
173 | 
174 |   // May be set to size_t or uint32, whichever is faster.
175 |   typedef uint32 Entry;
176 | 
177 | #define DECLARE_MUL_TABLE(block_size, num_stripes) \
178 |   Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
179 |       [kNumTables][kTableEntries]
180 | 
181 |   CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
182 | 
183 | #undef DECLARE_MUL_TABLE
184 | 
185 |   GfUtil<Crc> base_;
186 | 
187 | #if !CRCUTIL_USE_MM_CRC32
188 |   TableEntry crc_word_[sizeof(Word)][256];
189 |   friend class RollingCrc32cSSE4;
190 | #endif  // !CRCUTIL_USE_MM_CRC32
191 | } GCC_ALIGN_ATTRIBUTE(16);
192 | 
193 | class RollingCrc32cSSE4 {
194 |  public:
195 |   typedef Crc32cSSE4::Crc Crc;
196 |   typedef Crc32cSSE4::TableEntry TableEntry;
197 |   typedef Crc32cSSE4::Word Word;
198 | 
199 |   RollingCrc32cSSE4() {}
200 | 
201 |   // Initializes internal data structures.
202 |   // Retains reference to "crc" instance -- it is used by Start().
203 |   RollingCrc32cSSE4(const Crc32cSSE4 &crc,
204 |             size_t roll_window_bytes,
205 |             const Crc &start_value) {
206 |     Init(crc, roll_window_bytes, start_value);
207 |   }
208 |   void Init(const Crc32cSSE4 &crc,
209 |             size_t roll_window_bytes,
210 |             const Crc &start_value);
211 | 
212 |   // Computes crc of "roll_window_bytes" using
213 |   // "start_value" of "crc" (see Init()).
214 |   Crc Start(const void *data) const {
215 |     return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
216 |   }
217 | 
218 |   // Computes CRC of "roll_window_bytes" starting in next position.
219 |   Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
220 |     Crc crc = old_crc;
221 |     CRC_UPDATE_BYTE(crc, byte_in);
222 |     crc ^= out_[byte_out];
223 |     return crc;
224 |   }
225 | 
226 |   // Returns start value.
227 |   Crc StartValue() const { return start_value_; }
228 | 
229 |   // Returns length of roll window.
230 |   size_t WindowBytes() const { return roll_window_bytes_; }
231 | 
232 |  protected:
233 |   typedef Crc Entry;
234 |   Entry out_[256];
235 | 
236 |   // Used only by Start().
237 |   Crc start_value_;
238 |   const Crc32cSSE4 *crc_;
239 |   size_t roll_window_bytes_;
240 | 
241 | #if !CRCUTIL_USE_MM_CRC32
242 |   TableEntry crc_word_[sizeof(Word)][256];
243 | #endif  // !CRCUTIL_USE_MM_CRC32
244 | } GCC_ALIGN_ATTRIBUTE(16);
245 | 
246 | #pragma pack(pop)
247 | 
248 | }  // namespace crcutil
249 | 
250 | #endif  // HAVE_I386 || HAVE_AMD64
251 | 
252 | #endif  // CRCUTIL_CRC32C_SSE4_H_
253 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/crc32c_sse4_intrin.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Provides _mm_crc32_u64/32/8 intrinsics.
 16 | 
 17 | #ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_
 18 | #define CRCUTIL_CRC32C_SSE4_INTRIN_H_
 19 | 
 20 | #include "platform.h"
 21 | #include "base_types.h"
 22 | 
 23 | #if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
 24 | 
 25 | #if defined(_MSC_VER) || defined(__SSE4_2__)
 26 | 
 27 | #if defined(_MSC_VER)
 28 | #pragma warning(push)
 29 | // '_M_IA64' is not defined as a preprocessor macro
 30 | #pragma warning(disable: 4668)
 31 | #endif  // defined(_MSC_VER)
 32 | 
 33 | #include <nmmintrin.h>
 34 | 
 35 | #if defined(_MSC_VER)
 36 | #pragma warning(pop)
 37 | #endif  // defined(_MSC_VER)
 38 | 
 39 | #elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C)
 40 | // Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32
 41 | // is set irrespective of "-msse*" settings. This way, the sources
 42 | // may be compiled with "-msse2 -mcrc32" and work on older CPUs,
 43 | // while taking full advantage of "crc32" instruction on newer
 44 | // CPUs (requires dynamic CPU detection). See "interface.cc".
 45 | //
 46 | // If neither -msse4 nor -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set
 47 | // and CRCUTIL_FORCE_ASM_CRC32C is not set, compile-time error will happen.
 48 | // Why? Because GCC disables __builtin_ia32_crc32* intrinsics when compiled
 49 | // without -msse4 or -mcrc32. -msse4 could be detected at compile time by
 50 | // checking whether __SSE4_2__ is defined, but there is no way to tell
 51 | // whether the sources are compiled with -mcrc32.
 52 | 
 53 | extern __inline unsigned int __attribute__((
 54 |     __gnu_inline__, __always_inline__, __artificial__))
 55 | _mm_crc32_u8(unsigned int __C, unsigned char __V) {
 56 |   //return __builtin_ia32_crc32qi(__C, __V);
 57 |   return 0;  // NOTE(review): stub, builtin call disabled above; no CRC computed.
 58 | }
 59 | #ifdef __x86_64__
 60 | extern __inline unsigned long long __attribute__((
 61 |     __gnu_inline__, __always_inline__, __artificial__))
 62 | _mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
 63 |   //return __builtin_ia32_crc32di(__C, __V);
 64 |   return 0;  // NOTE(review): stub, builtin call disabled above; no CRC computed.
 65 | }
 66 | #else
 67 | extern __inline unsigned int __attribute__((
 68 |     __gnu_inline__, __always_inline__, __artificial__))
 69 | _mm_crc32_u32(unsigned int __C, unsigned int __V) {
 70 |   //return __builtin_ia32_crc32si (__C, __V);
 71 |   return 0;  // NOTE(review): stub, builtin call disabled above; no CRC computed.
 72 | }
 73 | #endif  // __x86_64__
 74 | 
 75 | #else
 76 | 
 77 | // GCC 4.4.x and earlier: use inline asm.
 78 | 
 79 | namespace crcutil {
 80 | 
 81 | __forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) {
 82 |   asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
 83 |   return crc;
 84 | }
 85 | 
 86 | __forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) {
 87 |   asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
 88 |   return crc;
 89 | }
 90 | 
 91 | __forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) {
 92 |   asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
 93 |   return crc;
 94 | }
 95 | 
 96 | }  // namespace crcutil
 97 | 
 98 | #endif
 99 | 
100 | #endif  // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
101 | 
102 | #endif  // CRCUTIL_CRC32C_SSE4_INTRIN_H_
103 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/crc_casts.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Casting between integers and compound CRC types.
16 | 
17 | #ifndef CRCUTIL_CRC_CASTS_H_
18 | #define CRCUTIL_CRC_CASTS_H_
19 | 
20 | #include "base_types.h"   // uint8, uint64
21 | #include "platform.h"     // __forceinline
22 | 
23 | namespace crcutil {
24 | 
25 | // Downcasts a value of (oftentimes larger) Crc type to (smaller base integer)
26 | // Result type, enabling specialized downcasts implemented by "large integer"
27 | // classes (e.g. uint128_sse2).
28 | template<typename Crc, typename Result>
29 | __forceinline Result Downcast(const Crc &x) {
30 |   return static_cast<Result>(x);
31 | }
32 | 
33 | // Extracts 8 least significant bits from a value of Crc type.
34 | #define TO_BYTE(x) Downcast<Crc, uint8>(x)
35 | 
36 | // Converts a pair of uint64 bit values into single value of CRC type.
37 | // It is caller's responsibility to ensure that the input is correct.
38 | template<typename Crc>
39 | __forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) {
40 |   if (sizeof(Crc) <= sizeof(lo)) {
41 |     return static_cast<Crc>(lo);
42 |   } else {
43 |     // static_cast to keep compiler happy.
44 |     Crc result = static_cast<Crc>(hi);
45 |     result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo));
46 |     result ^= lo;
47 |     return result;
48 |   }
49 | }
50 | 
51 | // Splits "crc" into uint64 halves: "*lo" gets the low 64 bits; "*hi" (may
52 | // be NULL to skip it) gets the bits above them, or 0 if Crc fits in uint64.
53 | template<typename Crc> __forceinline void Uint64FromCrc(
54 |     const Crc &crc, uint64 *lo, uint64 *hi = NULL) {
55 |   *lo = Downcast<Crc, uint64>(crc);
56 |   if (hi == NULL) {
57 |     return;  // Low half only; do not dereference the NULL default.
58 |   } else if (sizeof(*lo) >= sizeof(crc)) {
59 |     *hi = 0;
60 |   } else {
61 |     // Shift by the width of uint64, sizeof(*lo), not of the pointer "lo".
62 |     *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(*lo)));
63 |   }
64 | }
65 | 
66 | }  // namespace crcutil
67 | 
68 | #endif  // CRCUTIL_CRC_CASTS_H_
69 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Implements 64-bit multiword CRC using MMX built-in functions.
 16 | 
 17 | #include "generic_crc.h"
 18 | 
 19 | #if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && !defined(_MSC_VER) && !(defined(__GNUC__) && !defined(__clang__))
 20 | 
 21 | namespace crcutil {
 22 | 
 23 | template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
 24 |     const void *data, size_t bytes, const uint64 &start)
 25 |         const GCC_OMIT_FRAME_POINTER;
 26 | 
 27 | #if !defined(_MSC_VER)
 28 | template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
 29 |     const void *data,
 30 |     size_t bytes,
 31 |     const uint64 &start) const {
 32 |   if (bytes <= 7) {
 33 |     const uint8 *src = static_cast<const uint8 *>(data);
 34 |     uint64 crc = start ^ Base().Canonize();
 35 |     for (const uint8 *end = src + bytes; src < end; ++src) {
 36 |       CRC_BYTE(this, crc, *src);
 37 |     }
 38 |     return (crc ^ Base().Canonize());
 39 |   }
 40 |   return CrcMultiwordI386Mmx(data, bytes, start);
 41 | }
 42 | #else
 43 | #pragma warning(push)
 44 | // CL: uninitialized local variable 'crc1' used
 45 | // Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
 46 | #pragma warning(disable: 4700)
 47 | 
 48 | #pragma warning(disable: 4619)  // there is no warning number '592'
 49 | 
 50 | // ICL: variable "crc1" is used before its value is set
 51 | // Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
 52 | #pragma warning(disable: 592)
 53 | #endif  // !defined(_MSC_VER)
 54 | 
 55 | #define MM64(adr) reinterpret_cast<const __m64 *>(adr)
 56 | #define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte])
 57 | 
 58 | #define CRC_WORD_MMX(this, crc, buf) do { \
 59 |   buf = _mm_xor_si64(buf, crc); \
 60 |   uint32 tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
 61 |   buf = _mm_srli_si64(buf, 32); \
 62 |   crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \
 63 |   tmp >>= 8; \
 64 |   crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \
 65 |   tmp >>= 8; \
 66 |   crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \
 67 |   tmp >>= 8; \
 68 |   crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \
 69 |   tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
 70 |   crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \
 71 |   tmp >>= 8; \
 72 |   crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \
 73 |   tmp >>= 8; \
 74 |   crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \
 75 |   tmp >>= 8; \
 76 |   crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \
 77 | } while (0)
 78 | 
 79 | template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
 80 |     const void *data, size_t bytes, const uint64 &start) const {
 81 |   const uint8 *src = static_cast<const uint8 *>(data);
 82 |   const uint8 *end = src + bytes;
 83 |   uint64 crc = start ^ Base().Canonize();
 84 | 
 85 |   ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
 86 |   if (src >= end) {
 87 |     return (crc ^ Base().Canonize());
 88 |   }
 89 | 
 90 |   // Process 4 registers of sizeof(uint64) bytes at once.
 91 |   bytes = static_cast<size_t>(end - src) & ~(4*8 - 1);
 92 |   if (bytes > 4*8) {
 93 |     const uint8 *stop = src + bytes - 4*8;
 94 |     union {
 95 |       __m64 m64;
 96 |       uint64 u64;
 97 |     } temp;
 98 |     __m64 crc0;
 99 |     __m64 crc1;
100 |     __m64 crc2;
101 |     __m64 crc3;
102 |     __m64 buf0 = MM64(src)[0];
103 |     __m64 buf1 = MM64(src)[1];
104 |     __m64 buf2 = MM64(src)[2];
105 |     __m64 buf3 = MM64(src)[3];
106 | 
107 |     temp.u64 = crc;
108 |     crc0 = temp.m64;
109 | #if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
110 |     // There is no way to suppress a warning in GCC;
111 |     // generate extra assignments.
112 |     temp.u64 = 0;
113 |     crc1 = temp.m64;
114 |     crc2 = temp.m64;
115 |     crc3 = temp.m64;
116 | #else
117 |     crc1 = _mm_xor_si64(crc1, crc1);
118 |     crc2 = _mm_xor_si64(crc2, crc2);
119 |     crc3 = _mm_xor_si64(crc3, crc3);
120 | #endif  // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
121 | 
122 |     do {
123 |       PREFETCH(src);
124 |       src += 4*8;
125 | 
126 |       buf0 = _mm_xor_si64(buf0, crc0);
127 |       buf1 = _mm_xor_si64(buf1, crc1);
128 |       buf2 = _mm_xor_si64(buf2, crc2);
129 |       buf3 = _mm_xor_si64(buf3, crc3);
130 | 
131 |       uint32 tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
132 |       uint32 tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
133 |       uint32 tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
134 |       uint32 tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
135 | 
136 |       buf0 = _mm_srli_si64(buf0, 32);
137 |       buf1 = _mm_srli_si64(buf1, 32);
138 |       buf2 = _mm_srli_si64(buf2, 32);
139 |       buf3 = _mm_srli_si64(buf3, 32);
140 | 
141 |       crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)];
142 |       tmp0 >>= 8;
143 |       crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)];
144 |       tmp1 >>= 8;
145 |       crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)];
146 |       tmp2 >>= 8;
147 |       crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)];
148 |       tmp3 >>= 8;
149 | 
150 | #define XOR(byte) do { \
151 |       crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \
152 |       tmp0 >>= 8; \
153 |       crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \
154 |       tmp1 >>= 8; \
155 |       crc2 = _mm_xor_si64(crc2, MM64_TABLE(byte)[TO_BYTE(tmp2)]); \
156 |       tmp2 >>= 8; \
157 |       crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \
158 |       tmp3 >>= 8; \
159 | } while (0)
160 | 
161 |       XOR(1);
162 |       XOR(2);
163 | 
164 |       crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]);
165 |       tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
166 |       crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]);
167 |       tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
168 |       crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]);
169 |       tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
170 |       crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]);
171 |       tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
172 | 
173 |       XOR(4);
174 |       XOR(5);
175 |       XOR(6);
176 | 
177 | #undef XOR
178 | 
179 |       crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]);
180 |       buf0 = MM64(src)[0];
181 |       crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]);
182 |       buf1 = MM64(src)[1];
183 |       crc2 = _mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]);
184 |       buf2 = MM64(src)[2];
185 |       crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]);
186 |       buf3 = MM64(src)[3];
187 |     }
188 |     while (src < stop);
189 | 
190 |     CRC_WORD_MMX(this, crc0, buf0);
191 |     buf1 = _mm_xor_si64(buf1, crc1);
192 |     CRC_WORD_MMX(this, crc0, buf1);
193 |     buf2 = _mm_xor_si64(buf2, crc2);
194 |     CRC_WORD_MMX(this, crc0, buf2);
195 |     buf3 = _mm_xor_si64(buf3, crc3);
196 |     CRC_WORD_MMX(this, crc0, buf3);
197 | 
198 |     temp.m64 = crc0;
199 |     crc = temp.u64;
200 | 
201 |     _mm_empty();
202 | 
203 |     src += 4*8;
204 |   }
205 | 
206 |   // Process sizeof(uint64) bytes at once.
207 |   bytes = static_cast<size_t>(end - src) & ~(sizeof(uint64) - 1);
208 |   if (bytes > 0) {
209 |     union {
210 |       __m64 m64;
211 |       uint64 u64;
212 |     } temp;
213 |     __m64 crc0;
214 | 
215 |     temp.u64 = crc;
216 |     crc0 = temp.m64;
217 | 
218 |     for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) {
219 |       __m64 buf0 = MM64(src)[0];
220 |       CRC_WORD_MMX(this, crc0, buf0);
221 |     }
222 | 
223 |     temp.m64 = crc0;
224 |     crc = temp.u64;
225 | 
226 |     _mm_empty();
227 |   }
228 | 
229 |   // Compute CRC of remaining bytes.
230 |   for (;src < end; ++src) {
231 |     CRC_BYTE(this, crc, *src);
232 |   }
233 | 
234 |   return (crc ^ Base().Canonize());
235 | }
236 | 
237 | #if defined(_MSC_VER)
238 | #pragma warning(pop)
239 | #endif  // defined(_MSC_VER)
240 | 
241 | }  // namespace crcutil
242 | 
243 | #endif  // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
244 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/platform.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Detects configuration and defines compiler-specific macros.
 16 | // Also, sets user-defined CRCUTIL_USE_* macros to default values.
 17 | 
 18 | #ifndef CRCUTIL_PLATFORM_H_
 19 | #define CRCUTIL_PLATFORM_H_
 20 | 
 21 | // Permanently disable some annoying warnings generated
 22 | // by Microsoft CL when compiling Microsoft's headers.
 23 | #include "std_headers.h"
 24 | 
 25 | // Use inline asm version of the code?
 26 | #if !defined(CRCUTIL_USE_ASM)
 27 | #define CRCUTIL_USE_ASM 1
 28 | #endif  // !defined(CRCUTIL_USE_ASM)
 29 | 
 30 | 
 31 | #if !defined(HAVE_I386)
 32 | #if defined(__i386__) || defined(_M_IX86)
 33 | #define HAVE_I386 1
 34 | #else
 35 | #define HAVE_I386 0
 36 | #endif  // defined(__i386__) || defined(_M_IX86)
 37 | #endif  // !defined(HAVE_I386)
 38 | 
 39 | 
 40 | #if !defined(HAVE_AMD64)
 41 | #if defined(__amd64__) || defined(_M_AMD64)
 42 | #define HAVE_AMD64 1
 43 | #else
 44 | #define HAVE_AMD64 0
 45 | #endif  // defined(__amd64__) || defined(_M_AMD64)
 46 | #endif  // !defined(HAVE_AMD64)
 47 | 
 48 | 
 49 | #if HAVE_AMD64 || HAVE_I386
 50 | #if defined(_MSC_VER)
 51 | #pragma warning(push)
 52 | // '_M_IX86' is not defined as a preprocessor macro
 53 | #pragma warning(disable: 4668)
 54 | #include <intrin.h>
 55 | #pragma warning(pop)
 56 | #endif  // defined(_MSC_VER)
 57 | 
 58 | 
 59 | #if !defined(HAVE_MMX)
 60 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
 61 | #define HAVE_MMX 1
 62 | #else
 63 | #define HAVE_MMX 0
 64 | #endif  // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__))
 65 | #endif  // !defined(HAVE_MMX)
 66 | 
 67 | 
 68 | #if !defined(HAVE_SSE)
 69 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
 70 | #include <xmmintrin.h>
 71 | #define HAVE_SSE 1
 72 | #else
 73 | #define HAVE_SSE 0
 74 | #endif  // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__))
 75 | #endif  // !defined(HAVE_SSE)
 76 | 
 77 | 
 78 | #if !defined(HAVE_SSE2)
 79 | #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
 80 | #include <emmintrin.h>
 81 | #define HAVE_SSE2 1
 82 | #else
 83 | #define HAVE_SSE2 0
 84 | #endif  // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__))
 85 | #endif  // !defined(HAVE_SSE2)
 86 | 
 87 | #else
 88 | 
 89 | #if !defined(HAVE_MMX)
 90 | #define HAVE_MMX 0
 91 | #endif  // !defined(HAVE_MMX)
 92 | 
 93 | #if !defined(HAVE_SSE)
 94 | #define HAVE_SSE 0
 95 | #endif  // !defined(HAVE_SSE)
 96 | 
 97 | #if !defined(HAVE_SSE2)
 98 | #define HAVE_SSE2 0
 99 | #endif  // !defined(HAVE_SSE2)
100 | 
101 | #endif  // HAVE_AMD64 || HAVE_I386
102 | 
103 | // Error checking
104 | #if HAVE_SSE && !HAVE_MMX
105 | #error SSE is available but not MMX?
106 | #endif  // HAVE_SSE && !HAVE_MMX
107 | 
108 | #if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
109 | #error SSE2 is available but not SSE or MMX?
110 | #endif  // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX)
111 | 
112 | 
113 | #if !defined(CRCUTIL_PREFETCH_WIDTH)
114 | // On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without
115 | // prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and
116 | // larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs
117 | // smaller than 8MB, 4-5% slower when length >= 8MB.
118 | // Tested with prefetch length 256, 512, and 4096.
119 | //
120 | // At this moment there is no compelling reason to use prefetching.
121 | //
122 | #define CRCUTIL_PREFETCH_WIDTH 0
123 | #endif  // !defined(CRCUTIL_PREFETCH_WIDTH)
124 | 
125 | 
126 | #if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
127 | #define PREFETCH(src) \
128 |   _mm_prefetch(reinterpret_cast<const char *>(src) + CRCUTIL_PREFETCH_WIDTH, \
129 |                _MM_HINT_T0)
130 | #else
131 | #define PREFETCH(src)
132 | #endif  // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0
133 | 
134 | 
135 | // If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data
136 | // before accessing it at word boundary. See generic_crc.h,
137 | // ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro.
138 | #if !defined(CRCUTIL_MIN_ALIGN_SIZE)
139 | #if HAVE_AMD64 || HAVE_I386
140 | #define CRCUTIL_MIN_ALIGN_SIZE (1024)
141 | #else
142 | #define CRCUTIL_MIN_ALIGN_SIZE 0
143 | #endif  // HAVE_AMD64 || HAVE_I386
144 | #endif  // !defined(CRCUTIL_MIN_ALIGN_SIZE)
145 | 
146 | 
147 | // Use _mm_crc32_u64/32/8 intrinsics?
148 | // If not, they will be implemented in software.
149 | #if !HAVE_I386 && !HAVE_AMD64
150 | 
151 | #undef CRCUTIL_USE_MM_CRC32
152 | #define CRCUTIL_USE_MM_CRC32 0
153 | 
154 | #else
155 | 
156 | #if !defined(CRCUTIL_USE_MM_CRC32)
157 | #if defined(_MSC_VER) || defined(__GNUC__)
158 | #define CRCUTIL_USE_MM_CRC32 1
159 | #else
160 | #define CRCUTIL_USE_MM_CRC32 0
161 | #endif  // defined(_MSC_VER) || defined(__GNUC__)
162 | #endif  // !defined(CRCUTIL_USE_MM_CRC32)
163 | 
164 | #endif  // !HAVE_I386 && !HAVE_AMD64
165 | 
166 | 
167 | // Stringize -- always handy.
168 | #define TO_STRING_VALUE(arg) #arg
169 | #define TO_STRING(arg) TO_STRING_VALUE(arg)
170 | 
171 | 
172 | // Compilers give "right shift count >= width of type" warning even
173 | // though the shift happens only under appropriate "if".
174 | #define SHIFT_RIGHT_NO_WARNING(value, bits) \
175 |   ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0))
176 | #define SHIFT_RIGHT_SAFE(value, bits) \
177 |   ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0)
178 | 
179 | // The same for left shifts.
180 | #define SHIFT_LEFT_NO_WARNING(value, bits) \
181 |   ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0))
182 | #define SHIFT_LEFT_SAFE(value, bits) \
183 |   ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0)
184 | 
185 | // GCC-specific macros.
186 | //
187 | #define GCC_VERSION_AVAILABLE(major, minor) \
188 |     (defined(__GNUC__) && \
189 |         (__GNUC__ > (major) || \
190 |             (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
191 | 
192 | 
193 | #if defined(__GNUC__)
194 | 
195 | // The GenericCrc tables must be properly aligned.
196 | // Penalty for misalignment? 50% performance degradation.
197 | // For 128-bit SSE2, the penalty is access violation.
198 | #define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n)))
199 | 
200 | #if GCC_VERSION_AVAILABLE(4, 4)
201 | // If not marked as "omit frame pointer",
202 | // GCC won't be able to find enough registers.
203 | #define GCC_OMIT_FRAME_POINTER \
204 |     __attribute__((__optimize__(2, "omit-frame-pointer")))
205 | #endif  // GCC_VERSION_AVAILABLE(4, 4)
206 | 
207 | #if !defined(__forceinline)
208 | #define __forceinline __attribute__((__always_inline__)) inline
209 | #endif  // !defined(__forceinline)
210 | 
211 | #if defined(__APPLE_CC__)
212 | // The version of GCC used by Mac OS X Xcode v 5664 does not understand
213 | // "movq xmm, r64" instruction and requires the use of "movd" (probably
214 | // because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm"
215 | // the same).
216 | //
217 | // Leaving common sense aside, let's peek into Intel's instruction
218 | // reference manual. That's what description of MOVD command says:
219 | // MOVD xmm, r/m32 (opcode 66 0F 6E /r)
220 | // MOVD r/m32, xmm (opcode 66 0F 7E /r)
221 | // MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r)
222 | // MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r)
223 | #define SSE2_MOVQ "movd"
224 | #else
225 | #define SSE2_MOVQ "movq"
226 | #endif  // defined(__APPLE_CC__)
227 | 
228 | #endif  // defined(__GNUC__)
229 | 
230 | 
231 | // Define compiler-specific macros that were not set yet.
232 | #if !defined(_MSC_VER) && !defined(__forceinline)
233 | #define __forceinline inline
234 | #endif  // !defined(_MSC_VER) && !defined(__forceinline)
235 | 
236 | #if !defined(GCC_OMIT_FRAME_POINTER)
237 | #define GCC_OMIT_FRAME_POINTER
238 | #endif  // !defined(GCC_OMIT_FRAME_POINTER)
239 | 
240 | #if !defined(GCC_ALIGN_ATTRIBUTE)
241 | #define GCC_ALIGN_ATTRIBUTE(n)
242 | #endif  // !defined(GCC_ALIGN_ATTRIBUTE)
243 | 
244 | 
245 | #endif  // CRCUTIL_PLATFORM_H_
246 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/protected_crc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Protects CRC tables with its own CRC.
16 | // CRC tables get corrupted too, and if corruption is
17 | // not caught, data poisoning becomes a reality.
18 | 
19 | #ifndef CRCUTIL_PROTECTED_CRC_H_
20 | #define CRCUTIL_PROTECTED_CRC_H_
21 | 
22 | namespace crcutil {
23 | 
24 | #pragma pack(push, 16)
25 | 
26 | // Class CrcImplementation should not have virtual functions:
27 | // vptr is stored as the very first field, vptr value is defined
28 | // at runtime, so it is impossible to CRC(*this) once and
29 | // guarantee that this value will not change from run to run.
30 | //
31 | template<typename CrcImplementation> class ProtectedCrc
32 |     : public CrcImplementation {
33 |  public:
34 |   typedef typename CrcImplementation::Crc Crc;
35 | 
36 |   // Returns the CRC of this object's own bytes; the caller should
37 |   // compare it against a pre-computed, trusted constant.
38 |   //
39 |   // Computing SelfCheckValue() after CRC initialization,
40 |   // storing it in memory, and periodically checking against
41 |   // stored value may not work: if CRC tables were initialized
42 |   // incorrectly and/or had been corrupted during initialization,
43 |   // SelfCheckValue() will return garbage. Garbage in, garbage out.
44 |   // Consecutive checks will not detect a problem, the application
45 |   // will happily produce and save the data with corrupt CRC.
46 |   //
47 |   // The application should call SelfCheckValue() regularly:
48 |   // 1. First and foremost, on every CRC mismatch.
49 |   // 2. After CRC'ing the data but before sending it out or writing it.
50 |   // 3. Worst case, every Nth CRC'ed byte or every Nth call to CRC.
51 |   Crc SelfCheckValue() const {
52 |     // "this->" is required: CrcDefault() is inherited from a dependent base.
53 |     return this->CrcDefault(this, sizeof(*this), 0);
54 |   }
55 | } GCC_ALIGN_ATTRIBUTE(16);
56 | 
57 | #pragma pack(pop)
58 | 
59 | }  // namespace crcutil
60 | 
61 | #endif  // CRCUTIL_PROTECTED_CRC_H_
62 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/rolling_crc.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Implements rolling CRC (e.g. for Rabin fingerprinting).
 16 | 
 17 | #ifndef CRCUTIL_ROLLING_CRC_H_
 18 | #define CRCUTIL_ROLLING_CRC_H_
 19 | 
 20 | #include "base_types.h"   // size_t, uint8
 21 | #include "crc_casts.h"    // TO_BYTE
 22 | 
 23 | namespace crcutil {
 24 | 
 25 | #pragma pack(push, 16)
 26 | 
 27 | // Rolling CRC over a fixed-size window. CrcImplementation should provide:
 28 | // - typename Crc, typename TableEntry, typename Word
 29 | // - Crc CrcDefault(const void *data, size_t bytes, const Crc &start)
 30 | // - const GfUtil<Crc> &Base() const
 31 | // - an accessible "crc_word_" table; Init() copies 256 entries of its
 32 | //   row [sizeof(Word) - 1]
 33 | template<typename CrcImplementation> class RollingCrc {
 34 |  public:
 35 |   typedef typename CrcImplementation::Crc Crc;
 36 |   typedef typename CrcImplementation::TableEntry TableEntry;
 37 |   typedef typename CrcImplementation::Word Word;
 38 | 
 39 |   RollingCrc() {}  // Sets nothing up: tables and crc_ are filled by Init().
 40 | 
 41 |   // Initializes internal data structures by delegating to Init().
 42 |   // Keeps a pointer to "crc" (used by Start()), so "crc" must stay alive.
 43 |   RollingCrc(const CrcImplementation &crc,
 44 |              size_t roll_window_bytes,
 45 |              const Crc &start_value) {
 46 |     Init(crc, roll_window_bytes, start_value);
 47 |   }
 48 | 
 49 |   // Computes crc of the first "roll_window_bytes" bytes of "data" using
 50 |   // "start_value" of "crc" (both were recorded by Init()).
 51 |   Crc Start(const void *data) const {
 52 |     return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
 53 |   }
 54 | 
 55 |   // Computes CRC of "roll_window_bytes" starting in next position.
 56 |   Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
 57 |     return (old_crc >> 8) ^ in_[TO_BYTE(old_crc) ^ byte_in] ^ out_[byte_out];
 58 |   }
 59 | 
 60 |   // Initializes internal data structures: fills in_[] and out_[] tables.
 61 |   // Keeps a pointer to "crc" (used by Start()), so "crc" must stay alive.
 62 |   void Init(const CrcImplementation &crc,
 63 |             size_t roll_window_bytes,
 64 |             const Crc &start_value) {
 65 |     crc_ = &crc;
 66 |     roll_window_bytes_ = roll_window_bytes;
 67 |     start_value_ = start_value;
 68 | 
 69 |     Crc add = crc.Base().Canonize() ^ start_value;
 70 |     add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
 71 |     add ^= crc.Base().Canonize();
 72 |     Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
 73 |     add = crc.Base().Multiply(add, mul);
 74 |     // "add" is now a constant that gets folded into every out_[] entry.
 75 |     mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
 76 |     for (size_t i = 0; i < 256; ++i) {
 77 |       out_[i] = static_cast<TableEntry>(
 78 |                     crc.Base().MultiplyUnnormalized(
 79 |                         static_cast<Crc>(i), 8, mul) ^ add);
 80 |     }
 81 |     for (size_t i = 0; i < 256; ++i) {
 82 |       in_[i] = crc.crc_word_[sizeof(Word) - 1][i];
 83 |     }
 84 |   }
 85 | 
 86 |   // Returns the start value recorded by Init().
 87 |   Crc StartValue() const { return start_value_; }
 88 | 
 89 |   // Returns length of roll window, in bytes, as recorded by Init().
 90 |   size_t WindowBytes() const { return roll_window_bytes_; }
 91 | 
 92 |  protected:
 93 |   TableEntry in_[256];   // Indexed by TO_BYTE(old_crc) ^ byte_in in Roll().
 94 |   TableEntry out_[256];  // Indexed by byte_out in Roll().
 95 | 
 96 |   // Not needed by Roll(); read by Start() and the accessors above.
 97 |   Crc start_value_;
 98 |   const CrcImplementation *crc_;
 99 |   size_t roll_window_bytes_;
100 | } GCC_ALIGN_ATTRIBUTE(16);
101 | 
102 | #pragma pack(pop)
103 | 
104 | }  // namespace crcutil
105 | 
106 | #endif  // CRCUTIL_ROLLING_CRC_H_
107 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/code/std_headers.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Includes some standard C headers for size_t, memset, etc.
16 | //
17 | // Also, permanently disables a number of warnings produced
18 | // by Microsoft's compiler when it includes standard headers
19 | // (surprisingly, also by Microsoft).
20 | 
21 | #ifndef CRCUTIL_STD_HEADERS_H_
22 | #define CRCUTIL_STD_HEADERS_H_
23 | 
24 | #if defined(_MSC_VER)
25 | // '4' bytes padding added after data member ...
26 | #pragma warning(disable:4820)
27 | 
28 | // unreferenced inline function has been removed ...
29 | #pragma warning(disable:4514)
30 | 
31 | // conditional expression is constant
32 | #pragma warning(disable: 4127)
33 | 
34 | // function ... not inlined
35 | #pragma warning(disable: 4710)
36 | 
37 | // function ... selected for automatic inline expansion
38 | #pragma warning(disable: 4711)
39 | 
40 | #define _CRT_SECURE_NO_WARNINGS
41 | 
42 | #endif  // defined(_MSC_VER)
43 | 
44 | // #define _CSTDLIB_
45 | #include <stdio.h>      // always handy
46 | #include <string.h>     // memset
47 | #include <stdlib.h>     // size_t, _rotl/_rotl64(MSC)
48 | #include <stddef.h>     // ptrdiff_t (GNUC)
49 | #include <stdarg.h>     // va_list
50 | 
51 | #endif  // CRCUTIL_STD_HEADERS_H_
52 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/config.h.in:
--------------------------------------------------------------------------------
 1 | /* config.h.in.  Generated from configure.ac by autoheader.  */
 2 | 
 3 | /* Define to 1 if you have the <inttypes.h> header file. */
 4 | #undef HAVE_INTTYPES_H
 5 | 
 6 | /* Define to 1 if you have the <memory.h> header file. */
 7 | #undef HAVE_MEMORY_H
 8 | 
 9 | /* Define to 1 if you have the `memset' function. */
10 | #undef HAVE_MEMSET
11 | 
12 | /* Define to 1 if the system has the type `ptrdiff_t'. */
13 | #undef HAVE_PTRDIFF_T
14 | 
15 | /* Define to 1 if stdbool.h conforms to C99. */
16 | #undef HAVE_STDBOOL_H
17 | 
18 | /* Define to 1 if you have the <stddef.h> header file. */
19 | #undef HAVE_STDDEF_H
20 | 
21 | /* Define to 1 if you have the <stdint.h> header file. */
22 | #undef HAVE_STDINT_H
23 | 
24 | /* Define to 1 if you have the <stdlib.h> header file. */
25 | #undef HAVE_STDLIB_H
26 | 
27 | /* Define to 1 if you have the `strchr' function. */
28 | #undef HAVE_STRCHR
29 | 
30 | /* Define to 1 if you have the <strings.h> header file. */
31 | #undef HAVE_STRINGS_H
32 | 
33 | /* Define to 1 if you have the <string.h> header file. */
34 | #undef HAVE_STRING_H
35 | 
36 | /* Define to 1 if you have the `strrchr' function. */
37 | #undef HAVE_STRRCHR
38 | 
39 | /* Define to 1 if you have the <sys/stat.h> header file. */
40 | #undef HAVE_SYS_STAT_H
41 | 
42 | /* Define to 1 if you have the <sys/types.h> header file. */
43 | #undef HAVE_SYS_TYPES_H
44 | 
45 | /* Define to 1 if you have the <unistd.h> header file. */
46 | #undef HAVE_UNISTD_H
47 | 
48 | /* Define to 1 if the system has the type `_Bool'. */
49 | #undef HAVE__BOOL
50 | 
51 | /* Name of package */
52 | #undef PACKAGE
53 | 
54 | /* Define to the address where bug reports for this package should be sent. */
55 | #undef PACKAGE_BUGREPORT
56 | 
57 | /* Define to the full name of this package. */
58 | #undef PACKAGE_NAME
59 | 
60 | /* Define to the full name and version of this package. */
61 | #undef PACKAGE_STRING
62 | 
63 | /* Define to the one symbol short name of this package. */
64 | #undef PACKAGE_TARNAME
65 | 
66 | /* Define to the home page for this package. */
67 | #undef PACKAGE_URL
68 | 
69 | /* Define to the version of this package. */
70 | #undef PACKAGE_VERSION
71 | 
72 | /* Define to 1 if you have the ANSI C header files. */
73 | #undef STDC_HEADERS
74 | 
75 | /* Version number of package */
76 | #undef VERSION
77 | 
78 | /* Define to `__inline__' or `__inline' if that's what the C compiler
79 |    calls it, or to nothing if 'inline' is not supported under any name.  */
80 | #ifndef __cplusplus
81 | #undef inline
82 | #endif
83 | 
84 | /* Define to `unsigned int' if <sys/types.h> does not define. */
85 | #undef size_t
86 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/configure.ac:
--------------------------------------------------------------------------------
 1 | #                                               -*- Autoconf -*-
 2 | # Process this file with autoconf to produce a configure script.
 3 | 
 4 | AC_PREREQ([2.65])
 5 | AC_INIT(crcutil, 1.0, crcutil@googlegroups.com)
 6 | AM_INIT_AUTOMAKE(crcutil, 1.0)
 7 | AC_CONFIG_SRCDIR([tests/aligned_alloc.h])
 8 | AC_CONFIG_HEADERS([config.h])
 9 | AC_CONFIG_FILES([Makefile])
10 | # AC_OUTPUT is invoked exactly once, at the very end of this file.
11 | 
12 | # Checks for programs.
13 | AC_PROG_CXX
14 | AC_PROG_CC
15 | AC_PROG_INSTALL
16 | AC_PROG_MAKE_SET
17 | 
18 | # Checks for libraries.
19 | 
20 | # Checks for header files.
21 | AC_CHECK_HEADERS([stddef.h stdlib.h string.h])
22 | 
23 | # Checks for typedefs, structures, and compiler characteristics.
24 | AC_HEADER_STDBOOL
25 | AC_C_INLINE
26 | AC_TYPE_SIZE_T
27 | AC_CHECK_TYPES([ptrdiff_t])
28 | 
29 | # Checks for library functions.
30 | AC_CHECK_FUNCS([memset strchr strrchr])
31 | 
32 | AC_OUTPUT
33 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/examples/usage.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "std_headers.h"
 16 | #include "interface.h"
 17 | 
 18 | static const size_t kRollWindow = 4;  // Roll window length, in bytes.
 19 | static const unsigned char kTestData[] = "abcdefgh";
 20 | // Head/tail split of kTestData; int because printf("%.*s") takes an int.
 21 | static const int kTestDataHead =
 22 |     static_cast<int>((sizeof(kTestData) - 1) / 4);
 23 | static const int kTestDataTail =
 24 |     static_cast<int>(sizeof(kTestData) - 1 - kTestDataHead);
 25 | 
 26 | typedef crcutil_interface::UINT64 uint64;
 27 | 
 28 | // printf() stand-in. GCC -- up to 4.5.0 inclusively -- does not know that
 29 | // "%ll[oudx]" is the right format for "long long", yet still complains
 30 | // about format mismatch. Forwarding to vprintf() is the cure.
 31 | void xprintf(const char *format, ...) {
 32 |   va_list args;
 33 |   va_start(args, format);
 34 |   vprintf(format, args);
 35 |   va_end(args);
 36 |   fflush(stdout);  // Push the output out right away.
 37 | }
 38 | 
 39 | // Prints the properties of "crc" and walks through its API using kTestData.
 40 | // Please notice that when working with 64-bit and smaller CRCs,
 41 | // the use of "hi" part of CRC value is unnecessary.
 42 | //
 43 | void Show(const crcutil_interface::CRC *crc) {
 44 |   char buffer[sizeof(kTestData) + 32];  // Message plus room for a stored CRC.
 45 | 
 46 |   //
 47 |   // Access CRC properties.
 48 |   //
 49 |   uint64 lo;
 50 |   crc->GeneratingPolynomial(&lo);
 51 |   xprintf("Generating polynomial 0x%llx, degree %llu",
 52 |           lo,
 53 |           static_cast<uint64>(crc->Degree()));
 54 |   crc->CanonizeValue(&lo);
 55 |   xprintf(", canonize_value=0x%llx", lo);
 56 | 
 57 |   crc->RollStartValue(&lo);
 58 |   xprintf(", roll start value=0x%llx, roll window=%llu",
 59 |           lo,
 60 |           static_cast<uint64>(crc->RollWindowBytes()));
 61 | 
 62 |   //
 63 |   // Check integrity of CRC tables.
 64 |   //
 65 |   crc->SelfCheckValue(&lo);
 66 |   xprintf(", self check value 0x%llx\n", lo);
 67 | 
 68 |   //
 69 |   // Compute CRC.
 70 |   //
 71 |   lo = 0;
 72 |   crc->Compute(kTestData, sizeof(kTestData) - 1, &lo);
 73 |   xprintf("CRC32C(\"%s\") = 0x%llx\n", kTestData, lo);
 74 | 
 75 |   //
 76 |   // Compute CRC (incrementally).
 77 |   //
 78 |   lo = 0;
 79 |   crc->Compute(kTestData, kTestDataHead, &lo);
 80 |   xprintf("CRC32C(\"%.*s\", 0) = 0x%llx, ", kTestDataHead, kTestData, lo);
 81 |   crc->Compute(kTestData + kTestDataHead, kTestDataTail, &lo);
 82 |   xprintf("CRC32C(\"%s\", CRC32(\"%.*s\", 0)) = 0x%llx = CRC32(\"%s\")\n",
 83 |       kTestData + kTestDataHead, kTestDataHead, kTestData, lo, kTestData);
 84 | 
 85 |   //
 86 |   // Compute CRC of a message filled with 0s.
 87 |   //
 88 |   lo = 1;
 89 |   crc->CrcOfZeroes(sizeof(buffer), &lo);
 90 | 
 91 |   uint64 lo1 = 1;
 92 |   memset(buffer, 0, sizeof(buffer));
 93 |   crc->Compute(buffer, sizeof(buffer), &lo1);
 94 |   xprintf("CRC of %d zeroes = %llx, expected %llx\n",
 95 |           static_cast<int>(sizeof(buffer)),
 96 |           lo,
 97 |           lo1);
 98 | 
 99 | 
100 |   //
101 |   // Use rolling CRC.
102 |   //
103 |   xprintf("RollingCrc expected =");
104 |   for (size_t i = 0; i <= kRollWindow; ++i) {
105 |     crc->RollStartValue(&lo);
106 |     crc->Compute(kTestData + i, kRollWindow, &lo);
107 |     xprintf(" 0x%llx", lo);
108 |   }
109 |   xprintf("\n");
110 | 
111 |   crc->RollStart(kTestData, &lo, NULL);
112 |   xprintf("RollingCrc actual   = 0x%llx", lo);
113 |   for (size_t i = 1; i <= kRollWindow; ++i) {
114 |     crc->Roll(kTestData[i - 1], kTestData[i - 1 + kRollWindow], &lo, NULL);
115 |     xprintf(" 0x%llx", lo);
116 |   }
117 |   xprintf("\n");
118 | 
119 |   //
120 |   // Change initial value.
121 |   //
122 |   lo = 0;
123 |   crc->Compute(kTestData, sizeof(kTestData) - 1, &lo);
124 |   uint64 lo1_expected = 1;
125 |   crc->Compute(kTestData, sizeof(kTestData) - 1, &lo1_expected);
126 |   lo1 = lo;
127 |   crc->ChangeStartValue(0, 0,   // old start value
128 |                         1, 0,   // new start value
129 |                         sizeof(kTestData) - 1,
130 |                         &lo1);
131 |   xprintf("CRC(\"%s\", 0) = 0x%llx, CRC(\"%s\", 1)=0x%llx, expected 0x%llx\n",
132 |       kTestData, lo, kTestData, lo1, lo1_expected);
133 | 
134 |   //
135 |   // Concatenate CRCs.
136 |   //
137 |   uint64 start_value = 1;
138 |   lo = start_value;
139 |   crc->Compute(kTestData, kTestDataHead, &lo);
140 |   lo1 = 0;
141 |   crc->Compute(kTestData + kTestDataHead, kTestDataTail, &lo1);
142 | 
143 |   uint64 lo2 = lo;
144 |   crc->Concatenate(lo1, 0, kTestDataTail, &lo2);
145 | 
146 |   uint64 lo2_expected = start_value;
147 |   crc->Compute(kTestData, sizeof(kTestData) - 1, &lo2_expected);
148 | 
149 |   xprintf("CRC(\"%.*s\", 1) = 0x%llx, CRC(\"%s\", 0)=0x%llx, "
150 |          "CRC(\"%s\", 1) = 0x%llx, expected 0x%llx\n",
151 |          kTestDataHead, kTestData, lo,
152 |          kTestData + kTestDataHead, lo1,
153 |          kTestData, lo2,
154 |          lo2_expected);
155 | 
156 |   //
157 |   // Store complementary CRC so that CRC of a message followed
158 |   // by complementary CRC value produces predefined result (e.g. 0).
159 |   //
160 |   memcpy(buffer, kTestData, sizeof(kTestData) - 1);
161 |   lo = 1;
162 |   crc->Compute(buffer, sizeof(kTestData) - 1, &lo);
163 |   size_t stored_crc_bytes = crc->StoreComplementaryCrc(
164 |     buffer + sizeof(kTestData) - 1,
165 |     lo, 0,
166 |     0);
167 | 
168 |   // Compute CRC of message + complementary CRC using the same start value
169 |   // (start value could be changed via ChangeStartValue()).
170 |   lo1 = 1;
171 |   crc->Compute(buffer, sizeof(kTestData) - 1 + stored_crc_bytes, &lo1);
172 | 
173 |   xprintf("Crc of message + complementary CRC = %llx, expected 0\n", lo1);
174 | 
175 |   //
176 |   // Store CRC after the message and ensure that CRC of message + its
177 |   // CRC produces constant result irrespective of message data.
178 |   //
179 |   memcpy(buffer, kTestData, sizeof(kTestData) - 1);
180 |   lo = 1;
181 |   crc->Compute(buffer, sizeof(kTestData) - 1, &lo);
182 |   stored_crc_bytes = crc->StoreCrc(buffer + sizeof(kTestData) - 1, lo);
183 | 
184 |   // Compute CRC of message + its CRC using the same start value (1).
185 |   lo1 = 1;
186 |   crc->Compute(buffer, sizeof(kTestData) - 1 + stored_crc_bytes, &lo1);
187 | 
188 |   // Ensure that it matches "predicted" constant value, irrespective
189 |   // of a message or CRC start value.
190 |   crc->CrcOfCrc(&lo2);
191 |   xprintf("CrcOfCrc=%llx, expected %llx\n", lo1, lo2);
192 | 
193 |   xprintf("\n");
194 | }
195 | 
196 | void ShowAndDelete(crcutil_interface::CRC *crc) {  // Demo, then dispose.
197 |   Show(crc);      // Print the properties of "crc" and exercise its API.
198 |   crc->Delete();  // NOTE(review): presumably destroys "crc" -- confirm.
199 | }
200 | 
201 | int main() {
202 |   using crcutil_interface::CRC;
203 |   // Two instances; only the 1st and 5th Create() arguments differ.
204 |   ShowAndDelete(CRC::Create(0xEB31D82E, 0, 32, true, 0x1111, 0, kRollWindow,
205 |                             CRC::IsSSE42Available(), NULL));
206 |   ShowAndDelete(CRC::Create(0x82f63b78, 0, 32, true, 0x2222, 0, kRollWindow,
207 |                             CRC::IsSSE42Available(), NULL));
208 |   return 0;
209 | }
210 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/aligned_alloc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | //
15 | // Poor man's platform-independent implementation of aligned memory allocator.
16 | 
17 | #ifndef CRCUTIL_ALIGNED_ALLOC_H_
18 | #define CRCUTIL_ALIGNED_ALLOC_H_
19 | 
20 | #include "std_headers.h"    // size_t, ptrdiff_t
21 | 
22 | namespace crcutil {
23 | 
// Allocates "size" bytes such that the byte located "field_offset" bytes
// into the returned block sits on an "align"-byte boundary.
//
// NB #1: "align" shall be an exact power of two. Any other value, as well
//        as a value smaller than a pointer, is replaced by the size of
//        a pointer.
//
// NB #2: memory allocated by AlignedAlloc() should be released by
//        AlignedFree().
//
// If "allocated_mem" is not NULL, it receives the address of the underlying
// (unaligned) allocation.
inline void *AlignedAlloc(size_t size,
                          size_t field_offset,
                          size_t align,
                          const void **allocated_mem) {
  // Room reserved in front of the aligned block for the back-pointer.
  const size_t header_bytes = sizeof(*allocated_mem);

  const bool power_of_two = (align != 0) && ((align & (align - 1)) == 0);
  if (!power_of_two || align < sizeof(char *)) {
    align = header_bytes;
  }
  const size_t mask = align - 1;

  // Worst case: the header plus (align - 1) bytes of padding.
  char *raw = new char[size + mask + header_bytes];
  char *user = raw + header_bytes;

  // Only the offset modulo "align" influences the placement.
  const size_t misalignment =
      reinterpret_cast<size_t>(user + (field_offset & mask)) & mask;
  if (misalignment != 0) {
    user += align - misalignment;
  }

  // Remember where the real allocation starts, right below the user block.
  reinterpret_cast<char **>(user)[-1] = raw;

  if (allocated_mem != NULL) {
    *allocated_mem = raw;
  }
  return user;
}
55 | 
// Releases a block obtained from AlignedAlloc(). Passing NULL is a no-op.
inline void AlignedFree(void *aligned_memory) {
  if (aligned_memory == NULL) {
    return;
  }
  // The address of the underlying allocation is kept in the pointer-sized
  // slot immediately below the aligned block.
  char **back_pointer = reinterpret_cast<char **>(aligned_memory) - 1;
  delete[] *back_pointer;
}
63 | 
64 | }  // namespace crcutil
65 | 
66 | #endif  // CRCUTIL_ALIGNED_ALLOC_H_
67 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/bob_jenkins_rng.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | // Glorified C++ version of Bob Jenkins' random number generator.
 16 | // See http://burtleburtle.net/bob/rand/smallprng.html for more details.
 17 | 
 18 | #ifndef CRCUTIL_BOB_JENKINS_RNG_H_
 19 | #define CRCUTIL_BOB_JENKINS_RNG_H_
 20 | 
 21 | #include "base_types.h"
 22 | 
// Rotate-left helpers for compilers other than MSVC (presumably MSVC already
// supplies _rotl/_rotl64 itself -- TODO confirm). Both macros are #undef'ed
// again at the end of this header.
// "value" must be an unsigned 32-bit (resp. 64-bit) quantity and "bits"
// must be strictly between 0 and the word width, otherwise one of the
// shifts is by the full width of the type.
#if !defined(_MSC_VER)
#define _rotl(value, bits) \
  static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits))))
#define _rotl64(value, bits) \
  static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits))))
#endif  // !defined(_MSC_VER)
 29 | 
 30 | namespace crcutil {
 31 | 
 32 | #pragma pack(push, 8)
 33 | 
 34 | template<typename T> class BobJenkinsRng;
 35 | 
 36 | template<> class BobJenkinsRng<uint32> {
 37 |  public:
 38 |   typedef uint32 value;
 39 | 
 40 |   value Get() {
 41 |     value e = a_ - _rotl(b_, 23);
 42 |     a_ = b_ ^ _rotl(c_, 16);
 43 |     b_ = c_ + _rotl(d_, 11);
 44 |     c_ = d_ + e;
 45 |     d_ = e + a_;
 46 |     return (d_);
 47 |   }
 48 | 
 49 |   void Init(value seed) {
 50 |     a_ = 0xf1ea5eed;
 51 |     b_ = seed;
 52 |     c_ = seed;
 53 |     d_ = seed;
 54 |     for (size_t i = 0; i < 20; ++i) {
 55 |       (void) Get();
 56 |     }
 57 |   }
 58 | 
 59 |   explicit BobJenkinsRng(value seed) {
 60 |     Init(seed);
 61 |   }
 62 | 
 63 |   BobJenkinsRng() {
 64 |     Init(0x1234567);
 65 |   }
 66 | 
 67 |  private:
 68 |   value a_;
 69 |   value b_;
 70 |   value c_;
 71 |   value d_;
 72 | };
 73 | 
 74 | 
 75 | #if HAVE_UINT64
 76 | 
 77 | template<> class BobJenkinsRng<uint64> {
 78 |  public:
 79 |   typedef uint64 value;
 80 | 
 81 |   value Get() {
 82 |     value e = a_ - _rotl64(b_, 7);
 83 |     a_ = b_ ^ _rotl64(c_, 13);
 84 |     b_ = c_ + _rotl64(d_, 37);
 85 |     c_ = d_ + e;
 86 |     d_ = e + a_;
 87 |     return d_;
 88 |   }
 89 | 
 90 |   void Init(value seed) {
 91 |     a_ = 0xf1ea5eed;
 92 |     b_ = seed;
 93 |     c_ = seed;
 94 |     d_ = seed;
 95 |     for (size_t i = 0; i < 20; ++i) {
 96 |       (void) Get();
 97 |     }
 98 |   }
 99 | 
100 |   explicit BobJenkinsRng(value seed) {
101 |     Init(seed);
102 |   }
103 | 
104 |   BobJenkinsRng() {
105 |     Init(0x1234567);
106 |   }
107 | 
108 |  private:
109 |   value a_;
110 |   value b_;
111 |   value c_;
112 |   value d_;
113 | };
114 | 
115 | #endif  // HAVE_UINT64
116 | 
117 | #if !defined(_MSC_VER)
118 | #undef _rotl
119 | #undef _rotl64
120 | #endif  // !defined(_MSC_VER)
121 | 
122 | #pragma pack(pop)
123 | 
124 | }  // namespace crcutil
125 | 
126 | #endif  // CRCUTIL_BOB_JENKINS_RNG_H_
127 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/rdtsc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Reads CPU cycle counter on AMD64 and I386 (for performance measurements).
16 | // Thanks to __rdtsc() intrinsic, it's easy with Microsoft and Intel
17 | // compilers, but real pain with GCC.
18 | 
19 | #ifndef CRCUTIL_RDTSC_H_
20 | #define CRCUTIL_RDTSC_H_
21 | 
22 | #include "platform.h"
23 | 
24 | namespace crcutil {
25 | 
26 | struct Rdtsc {
27 |   static inline uint64 Get() {
28 | #if defined(_MSC_VER) && (HAVE_AMD64 || HAVE_I386)
29 |     return __rdtsc();
30 | #elif defined(__GNUC__) && HAVE_AMD64
31 |     int64 result;
32 |     __asm__ volatile(
33 |         "rdtsc\n"
34 |         : "=a" (result));
35 |     return result;
36 | #elif defined(__GNUC__) && HAVE_I386
37 |     // If "low" and "high" are defined as "uint64" to
38 |     // avoid explicit cast to uint64, GCC 4.5.0 in "-m32" mode
39 |     // fails with "impossible register constraint" error
40 |     // (no, it is not because one cannot use 64-bit value as argument
41 |     // for 32-bit register, but because its register allocator
42 |     // could not resolve a conflict under high register pressure).
43 |     uint32 low;
44 |     uint32 high;
45 |     __asm__ volatile(
46 |         "rdtsc\n"
47 |         : "=a" (low), "=d" (high));
48 |     return ((static_cast<uint64>(high) << 32) | low);
49 | #else
50 |     // It is hard to find low overhead timer with
51 |     // sub-millisecond resolution and granularity.
52 |     return 0;
53 | #endif
54 |   }
55 | };
56 | 
57 | }  // namespace crcutil
58 | 
59 | #endif  // CRCUTIL_RDTSC_H_
60 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/set_hi_pri.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // Raises priority of test process and main thread to reduce
16 | // timing variations caused by context switches. Windows only.
17 | 
18 | #if defined(_MSC_VER)
19 | // Disable warnings generated by "windows.h" compiled with -Wall.
20 | 
21 | // N bytes padding added after data member X
22 | #pragma warning(disable: 4820)
23 | 
24 | // no function prototype given: converting '()' to '(void)'
25 | #pragma warning(disable: 4255)
26 | 
27 | // '__midl' is not defined as a preprocessor macro,
28 | // replacing with '0' for '#if/#elif'
29 | #pragma warning(disable: 4668)
30 | 
31 | #endif  // defined(_MSC_VER)
32 | 
33 | 
34 | #if defined(_WIN32)
35 | #include <windows.h>
36 | #endif  // defined(_WIN32)
37 | 
// SetHiPri(): on Windows, raises the priority of the current thread and of
// the current process; on every other platform the body is empty.
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C"
#endif  // __cplusplus
void SetHiPri(void)
{
#if defined(_WIN32)
#if 1
  // These settings are extremely dangerous. E.g. if the app hits an infinite
  // loop, the computer may turn unresponsive and will require a power cycle.
  // Use for final testing only.
  // NOTE: "#if 1" makes this time-critical/realtime variant the active one.
  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
#else
  // Milder alternative; currently compiled out.
  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
  SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
#endif
#endif  // defined(_WIN32)
}
56 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/unittest.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 Google Inc.  All rights reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "unittest.h"
 16 | 
 17 | extern "C" void SetHiPri();
 18 | 
 19 | using namespace crcutil;
 20 | 
// Unless the build already defines HAVE_INT128, enable the 128-bit integer
// tests only for GCC-compatible compilers targeting AMD64.
#if !defined(HAVE_INT128)
#if defined(__GNUC__) && HAVE_AMD64
#define HAVE_INT128 1
#else
#define HAVE_INT128 0
#endif  // defined(__GNUC__) && HAVE_AMD64
#endif  // !defined(HAVE_INT128)

#if HAVE_INT128
// 128-bit unsigned integer declared through the GCC "mode(TI)" attribute.
typedef unsigned int uint128_t __attribute__((mode(TI)));
#endif  // HAVE_INT128
 32 | 
 33 | int main(int argc, char **argv) {
 34 |   bool test_perf_main = true;
 35 |   bool test_perf_all = false;
 36 |   bool canonical = false;
 37 | 
 38 |   for (int i = 1; i < argc; ++i) {
 39 |     if (strcmp(argv[i], "--noperf") == 0) {
 40 |       test_perf_main = false;
 41 |       test_perf_all = false;
 42 |     } else if (strcmp(argv[i], "--perfall") == 0) {
 43 |       test_perf_all = true;
 44 |     } else if (strcmp(argv[i], "--canonical") == 0) {
 45 |       canonical = true;
 46 |     } else if (strcmp(argv[i], "help") == 0) {
 47 |       fprintf(stderr, "Usage: unittest {options}\n");
 48 |       fprintf(stderr, "\n");
 49 |       fprintf(stderr, "Options:\n");
 50 |       fprintf(stderr, "    --canonical - test canonical variant of CRC\n");
 51 |       fprintf(stderr, "    --noperf    - do not test performance\n");
 52 |       fprintf(stderr, "    --perfall   - test performance of all CRC width "
 53 |               "(not just 32, 64, and 128)\n");
 54 |       fprintf(stderr, "\n");
 55 |       return 1;
 56 |     }
 57 |   }
 58 | 
 59 | 
 60 |   SetHiPri();
 61 | 
 62 |   CrcVerifier v;
 63 | 
 64 |   CreateTest<uint64, uint64, uint64>(
 65 |       64, 0, 0x9a6c9329ac4bc9b5ull, "u64/u64/u64", test_perf_main, &v);
 66 |   CreateTest<uint64, uint64, uint32>(
 67 |       64, 0, 0x9a6c9329ac4bc9b5ull, "u64/u64/u32", test_perf_all, &v);
 68 | 
 69 |   CreateTest<uint64, uint64, uint64>(
 70 |       32, 0, 0x82f63b78, "u64/u64/u64", test_perf_main, &v);
 71 |   CreateTest<uint32, uint32, uint32>(
 72 |       32, 0, 0x82f63b78, "u32/u32/u32", test_perf_main, &v);
 73 | 
 74 |   CreateTest<uint64, uint32, uint32>(
 75 |       32, 0, 0x82f63b78, "u64/u32/u32", test_perf_all, &v);
 76 |   CreateTest<uint64, uint32, uint64>(
 77 |       32, 0, 0x82f63b78, "u64/u32/u64", test_perf_all, &v);
 78 | 
 79 |   CreateTest<uint64, uint64, uint64>(
 80 |       15, 0, 0x00004CD1, "u64/u64/u64", test_perf_all, &v);
 81 |   CreateTest<uint32, uint32, uint32>(
 82 |       15, 0, 0x00004CD1, "u32/u32/u32", test_perf_all, &v);
 83 | 
 84 |   CreateTest<uint64, uint64, uint64>(
 85 |       07, 0, 0x00000048, "u64/u64/u64", test_perf_all, &v);
 86 |   CreateTest<uint32, uint32, uint32>(
 87 |       07, 0, 0x00000048, "u32/u32/u32", test_perf_all, &v);
 88 | 
 89 | #if HAVE_SSE2
 90 |   CreateTest<uint128_sse2, uint128_sse2, uint64>(
 91 |       128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
 92 |       "sse2/sse2/u64", test_perf_main, &v);
 93 |   CreateTest<uint128_sse2, uint128_sse2, uint32>(
 94 |       128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
 95 |       "sse2/sse2/u32", test_perf_main, &v);
 96 |   CreateTest<uint128_sse2, uint128_sse2, uint64>(
 97 |       64, 0, 0x9a6c9329ac4bc9b5ull,
 98 |       "sse2/sse2/u64", test_perf_main, &v);
 99 |   CreateTest<uint128_sse2, uint128_sse2, uint32>(
100 |       64, 0, 0x9a6c9329ac4bc9b5ull,
101 |       "sse2/sse2/u32", test_perf_main, &v);
102 |   CreateTest<uint128_sse2, uint128_sse2, uint64>(
103 |       32, 0, 0x82f63b78,
104 |       "sse2/sse2/u64", test_perf_main, &v);
105 |   CreateTest<uint128_sse2, uint128_sse2, uint32>(
106 |       32, 0, 0x82f63b78,
107 |       "sse2/sse2/u32", test_perf_main, &v);
108 | #endif  // HAVE_SSE2
109 | #if HAVE_INT128
110 |   CreateTest<uint128_t, uint128_t, uint64>(
111 |       128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
112 |       "u128/u128/u64", test_perf_main, &v);
113 |   CreateTest<uint128_t, uint128_t, uint32>(
114 |       128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
115 |       "u128/u128/u32", test_perf_main, &v);
116 | #endif  // HAVE_INT128
117 | 
118 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 2>(canonical,
119 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/2", test_perf_main, true));
120 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 3>(canonical,
121 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/3", test_perf_main, true));
122 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 4>(canonical,
123 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/4", test_perf_main, true));
124 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 5>(canonical,
125 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/5", test_perf_main, true));
126 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 6>(canonical,
127 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/6", test_perf_main, true));
128 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 7>(canonical,
129 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/7", test_perf_main, true));
130 |   v.add(new CrcVerifierFactory<uint64, uint64, uint64, 8>(canonical,
131 |       64, 0, 0x9a6c9329ac4bc9b5ull, "CRC-64-64/64/8", test_perf_main, true));
132 | 
133 | #if HAVE_SSE2
134 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 2>(
135 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
136 |       "CRC-128-sse2/size_t/2", test_perf_main, true));
137 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 3>(
138 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
139 |       "CRC-128-sse2/size_t/3", test_perf_main, true));
140 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 4>(
141 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
142 |       "CRC-128-sse2/size_t/4", test_perf_main, true));
143 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 5>(
144 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
145 |       "CRC-128-sse2/size_t/5", test_perf_main, true));
146 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 6>(
147 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
148 |       "CRC-128-sse2/size_t/6", test_perf_main, true));
149 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 7>(
150 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
151 |       "CRC-128-sse2/size_t/7", test_perf_main, true));
152 |   v.add(new CrcVerifierFactory<uint128_sse2, uint128_sse2, size_t, 8>(
153 |       canonical, 128, 0xeca61dca77452c88ull, 0x21fe865c87bc0e61ull,
154 |       "CRC-128-sse2/size_t/8", test_perf_main, true));
155 | #endif  // HAVE_SSE2
156 | 
157 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 2>(canonical,
158 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/2", test_perf_main, true));
159 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 3>(canonical,
160 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/3", test_perf_main, true));
161 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 4>(canonical,
162 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/4", test_perf_main, true));
163 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 5>(canonical,
164 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/5", test_perf_main, true));
165 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 6>(canonical,
166 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/6", test_perf_main, true));
167 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 7>(canonical,
168 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/7", test_perf_main, true));
169 |   v.add(new CrcVerifierFactory<size_t, size_t, size_t, 8>(canonical,
170 |       32, 0, 0x82f63b78, "CRC-32-size_t/size_t/8", test_perf_main, true));
171 | 
172 |   v.TestFunctionality();
173 |   v.TestPerformance();
174 | 
175 |   return (0);
176 | }
177 | 


--------------------------------------------------------------------------------
/src/crcutil-1.0/tests/unittest_helper.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc.  All rights reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | // A set of useful macros for crcutil_unittest.
16 | 
17 | #ifndef CRCUTIL_UNITTEST_HELPER_H_
18 | #define CRCUTIL_UNITTEST_HELPER_H_
19 | 
20 | #include "std_headers.h"    // printf
21 | 
// The whole family is skipped when the including code already defines CHECK.
#if !defined(CHECK)

// DEBUG_BREAK(): what a failed CHECK does after reporting -- break into the
// debugger under MSVC, terminate with exit status 1 everywhere else.
#if defined(_MSC_VER)
#define DEBUG_BREAK() __debugbreak()
#else
#define DEBUG_BREAK() exit(1)
#endif  // defined(_MSC_VER)

// CHECK(cond): when "cond" evaluates to false, prints
// "<file>, <line>: ASSERT(<cond text>)" to stderr, flushes stderr and
// invokes DEBUG_BREAK(). Wrapped in do { } while (0) so that it behaves
// as a single statement.
#define CHECK(cond) do { \
  if (!(cond)) { \
    fprintf(stderr, "%s, %d: ASSERT(%s)\n", __FILE__, __LINE__, #cond); \
    fflush(stderr); \
    DEBUG_BREAK(); \
  } \
} while (0)


// Comparison shorthands built on CHECK; the reported condition text is the
// expanded comparison.
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_NE(a, b) CHECK((a) != (b))
#define CHECK_EQ(a, b) CHECK((a) == (b))

#endif  // !defined(CHECK)
44 | 
45 | #endif  // CRCUTIL_UNITTEST_HELPER_H_
46 | 


--------------------------------------------------------------------------------
/src/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/src/py.typed


--------------------------------------------------------------------------------
/src/sabctools.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
  3 |  *
  4 |  * This program is free software; you can redistribute it and/or
  5 |  * modify it under the terms of the GNU General Public License
  6 |  * as published by the Free Software Foundation; either version 2
  7 |  * of the License, or (at your option) any later version.
  8 |  *
  9 |  * This program is distributed in the hope that it will be useful,
 10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |  * GNU General Public License for more details.
 13 |  *
 14 |  * You should have received a copy of the GNU General Public License
 15 |  * along with this program; if not, write to the Free Software
 16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 17 |  */
 18 | 
 19 | #include "sabctools.h"
 20 | #include "yenc.h"
 21 | #include "unlocked_ssl.h"
 22 | #include "crc32.h"
 23 | #include "sparse.h"
 24 | #include "utils.h"
 25 | 
 26 | /* Function and exception declarations */
 27 | PyMODINIT_FUNC PyInit_sabctools(void);
 28 | 
/*
 * Python API requirements
 *
 * Method table of the module. Each entry is
 *   { Python-visible name, C implementation, calling convention, docstring }.
 * METH_O functions receive their single argument directly as a PyObject*,
 * METH_VARARGS functions receive a tuple of positional arguments.
 * The table is terminated by the all-NULL sentinel entry.
 */
static PyMethodDef sabctools_methods[] = {
    {
        "yenc_decode",
        yenc_decode,
        METH_O,
        "yenc_decode(raw_data)"
    },
    {
        "yenc_encode",
        yenc_encode,
        METH_O,
        "yenc_encode(input_string)"
    },
    {
        "unlocked_ssl_recv_into",
        unlocked_ssl_recv_into,
        METH_VARARGS,
        "unlocked_ssl_recv_into(ssl_socket, buffer)"
    },
    {
        "crc32_combine",
        crc32_combine,
        METH_VARARGS,
        "crc32_combine(crc1, crc2, length)"
    },
    {
        "crc32_multiply",
        crc32_multiply,
        METH_VARARGS,
        "crc32_multiply(crc1, crc2)"
    },
    {
        "crc32_zero_unpad",
        crc32_zero_unpad,
        METH_VARARGS,
        "crc32_zero_unpad(crc1, length)"
    },
    {
        "crc32_xpown",
        crc32_xpown,
        METH_O,
        "crc32_xpown(n)"
    },
    {
        "crc32_xpow8n",
        crc32_xpow8n,
        METH_O,
        "crc32_xpow8n(n)"
    },
    {
        "sparse",
        sparse,
        METH_VARARGS,
        "sparse(handle, length)"
    },
    {
        "bytearray_malloc",
        bytearray_malloc,
        METH_O,
        "bytearray_malloc(size)"
    },
    /* Sentinel: marks the end of the table. */
    {NULL, NULL, 0, NULL}
};
 93 | 
/*
 * Module definition handed to PyModule_Create() in PyInit_sabctools().
 * Fields, in order: base initializer, module name, module docstring,
 * per-module state size, method table. A state size of -1 is the CPython
 * convention for a module that keeps its state in global variables.
 */
static struct PyModuleDef sabctools_definition = {
    PyModuleDef_HEAD_INIT,
    "sabctools",
    "Utils written in C for use within SABnzbd.",
    -1,
    sabctools_methods
};
101 | 
102 | static const char* simd_detected(void) {
103 |     int level = RapidYenc::decode_isa_level();
104 | #ifdef PLATFORM_X86
105 |     if(level >= ISA_LEVEL_VBMI2)
106 |         return "AVX512VL+VBMI2";
107 |     if(level >= ISA_LEVEL_AVX3)
108 |         return "AVX512VL";
109 |     if(level >= ISA_LEVEL_AVX2)
110 |         return "AVX2";
111 |     if(level >= ISA_LEVEL_AVX)
112 |         return "AVX";
113 |     if(level >= ISA_LEVEL_SSE4_POPCNT)
114 |         return "SSE4.1+POPCNT";
115 |     if(level >= ISA_LEVEL_SSE41)
116 |         return "SSE4.1";
117 |     if(level >= ISA_LEVEL_SSSE3)
118 |         return "SSSE3";
119 |     if(level >= (ISA_LEVEL_SSE2 | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT))
120 |         return "SSE2+ABM";
121 |     return "SSE2";
122 | #endif
123 | #ifdef PLATFORM_ARM
124 |     if(level >= ISA_LEVEL_NEON) {
125 |         return "NEON";
126 |     }
127 | #endif
128 | #ifdef __riscv
129 |     if(level >= ISA_LEVEL_RVV) {
130 |         return "RVV";
131 |     }
132 | #endif
133 |     return "";
134 | }
135 | 
136 | PyMODINIT_FUNC PyInit_sabctools(void) {
137 |     // Initialize and add version / SIMD information
138 |     Py_Initialize();
139 |     RapidYenc::encoder_init();
140 |     RapidYenc::decoder_init();
141 |     RapidYenc::crc32_init();
142 |     openssl_init();
143 |     sparse_init();
144 | 
145 |     PyObject* m = PyModule_Create(&sabctools_definition);
146 |     PyModule_AddStringConstant(m, "version", SABCTOOLS_VERSION);
147 |     PyModule_AddStringConstant(m, "simd", simd_detected());
148 | 
149 |     // Add status of linking OpenSSL function
150 |     PyObject *openssl_linked_object = openssl_linked() ? Py_True : Py_False;
151 |     Py_INCREF(openssl_linked_object);
152 |     PyModule_AddObject(m, "openssl_linked", openssl_linked_object);
153 | 
154 |     return m;
155 | }
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/src/sabctools.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #include <Python.h>
20 | #include <stdio.h>
21 | #include <fcntl.h>
22 | #include <string.h>
23 | 
24 | /* Version information */
25 | #define SABCTOOLS_VERSION "8.2.5"
26 | 
27 | PyMODINIT_FUNC PyInit_sabctools(void);
28 | 


--------------------------------------------------------------------------------
/src/sabctools.pyi:
--------------------------------------------------------------------------------
 1 | from typing import Tuple, Optional, IO
 2 | from ssl import SSLSocket
 3 | 
# Module-level constants.
# NOTE(review): the C module init registers the version string under the
# name "version"; confirm that "__version__" is provided elsewhere
# (presumably by the package __init__).
__version__: str
# Whether the OpenSSL functions could be linked (set during module init).
openssl_linked: bool
# Label of the detected SIMD level, e.g. "AVX2" or "NEON"; may be empty.
simd: str

# yEnc decoding / encoding
def yenc_decode(raw_data: memoryview) -> Tuple[bytearray, str, int, int, int, Optional[int]]: ...
def yenc_encode(input_string: bytes) -> Tuple[bytes, int]: ...

# SSL socket helper
def unlocked_ssl_recv_into(ssl_socket: SSLSocket, buffer: memoryview) -> int: ...

# CRC32 arithmetic helpers
def crc32_combine(crc1: int, crc2: int, length: int) -> int: ...
def crc32_multiply(crc1: int, crc2: int) -> int: ...
def crc32_xpow8n(n: int) -> int: ...
def crc32_xpown(n: int) -> int: ...
def crc32_zero_unpad(crc1: int, length: int) -> int: ...

# File / buffer helpers
# sparse(): resizes via file.truncate(length) on non-Windows platforms and
# via a sparse-file request plus SetEndOfFile on Windows.
def sparse(file: IO, length: int) -> None: ...
def bytearray_malloc(size: int) -> bytearray: ...
18 | 


--------------------------------------------------------------------------------
/src/sparse.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
  3 |  *
  4 |  * This program is free software; you can redistribute it and/or
  5 |  * modify it under the terms of the GNU General Public License
  6 |  * as published by the Free Software Foundation; either version 2
  7 |  * of the License, or (at your option) any later version.
  8 |  *
  9 |  * This program is distributed in the hope that it will be useful,
 10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |  * GNU General Public License for more details.
 13 |  *
 14 |  * You should have received a copy of the GNU General Public License
 15 |  * along with this program; if not, write to the Free Software
 16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 17 |  */
 18 | 
 19 | #include "sparse.h"
 20 | 
 21 | PyObject *Py_msvcrt_module = NULL;
 22 | PyObject *get_osfhandle_string = NULL;
 23 | 
 24 | void sparse_init()
 25 | {
 26 | #if defined(_WIN32) || defined(__CYGWIN__)
 27 |     Py_msvcrt_module = PyImport_ImportModule("msvcrt");
 28 |     get_osfhandle_string = PyUnicode_FromString("get_osfhandle");
 29 | #endif
 30 | }
 31 | 
 32 | PyObject *sparse(PyObject *self, PyObject *args)
 33 | {
 34 |     PyObject *Py_file;
 35 |     long long length;
 36 | 
 37 |     PyObject *Py_file_fileno = NULL;
 38 |     PyObject *Py_file_handle = NULL;
 39 |     PyObject *Py_file_truncate = NULL;
 40 | 
 41 |     if (!PyArg_ParseTuple(args, "OL:sparse", &Py_file, &length))
 42 |     {
 43 |         return NULL;
 44 |     }
 45 | 
 46 | #if defined(_WIN32) || defined(__CYGWIN__)
 47 |     // Get the windows file handle and set file attributes to sparse
 48 | 
 49 |     if (Py_msvcrt_module == NULL)
 50 |     {
 51 |         PyErr_SetString(PyExc_SystemError, "msvcrt module not loaded.");
 52 |         goto error;
 53 |     }
 54 | 
 55 |     if (!(Py_file_fileno = PyObject_CallMethod(Py_file, "fileno", NULL)))
 56 |     {
 57 |         PyErr_SetString(PyExc_SystemError, "Error calling fileno function.");
 58 |         goto error;
 59 |     }
 60 | 
 61 |     if (!(Py_file_handle = PyObject_CallMethodObjArgs(Py_msvcrt_module, get_osfhandle_string, Py_file_fileno, NULL)))
 62 |     {
 63 |         PyErr_SetString(PyExc_SystemError, "Failed calling get_osfhandle function.");
 64 |         goto error;
 65 |     }
 66 | 
 67 |     HANDLE handle = reinterpret_cast<HANDLE>(PyLong_AsLongLong(Py_file_handle));
 68 | 
 69 |     // Creating a sparse file may fail but that's OK
 70 |     DWORD bytesReturned;
 71 |     if (DeviceIoControl(handle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &bytesReturned, nullptr))
 72 |     {
 73 |         // Increase the file length without writing any data and seek back to the original position
 74 |         LARGE_INTEGER li_size;
 75 |         li_size.QuadPart = length;
 76 |         LARGE_INTEGER li_start = {0};
 77 |         if (!SetFilePointerEx(handle, {0}, &li_start, FILE_CURRENT) || !SetFilePointerEx(handle, li_size, nullptr, FILE_END) || !SetEndOfFile(handle) || !SetFilePointerEx(handle, li_start, nullptr, FILE_BEGIN))
 78 |         {
 79 |             PyErr_SetFromWindowsErr(0);
 80 |             goto error;
 81 |         }
 82 |     }
 83 | #else
 84 |     // Call file.truncate(length)
 85 | 
 86 |     if (!(Py_file_truncate = PyObject_CallMethod(Py_file, "truncate", "(L)", length)))
 87 |     {
 88 |         goto error;
 89 |     }
 90 | #endif
 91 | 
 92 |     Py_XDECREF(Py_file_fileno);
 93 |     Py_XDECREF(Py_file_handle);
 94 |     Py_XDECREF(Py_file_truncate);
 95 |     Py_RETURN_NONE;
 96 | 
 97 | error:
 98 |     Py_XDECREF(Py_file_fileno);
 99 |     Py_XDECREF(Py_file_handle);
100 |     Py_XDECREF(Py_file_truncate);
101 |     return NULL;
102 | }
103 | 


--------------------------------------------------------------------------------
/src/sparse.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #ifndef SABCTOOLS_SPARSE_H
20 | #define SABCTOOLS_SPARSE_H
21 | 
22 | #include <Python.h>
23 | 
24 | #if defined(_WIN32) || defined(__CYGWIN__)
25 | #include <Windows.h>
26 | #endif
27 | 
28 | void sparse_init();
29 | PyObject *sparse(PyObject *, PyObject *);
30 | 
31 | #endif //SABCTOOLS_SPARSE_H
32 | 


--------------------------------------------------------------------------------
/src/unlocked_ssl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #ifndef SABCTOOLS_UNLOCKED_SSL_H
20 | #define SABCTOOLS_UNLOCKED_SSL_H
21 | 
22 | #include <Python.h>
23 | #include <stdio.h>
24 | #include <fcntl.h>
25 | #include <string.h>
26 | 
27 | /* OpenSSL link */
28 | #if defined(_WIN32) || defined(__CYGWIN__)
29 | # define WIN32_LEAN_AND_MEAN
30 | # include <Windows.h>
31 | # include <winsock2.h>
32 | #else
33 | # include <dlfcn.h>
34 | #endif
35 | 
36 | #ifdef __cplusplus
37 | extern "C" {
38 | #endif
39 | 
 40 | /* Have to manually define these OpenSSL constants and hope they never change */
41 | # define SSL_RECEIVED_SHUTDOWN 2
42 | # define SSL_ERROR_WANT_READ 2
43 | # define SSL_ERROR_WANT_WRITE 3
44 | # define SSL_ERROR_ZERO_RETURN 6
45 | 
46 | void openssl_init();
47 | bool openssl_linked();
48 | PyObject *unlocked_ssl_recv_into(PyObject *, PyObject*);
49 | 
50 | #ifdef __cplusplus
51 | }
52 | #endif
53 | #endif


--------------------------------------------------------------------------------
/src/utils.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #include "utils.h"
20 | 
21 | PyObject* bytearray_malloc(PyObject* self, PyObject* Py_input_size) {
22 |     if(!PyLong_Check(Py_input_size)) {
23 |         PyErr_SetString(PyExc_TypeError, "Expected type 'int'.");
24 |         return NULL;
25 |     }
26 |     return PyByteArray_FromStringAndSize(NULL, PyLong_AsSsize_t(Py_input_size));
27 | }
28 | 


--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #ifndef SABCTOOLS_UTILS_H
20 | #define SABCTOOLS_UTILS_H
21 | 
22 | #include <Python.h>
23 | 
24 | PyObject* bytearray_malloc(PyObject *, PyObject*);
25 | 
26 | #endif //SABCTOOLS_UTILS_H
27 | 


--------------------------------------------------------------------------------
/src/yenc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2023 The SABnzbd-Team (sabnzbd.org)
 3 |  *
 4 |  * This program is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU General Public License
 6 |  * as published by the Free Software Foundation; either version 2
 7 |  * of the License, or (at your option) any later version.
 8 |  *
 9 |  * This program is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |  * GNU General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU General Public License
15 |  * along with this program; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 |  */
18 | 
19 | #ifndef SABCTOOLS_YENC_H
20 | #define SABCTOOLS_YENC_H
21 | 
22 | #include <Python.h>
23 | 
24 | #include "yencode/common.h"
25 | #include "yencode/encoder.h"
26 | #include "yencode/decoder.h"
27 | #include "yencode/crc.h"
28 | 
/* Constants */
#define YENC_LINESIZE    128
#define YENC_ZERO        0x00
#define YENC_CR          0x0d
#define YENC_LF          0x0a

/* The =yend line cannot be crazy long */
#define YENC_MAX_TAIL_BYTES 256

/* Prevent strange yEnc sizes (10 MiB).
 * Parenthesised so the macro expands as a single value next to any operator,
 * e.g. `x / YENC_MAX_PART_SIZE` or `x % YENC_MAX_PART_SIZE`. */
#define YENC_MAX_PART_SIZE (10*1024*1024)
40 | 
41 | /* Functions */
42 | PyObject* yenc_decode(PyObject *, PyObject*);
43 | PyObject* yenc_encode(PyObject *, PyObject*);
44 | 
45 | #endif //SABCTOOLS_YENC_H
46 | 


--------------------------------------------------------------------------------
/src/yencode/crc.h:
--------------------------------------------------------------------------------
 1 | #ifndef __YENC_CRC_H
 2 | #define __YENC_CRC_H
 3 | #include <stdlib.h> // for llabs
 4 | 
 5 | #if !defined(__GNUC__) && defined(_MSC_VER)
 6 | # include <intrin.h>
 7 | #endif
 8 | 
 9 | namespace RapidYenc {
10 | 
11 | 
12 | typedef uint32_t (*crc_func)(const void*, size_t, uint32_t);
13 | extern crc_func _do_crc32_incremental;
14 | 
15 | extern int _crc32_isa;
16 | static inline uint32_t crc32(const void* data, size_t length, uint32_t init) {
17 | 	return (*_do_crc32_incremental)(data, length, init);
18 | }
19 | static inline int crc32_isa_level() {
20 | 	return _crc32_isa;
21 | }
22 | 
23 | 
24 | // computes `n % 0xffffffff` (well, almost), using some bit-hacks
25 | static inline uint32_t crc32_powmod(uint64_t n) {
26 | #if defined(__GNUC__) && (__GNUC__ >= 5 || (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ > 3))))
27 | 	unsigned res;
28 | 	unsigned carry = __builtin_uadd_overflow(n >> 32, n, &res);
29 | 	res += carry;
30 | 	return res;
31 | #elif defined(_MSC_VER) && defined(PLATFORM_X86)
32 | 	unsigned res;
33 | 	unsigned char carry = _addcarry_u32(0, n >> 32, n, &res);
34 | 	_addcarry_u32(carry, res, 0, &res);
35 | 	return res;
36 | #else
37 | 	n = (n >> 32) + (n & 0xffffffff);
38 | 	n += n >> 32;
39 | 	return n;
40 | #endif
41 | }
42 | // computes `crc32_powmod(n*8)` avoiding overflow
43 | static inline uint32_t crc32_bytepow(uint64_t n) {
44 | #if defined(__GNUC__) || defined(_MSC_VER)
45 | 	unsigned res = crc32_powmod(n);
46 | # ifdef _MSC_VER
47 | 	return _rotl(res, 3);
48 | # else
49 | 	return (res << 3) | (res >> 29);
50 | # endif
51 | #else
52 | 	n = (n >> 32) + (n & 0xffffffff);
53 | 	n <<= 3;
54 | 	n += n >> 32;
55 | 	return n;
56 | #endif
57 | }
58 | 
59 | typedef uint32_t (*crc_mul_func)(uint32_t, uint32_t);
60 | extern crc_mul_func _crc32_shift;
61 | extern crc_mul_func _crc32_multiply;
62 | static inline uint32_t crc32_shift(uint32_t a, uint32_t b) {
63 | 	return (*_crc32_shift)(a, b);
64 | }
65 | static inline uint32_t crc32_multiply(uint32_t a, uint32_t b) {
66 | 	return (*_crc32_multiply)(a, b);
67 | }
68 | 
69 | static inline uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, uint64_t len2) {
70 | 	return crc32_shift(crc1, crc32_bytepow(len2)) ^ crc2;
71 | }
72 | static inline uint32_t crc32_zeros(uint32_t crc1, uint64_t len) {
73 | 	return ~crc32_shift(~crc1, crc32_bytepow(len));
74 | }
75 | static inline uint32_t crc32_unzero(uint32_t crc1, uint64_t len) {
76 | 	return ~crc32_shift(~crc1, ~crc32_bytepow(len));
77 | }
78 | static inline uint32_t crc32_2pow(int64_t n) {
79 | 	uint32_t sign = (uint32_t)(n >> 63);
80 | 	return crc32_shift(0x80000000, crc32_powmod(llabs(n)) ^ sign);
81 | }
82 | static inline uint32_t crc32_256pow(uint64_t n) {
83 | 	return crc32_shift(0x80000000, crc32_bytepow(n));
84 | }
85 | 
86 | void crc32_init();
87 | 
88 | 
89 | 
90 | } // namespace
91 | #endif // defined(__YENC_CRC_H)
92 | 


--------------------------------------------------------------------------------
/src/yencode/crc_arm.cc:
--------------------------------------------------------------------------------
  1 | #include "crc_common.h"
  2 | 
  3 | #if defined(PLATFORM_ARM) && defined(_MSC_VER) && defined(__clang__) && !defined(__ARM_FEATURE_CRC32)
  4 | // I don't think GYP provides a nice way to detect whether MSVC or clang-cl is being used, but it doesn't use clang-cl by default, so a warning here is probably sufficient
  5 | HEDLEY_WARNING("CRC32 acceleration is not been enabled under ARM clang-cl by default; add `-march=armv8-a+crc` to additional compiler arguments to enable");
  6 | #endif
  7 | 
  8 | // disable CRC on GCC versions with broken arm_acle.h
  9 | #if defined(__ARM_FEATURE_CRC32) && defined(HEDLEY_GCC_VERSION)
 10 | # if !defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,1,1)
 11 | #  undef __ARM_FEATURE_CRC32
 12 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 7.0 - 8.1 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81497]. If you need this feature, please use a different compiler or version of GCC");
 13 | # endif
 14 | # if defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(9,4,0) && !HEDLEY_GCC_VERSION_CHECK(9,5,0)
 15 | #  undef __ARM_FEATURE_CRC32
 16 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 9.4 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100985]. If you need this feature, please use a different compiler or version of GCC");
 17 | # endif
 18 | #endif
 19 | #if defined(__ARM_FEATURE_CRC32) && defined(__has_include)
 20 | # if !__has_include(<arm_acle.h>)
 21 | #  undef __ARM_FEATURE_CRC32
 22 | HEDLEY_WARNING("CRC32 acceleration has been disabled due to missing arm_acle.h");
 23 | # endif
 24 | #endif
 25 | 
 26 | #if defined(__ARM_FEATURE_CRC32) || (defined(_M_ARM64) && !defined(__clang__)) // MSVC doesn't support CRC for ARM32
 27 | 
 28 | /* ARMv8 accelerated CRC */
 29 | #if defined(_MSC_VER) && !defined(__clang__)
 30 | #include <intrin.h>
 31 | #else
 32 | #include <arm_acle.h>
 33 | #endif
 34 | 
 35 | 
 36 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 37 | # ifdef __GNUC__
 38 | #  define _LE16 __builtin_bswap16
 39 | #  define _LE32 __builtin_bswap32
 40 | #  define _LE64 __builtin_bswap64
 41 | # else
 42 | // currently not supported
 43 | #  error No endian swap intrinsic defined
 44 | # endif
 45 | #else
 46 | # define _LE16(x) (x)
 47 | # define _LE32(x) (x)
 48 | # define _LE64(x) (x)
 49 | #endif
 50 | 
 51 | #ifdef __aarch64__
 52 | # define WORD_T uint64_t
 53 | # define WORDSIZE_LOG 3  // sizeof(WORD_T) == 1<<WORDSIZE_LOG
 54 | # define CRC_WORD(crc, data) __crc32d(crc, _LE64(data))
 55 | #else
 56 | # define WORD_T uint32_t
 57 | # define WORDSIZE_LOG 2  // sizeof(WORD_T) == 1<<WORDSIZE_LOG
 58 | # define CRC_WORD(crc, data) __crc32w(crc, _LE32(data))
 59 | #endif
 60 | 
 61 | 
 62 | 
 63 | #ifdef __aarch64__
 64 | static uint32_t crc32_multiply_arm(uint32_t a, uint32_t b) {
 65 | 	// perform PMULL
 66 | 	uint64_t res = 0;
 67 | 	uint64_t a64 = (uint64_t)a << 32;
 68 | 	int64_t b64 = (int64_t)b << 32;
 69 | 	for(int i=0; i<32; i++) {
 70 | 		res ^= a64 & (b64 >> 63);
 71 | 		b64 += b64;
 72 | 		a64 >>= 1;
 73 | 	}
 74 | 	// reduction via CRC
 75 | 	res = __crc32w(0, res) ^ (res >> 32);
 76 | 	return res;
 77 | }
 78 | #endif
 79 | // regular multiply is probably better for AArch32
 80 | 
 81 | 
 82 | // exploit CPU pipelining during CRC computation; unfortunately I haven't been able to measure any benefit
 83 | // - Neoverse N1: no noticeable difference
 84 | // - Cortex A53: actually runs a bit slower
 85 | //#define ENABLE_PIPELINE_OPT 1
 86 | 
 87 | #ifdef ENABLE_PIPELINE_OPT
 88 | #ifndef __aarch64__
 89 | # define crc32_multiply_arm RapidYenc::crc32_multiply_generic
 90 | #endif
 91 | #endif
 92 | 
 93 | 
 94 | 
 95 | // inspired/stolen off https://github.com/jocover/crc32_armv8/blob/master/crc32_armv8.c
// Runs `len` bytes starting at `src` through the ARM CRC32 instructions, continuing
// from the state `crc`, and returns the new state. No bit inversion is applied
// here; do_crc32_incremental_arm inverts the value on the way in and out.
static uint32_t arm_crc_calc(uint32_t crc, const unsigned char *src, long len) {
	
	// initial alignment
	// Each test looks at one address bit (1, then 2, then 4 on AArch64) and consumes
	// that many bytes when the bit is set, so src ends up aligned to sizeof(WORD_T).
	if (len >= 16) { // 16 is an arbitrary number; it just needs to be >=8
		if ((uintptr_t)src & sizeof(uint8_t)) {
			crc = __crc32b(crc, *src);
			src++;
			len--;
		}
		if ((uintptr_t)src & sizeof(uint16_t)) {
			crc = __crc32h(crc, _LE16(*((uint16_t *)src)));
			src += sizeof(uint16_t);
			len -= sizeof(uint16_t);
		}
#ifdef __aarch64__
		if ((uintptr_t)src & sizeof(uint32_t)) {
			crc = __crc32w(crc, _LE32(*((uint32_t *)src)));
			src += sizeof(uint32_t);
			len -= sizeof(uint32_t);
		}
#endif
	}
	
	// from here on the data is read one WORD_T at a time
	const WORD_T* srcW = (const WORD_T*)src;
	
#ifdef ENABLE_PIPELINE_OPT
	// uses ideas from https://github.com/komrad36/crc#option-13-golden
	// (this is a slightly less efficient, but much simpler implementation of the idea)
	const unsigned SPLIT_WORDS_LOG = 10;  // make sure it's at least 2
	const unsigned SPLIT_WORDS = 1<<SPLIT_WORDS_LOG;
	// table index is log2 of the number of bits in SPLIT_WORDS words
	const unsigned blockCoeff = RapidYenc::crc_power[SPLIT_WORDS_LOG + WORDSIZE_LOG + 3];
	while(len >= (long)(sizeof(WORD_T)*SPLIT_WORDS*2)) {
		// compute 2x CRCs concurrently to leverage pipelining
		uint32_t crc2 = 0;
		for(unsigned i=0; i<SPLIT_WORDS; i+=4) {
			crc = CRC_WORD(crc, *srcW);
			crc2 = CRC_WORD(crc2, *(srcW + SPLIT_WORDS));
			srcW++;
			crc = CRC_WORD(crc, *srcW);
			crc2 = CRC_WORD(crc2, *(srcW + SPLIT_WORDS));
			srcW++;
			crc = CRC_WORD(crc, *srcW);
			crc2 = CRC_WORD(crc2, *(srcW + SPLIT_WORDS));
			srcW++;
			crc = CRC_WORD(crc, *srcW);
			crc2 = CRC_WORD(crc2, *(srcW + SPLIT_WORDS));
			srcW++;
		}
		// merge the CRCs
		crc = crc32_multiply_arm(crc, blockCoeff) ^ crc2;
		// skip the second half, which was already consumed through crc2
		srcW += SPLIT_WORDS;
		len -= sizeof(WORD_T)*SPLIT_WORDS*2;
	}
#endif
	
	// main loop: 8 words per iteration. len is decremented before it is tested,
	// so it is negative once the loop exits.
	while ((len -= sizeof(WORD_T)*8) >= 0) {
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
	}
	// Tail: only multiples of 8*sizeof(WORD_T) were subtracted, so the low bits of
	// len still match those of the remaining byte count (two's complement); each
	// bit tested below says whether a chunk of that size is left.
	if (len & sizeof(WORD_T)*4) {
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
	}
	if (len & sizeof(WORD_T)*2) {
		crc = CRC_WORD(crc, *(srcW++));
		crc = CRC_WORD(crc, *(srcW++));
	}
	if (len & sizeof(WORD_T)) {
		crc = CRC_WORD(crc, *(srcW++));
	}
	// back to byte addressing for the sub-word remainder
	src = (const unsigned char*)srcW;
	
#ifdef __aarch64__
	if (len & sizeof(uint32_t)) {
		crc = __crc32w(crc, _LE32(*((uint32_t *)src)));
		src += sizeof(uint32_t);
	}
#endif
	if (len & sizeof(uint16_t)) {
		crc = __crc32h(crc, _LE16(*((uint16_t *)src)));
		src += sizeof(uint16_t);
	}
	if (len & sizeof(uint8_t))
		crc = __crc32b(crc, *src);
	
	return crc;
}
191 | 
192 | static uint32_t do_crc32_incremental_arm(const void* data, size_t length, uint32_t init) {
193 | 	return ~arm_crc_calc(~init, (const unsigned char*)data, (long)length);
194 | }
195 | 
196 | 
197 | #if defined(__aarch64__) && (defined(__GNUC__) || defined(_MSC_VER))
198 | static uint32_t crc32_shift_arm(uint32_t crc1, uint32_t n) {
199 | 	uint32_t result = crc1;
200 | 	uint64_t prod = result;
201 | 	prod <<= 32 - (n&31);
202 | 	result = __crc32w(0, prod) ^ (prod >> 32);
203 | 	n &= ~31;
204 | 	
205 | 	while(n) {
206 | 		result = crc32_multiply_arm(result, RapidYenc::crc_power[ctz32(n)]);
207 | 		n &= n-1;
208 | 	}
209 | 	return result;
210 | }
211 | #endif
212 | 
213 | 
214 | void RapidYenc::crc_arm_set_funcs() {
215 | 	_do_crc32_incremental = &do_crc32_incremental_arm;
216 | #ifdef __aarch64__
217 | 	_crc32_multiply = &crc32_multiply_arm;
218 | # if defined(__GNUC__) || defined(_MSC_VER)
219 | 	_crc32_shift = &crc32_shift_arm;
220 | # endif
221 | #endif
222 | 	_crc32_isa = ISA_FEATURE_CRC;
223 | }
224 | #else
225 | void RapidYenc::crc_arm_set_funcs() {}
226 | #endif
227 | 


--------------------------------------------------------------------------------
/src/yencode/crc_arm_pmull.cc:
--------------------------------------------------------------------------------
  1 | #include "crc_common.h"
  2 | 
  3 | // exclude broken/missing arm_acle.h
  4 | #if defined(__ARM_FEATURE_CRYPTO) && defined(HEDLEY_GCC_VERSION)
  5 | # if !defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,1,1)
  6 | #  undef __ARM_FEATURE_CRYPTO
  7 | # endif
  8 | # if defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(9,4,0) && !HEDLEY_GCC_VERSION_CHECK(9,5,0)
  9 | #  undef __ARM_FEATURE_CRYPTO
 10 | # endif
 11 | #endif
 12 | #if defined(__ARM_FEATURE_CRYPTO) && defined(__has_include)
 13 | # if !__has_include(<arm_acle.h>)
 14 | #  undef __ARM_FEATURE_CRYPTO
 15 | # endif
 16 | #endif
 17 | 
 18 | // ARM's intrinsics guide seems to suggest that vmull_p64 is available on A32, but neither Clang/GCC seem to support it on AArch32
 19 | #if (defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_CRC32) && defined(__aarch64__)) || (defined(_M_ARM64) && !defined(__clang__))
 20 | 
 21 | #include <arm_neon.h>
 22 | #if defined(_MSC_VER) && !defined(__clang__)
 23 | # include <intrin.h>
 24 | 
 25 | # ifdef _M_ARM64
 26 | // MSVC may detect this pattern: https://devblogs.microsoft.com/cppblog/a-tour-of-4-msvc-backend-improvements/#byteswap-identification
 27 | static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) {
 28 | 	x = _byteswap_uint64(x);
 29 | 	x = (x & 0xaaaaaaaaaaaaaaaa) >> 1 | (x & 0x5555555555555555) << 1;
 30 | 	x = (x & 0xcccccccccccccccc) >> 2 | (x & 0x3333333333333333) << 2;
 31 | 	x = (x & 0xf0f0f0f0f0f0f0f0) >> 4 | (x & 0x0f0f0f0f0f0f0f0f) << 4;
 32 | 	return x;
 33 | }
 34 | // ...whilst this seems to work best for 32-bit RBIT
 35 | static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) {
 36 | 	uint64_t r = rbit64(x);
 37 | 	return r >> 32;
 38 | }
 39 | # else
 40 | #  define rbit32 _arm_rbit
 41 | # endif
 42 | #else
 43 | # include <arm_acle.h>
 44 | // __rbit not present before GCC 11.4.0 or 12.2.0; for ARM32, requires GCC 14
 45 | # if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(14,0,0) && (!defined(__aarch64__) || !HEDLEY_GCC_VERSION_CHECK(11,3,0) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && !HEDLEY_GCC_VERSION_CHECK(12,2,0)))
 46 | #  ifdef __aarch64__
 47 | static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) {
 48 | 	uint64_t r;
 49 | 	__asm__ ("rbit %0,%1\n"
 50 | 		: "=r"(r) : "r"(x)
 51 | 		: /* No clobbers */);
 52 | 	return r;
 53 | }
 54 | #  endif
 55 | static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) {
 56 | 	uint32_t r;
 57 | 	__asm__ (
 58 | #  ifdef __aarch64__
 59 | 		"rbit %w0,%w1\n"
 60 | #  else
 61 | 		"rbit %0,%1\n"
 62 | #  endif
 63 | 		: "=r"(r) : "r"(x)
 64 | 		: /* No clobbers */);
 65 | 	return r;
 66 | }
 67 | # else
 68 | #  define rbit32 __rbit
 69 | #  define rbit64 __rbitll
 70 | # endif
 71 | #endif
 72 | 
 73 | 
 74 | // MSVC doesn't have poly64/poly128 types, so always use uint64 instead
 75 | 
 76 | #ifdef __aarch64__
 77 | # if defined(__GNUC__) || defined(__clang__)
 78 | static HEDLEY_ALWAYS_INLINE uint64x2_t pmull_low(uint64x1_t a, uint64x1_t b) {
 79 | 	uint64x2_t result;
 80 | 	__asm__ ("pmull %0.1q,%1.1d,%2.1d"
 81 | 		: "=w"(result)
 82 | 		: "w"(a), "w"(b)
 83 | 		: /* No clobbers */);
 84 | 	return result;
 85 | }
 86 | static HEDLEY_ALWAYS_INLINE uint64x2_t pmull_high(uint64x2_t a, uint64x2_t b) {
 87 | 	uint64x2_t result;
 88 | 	__asm__ ("pmull2 %0.1q,%1.2d,%2.2d"
 89 | 		: "=w"(result)
 90 | 		: "w"(a), "w"(b)
 91 | 		: /* No clobbers */);
 92 | 	return result;
 93 | }
 94 | # elif defined(_MSC_VER) && !defined(__clang__)
 95 | #  define pmull_low vmull_p64
 96 | #  define pmull_high vmull_high_p64
 97 | # else
 98 | #  define pmull_low(x, y) vreinterpretq_u64_p128(vmull_p64(vreinterpret_p64_u64(x), vreinterpret_p64_u64(y)))
 99 | #  define pmull_high(x, y) vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_u64(x), vreinterpretq_p64_u64(y)))
100 | # endif
101 | #else
102 | # if defined(_MSC_VER) && !defined(__clang__)
103 | #  define pmull_low vmull_p64
104 | #  define pmull_high(x, y) vmull_p64(vget_high_u64(x), vget_high_u64(y))
105 | # else
106 | #  define pmull_low(x, y) vreinterpretq_u64_p128(vmull_p64(x, y))
107 | #  define pmull_high(x, y) vreinterpretq_u64_p128(vmull_p64(vget_high_p64(vreinterpretq_p64_u64(x)), vget_high_p64(vreinterpretq_p64_u64(y))))
108 | # endif
109 | #endif
110 | 
111 | 
112 | static uint32_t crc32_multiply_pmull(uint32_t a, uint32_t b) {
113 | 	uint64x1_t prod = vget_low_u64(pmull_low(
114 | 		vreinterpret_u64_u32(vset_lane_u32(a, vdup_n_u32(0), 0)),
115 | 		vreinterpret_u64_u32(vset_lane_u32(b, vdup_n_u32(0), 0))
116 | 	));
117 | 	#ifdef __aarch64__
118 | 	uint64_t p = vget_lane_u64(prod, 0);
119 | 	return __crc32w(0, p+p) ^ (p >> 31);
120 | 	#else
121 | 	prod = vadd_u64(prod, prod);
122 | 	uint32x2_t prod32 = vreinterpret_u32_u64(prod);
123 | 	return __crc32w(0, vget_lane_u32(prod32, 0)) ^ vget_lane_u32(prod32, 1);
124 | 	#endif
125 | }
126 | 
127 | 
128 | 
// Per the original note, the entries of crc_power with their bits reversed.
// crc32_shift_pmull indexes this table with ctz32(n) after clearing the low five
// bits of n, so within this file only indices 5..31 are ever read.
// Entry [6] (0x490d678d) is also the constant used by crc32_shift_pmull_mulred.
static const uint32_t crc_power_rev[32] = { // bit-reversed crc_power
	0x00000002, 0x00000004, 0x00000010, 0x00000100, 0x00010000, 0x04c11db7, 0x490d678d, 0xe8a45605,
	0x75be46b7, 0xe6228b11, 0x567fddeb, 0x88fe2237, 0x0e857e71, 0x7001e426, 0x075de2b2, 0xf12a7f90,
	0xf0b4a1c1, 0x58f46c0c, 0xc3395ade, 0x96837f8c, 0x544037f9, 0x23b7b136, 0xb2e16ba8, 0x725e7bfa,
	0xec709b5d, 0xf77a7274, 0x2845d572, 0x034e2515, 0x79695942, 0x540cb128, 0x0b65d023, 0x3c344723
};
135 | 
136 | 
137 | static HEDLEY_ALWAYS_INLINE uint64x1_t crc32_shift_pmull_mulred(uint64x1_t a, uint64x1_t b) {
138 | 	uint64x2_t r = pmull_low(a, b);
139 | 	uint64x2_t h = pmull_high(r, vdupq_n_u64(0x490d678d));
140 | 	return veor_u64(vget_low_u64(r), vget_low_u64(h));
141 | }
142 | 
143 | 
// Shift kernel built on PMULL. The work is done on the bit-reversed value:
// the low five bits of `n` are applied as a plain left shift, every remaining set
// bit of `n` contributes one crc_power_rev factor, and the result is bit-reversed
// back and reduced to 32 bits with the CRC32 instructions.
static uint32_t crc32_shift_pmull(uint32_t crc1, uint32_t n) {
	crc1 = rbit32(crc1);
	
	// res = bit-reversed crc1 shifted left by (n & 31), held in a 64-bit lane
	uint64x1_t res;
	#ifdef __aarch64__
	uint64_t crc = (uint64_t)crc1 << (n & 31);
	res = vset_lane_u64(crc, vdup_n_u64(0), 0);
	#else
	res = vreinterpret_u64_u32(vset_lane_u32(crc1, vdup_n_u32(0), 0));
	res = vshl_u64(res, vdup_n_u64(n&31));
	#endif
	// only bits 5..31 of n remain to be processed
	n &= ~31;
	
	if(n) {
		// LOAD_NEXT_POWER reads the table entry for the lowest set bit of n;
		// each `n &= n-1` that follows clears that bit
		#define LOAD_NEXT_POWER vreinterpret_u64_u32(vset_lane_u32(crc_power_rev[ctz32(n)], vdup_n_u32(0), 0))
		// res2 is a second accumulator, started from the first factor on its own
		uint64x1_t res2 = LOAD_NEXT_POWER;
		n &= n-1;
		
		if(n) {
			// first multiply doesn't need reduction
			res2 = vget_low_u64(pmull_low(res2, LOAD_NEXT_POWER));
			n &= n-1;
			
			// remaining factors alternate between the two accumulators
			while(n) {
				res = crc32_shift_pmull_mulred(res, LOAD_NEXT_POWER);
				n &= n-1;
				
				if(n) {
					res2 = crc32_shift_pmull_mulred(res2, LOAD_NEXT_POWER);
					n &= n-1;
				}
			}
		}
		#undef LOAD_NEXT_POWER
		
		// merge two results
		uint64x2_t prod = pmull_low(res, res2);
		// weirdly, vrbitq_u8 is missing in ARM32 MSVC
		// bit-reverse each 64-bit lane: reverse the bits within every byte, then the byte order
		prod = vreinterpretq_u64_u8(vrev64q_u8(vrbitq_u8(vreinterpretq_u8_u64(prod))));
		// reduce the reversed 128-bit product to 32 bits with CRC instructions
		#ifdef __aarch64__
		crc = __crc32d(0, vgetq_lane_u64(prod, 1));
		uint64_t rem = vgetq_lane_u64(prod, 0);
		crc = __crc32w(rem, crc) ^ (rem >> 32);
		#else
		uint32x4_t prod32 = vreinterpretq_u32_u64(prod);
		uint32_t crc = __crc32w(0, vgetq_lane_u32(prod32, 2));
		crc = __crc32w(vgetq_lane_u32(prod32, 3), crc);
		crc = __crc32w(vgetq_lane_u32(prod32, 0), crc) ^ vgetq_lane_u32(prod32, 1);
		#endif
		return crc;
	} else {
		// n had no bits above the low five: undo the bit reversal and reduce
		#ifdef __aarch64__
		crc = rbit64(crc);
		crc = __crc32w(0, crc) ^ (crc >> 32);
		return crc;
		#else
		uint32x2_t r = vreinterpret_u32_u64(res);
		return __crc32w(0, rbit32(vget_lane_u32(r, 1))) ^ rbit32(vget_lane_u32(r, 0));
		#endif
	}
}
205 | 
206 | 
207 | void RapidYenc::crc_pmull_set_funcs() {
208 | 	_crc32_multiply = &crc32_multiply_pmull;
209 | 	_crc32_shift = &crc32_shift_pmull;
210 | 	_crc32_isa |= ISA_FEATURE_PMULL;
211 | }
212 | 
213 | #else
214 | void RapidYenc::crc_pmull_set_funcs() {}
215 | #endif /* defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_CRC32) */
216 | 


--------------------------------------------------------------------------------
/src/yencode/crc_common.h:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include <stddef.h> // for size_t
 3 | #include "crc.h"
 4 | 
 5 | #ifdef __GNUC__
 6 | # define ctz32 __builtin_ctz
 7 | #elif defined(_MSC_VER)
 8 | static HEDLEY_ALWAYS_INLINE unsigned ctz32(uint32_t n) {
 9 | 	unsigned long r;
10 | 	_BitScanForward(&r, n);
11 | 	return r;
12 | }
13 | #endif
14 | 
namespace RapidYenc {
	// Per-architecture installers. The ARM variants (crc_arm.cc, crc_arm_pmull.cc)
	// assign the dispatch pointers declared in crc.h, or are empty stubs when the
	// feature is compiled out; the others presumably follow the same pattern.
	void crc_clmul_set_funcs();
	void crc_clmul256_set_funcs();
	void crc_arm_set_funcs();
	void crc_pmull_set_funcs();
	void crc_riscv_set_funcs();
	
	// Table indexed with ctz32(n) by the shift kernels; defined in another translation unit
	extern const uint32_t crc_power[32];
	// Defined in another translation unit; presumably the portable fallbacks
	uint32_t crc32_multiply_generic(uint32_t a, uint32_t b);
	uint32_t crc32_shift_generic(uint32_t crc1, uint32_t n);
	
}


--------------------------------------------------------------------------------
/src/yencode/crc_folding_256.cc:
--------------------------------------------------------------------------------
  1 | // 256-bit version of crc_folding
  2 | 
  3 | #include "crc_common.h"
  4 |  
  5 | #if !defined(YENC_DISABLE_AVX256) && ((defined(__VPCLMULQDQ__) && defined(__AVX2__) && defined(__PCLMUL__)) || (defined(_MSC_VER) && _MSC_VER >= 1920 && defined(PLATFORM_X86) && !defined(__clang__)))
  6 | #include <inttypes.h>
  7 | #include <immintrin.h>
  8 | 
  9 | 
 10 | #if defined(__AVX512VL__) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
 11 | # define ENABLE_AVX512 1
 12 | #endif
 13 | 
 14 | static __m256i do_one_fold(__m256i src, __m256i data) {
 15 | 	const __m256i fold4 = _mm256_set_epi32(
 16 | 		0x00000001, 0x54442bd4,
 17 | 		0x00000001, 0xc6e41596,
 18 | 		0x00000001, 0x54442bd4,
 19 | 		0x00000001, 0xc6e41596
 20 | 	);
 21 | #ifdef ENABLE_AVX512
 22 | 	return _mm256_ternarylogic_epi32(
 23 | 	  _mm256_clmulepi64_epi128(src, fold4, 0x01),
 24 | 	  _mm256_clmulepi64_epi128(src, fold4, 0x10),
 25 | 	  data,
 26 | 	  0x96
 27 | 	);
 28 | #else
 29 | 	return _mm256_xor_si256(_mm256_xor_si256(
 30 | 	  data, _mm256_clmulepi64_epi128(src, fold4, 0x01)
 31 | 	), _mm256_clmulepi64_epi128(src, fold4, 0x10));
 32 | #endif
 33 | }
 34 | 
// Byte values 0..31 in order. partial_fold loads 16 bytes starting at offset
// (len & 15), which yields a shuffle control whose entries above 15 mark the
// positions that wrapped around.
ALIGN_TO(32, static const uint8_t  pshufb_rot_table[]) = {
	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
 39 | // _mm256_castsi128_si256, but upper is defined to be 0
 40 | #if (defined(__clang__) && __clang_major__ >= 5 && (!defined(__APPLE__) || __clang_major__ >= 7)) || (defined(__GNUC__) && __GNUC__ >= 10) || (defined(_MSC_VER) && _MSC_VER >= 1910)
 41 | // intrinsic unsupported in GCC 9 and MSVC < 2017
 42 | # define zext128_256 _mm256_zextsi128_si256
 43 | #else
 44 | // technically a cast is incorrect, due to upper 128 bits being undefined, but should usually work fine
 45 | // alternative may be `_mm256_set_m128i(_mm_setzero_si128(), v)` but unsupported on GCC < 7, and most compilers generate a VINSERTF128 instruction for it
 46 | # ifdef __OPTIMIZE__
 47 | #  define zext128_256 _mm256_castsi128_si256
 48 | # else
 49 | #  define zext128_256(x) _mm256_inserti128_si256(_mm256_setzero_si256(), x, 0)
 50 | # endif
 51 | #endif
 52 | 
 53 | #ifdef ENABLE_AVX512
 54 | # define MM256_BLENDV(a, b, m) _mm256_ternarylogic_epi32(a, b, m, 0xd8)
 55 | # define MM_2XOR(a, b, c) _mm_ternarylogic_epi32(a, b, c, 0x96)
 56 | #else
 57 | # define MM256_BLENDV _mm256_blendv_epi8
 58 | # define MM_2XOR(a, b, c) _mm_xor_si128(_mm_xor_si128(a, b), c)
 59 | #endif
 60 | 
// Folds a chunk shorter than a full 256-bit block into the two accumulators.
//   len      - number of valid bytes in crc_part (callers in this file pass 1..31)
//   crc0/1   - the two 256-bit accumulators, updated in place
//   crc_part - the input bytes, loaded into a 256-bit register
static void partial_fold(const size_t len, __m256i *crc0, __m256i *crc1, __m256i crc_part) {
	// shuffle control (len&15)..(len&15)+15, replicated into both 128-bit lanes
	__m256i shuf = _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)(pshufb_rot_table + (len&15))));
	// set for the byte positions whose index ran past 15
	__m256i mask = _mm256_cmpgt_epi8(shuf, _mm256_set1_epi8(15));
	
	// apply the same per-lane byte permutation to both accumulators and the new data
	*crc0 = _mm256_shuffle_epi8(*crc0, shuf);
	*crc1 = _mm256_shuffle_epi8(*crc1, shuf);
	crc_part = _mm256_shuffle_epi8(crc_part, shuf);
	
	__m256i crc_out = _mm256_permute2x128_si256(*crc0, *crc0, 0x08);  // move bottom->top
	__m256i crc01, crc1p;
	if(len >= 16) {
		// 16 bytes or more: everything additionally moves along by one whole 128-bit lane
		// (selector 0x21 = upper lane of the first operand, lower lane of the second)
		crc_out = MM256_BLENDV(crc_out, *crc0, mask);
		crc01 = *crc1;
		crc1p = crc_part;
		*crc0 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21);
		*crc1 = _mm256_permute2x128_si256(*crc1, crc_part, 0x21);
		crc_part = zext128_256(_mm256_extracti128_si256(crc_part, 1));
	} else {
		// fewer than 16 bytes: only the masked byte positions leave the accumulators
		crc_out = _mm256_and_si256(crc_out, mask);
		crc01 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21);
		crc1p = _mm256_permute2x128_si256(*crc1, crc_part, 0x21);
	}
	
	// masked positions take their bytes from the neighbouring lane
	*crc0 = MM256_BLENDV(*crc0, crc01, mask);
	*crc1 = MM256_BLENDV(*crc1, crc1p, mask);
	
	// fold the bytes collected in crc_out into the second accumulator
	*crc1 = do_one_fold(crc_out, *crc1);
}
 89 | 
 90 | 
// Constants for the final fold/reduction in crc_fold(), which loads them as three 128-bit vectors:
// index 0 = rk1/rk2, index 1 = rk5/rk6, index 2 = rk7/rk8. Each constant occupies two 32-bit words (low, high).
ALIGN_TO(16, static const unsigned crc_k[]) = {
	0xccaa009e, 0x00000000, /* rk1 */
	0x751997d0, 0x00000001, /* rk2 */
	0xccaa009e, 0x00000000, /* rk5 */
	0x63cd6124, 0x00000001, /* rk6 */
	0xf7011641, 0x00000000, /* rk7 */
	0xdb710640, 0x00000001  /* rk8 */
};
 99 | 
100 | 
// Computes the CRC32 of the `len` bytes at `src`, continuing from a previous CRC value `initial`.
// Returns `initial` unchanged when len is 0.
// The state is kept as two 256-bit accumulators (64 bytes) that are folded over the input with carry-less
// multiplies, then reduced to 32 bits at the end.
static uint32_t crc_fold(const unsigned char *src, long len, uint32_t initial) {
	// seed value: the inverted initial CRC, carry-less multiplied by a constant before entering the fold state
	__m128i xmm_t0 = _mm_clmulepi64_si128(
		_mm_cvtsi32_si128(~initial),
		_mm_cvtsi32_si128(0xdfded7ec),
		0
	);
	
	__m256i crc0 = zext128_256(xmm_t0);
	__m256i crc1 = _mm256_setzero_si256();
	
	if (len < 32) {
		if (len == 0)
			return initial;
		// short input: copy into a zeroed vector so nothing beyond src+len is read
		__m256i crc_part = _mm256_setzero_si256();
		memcpy(&crc_part, src, len);
		partial_fold(len, &crc0, &crc1, crc_part);
	} else {
		// number of bytes until src reaches the next 32-byte boundary (0 if already aligned)
		uintptr_t algn_diff = (0 - (uintptr_t)src) & 0x1F;
		if (algn_diff) {
			// unaligned 32-byte load is within bounds here since len >= 32; only algn_diff bytes are consumed
			partial_fold(algn_diff, &crc0, &crc1, _mm256_loadu_si256((__m256i *)src));
			src += algn_diff;
			len -= algn_diff;
		}
		
		// main loop: 64 bytes per iteration, using aligned loads
		while (len >= 64) {
			crc0 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src));
			crc1 = do_one_fold(crc1, _mm256_load_si256((__m256i*)src + 1));
			src += 64;
			len -= 64;
		}
		
		// one remaining 32-byte block: fold it with crc0 and swap the accumulators' roles
		if (len >= 32) {
			__m256i old = crc1;
			crc1 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src));
			crc0 = old;
			
			len -= 32;
			src += 32;
		}
		
		if(len != 0) {
			// fewer than 32 bytes remain; this is still a full aligned 32-byte load, so it reads up to the end
			// of the aligned block (possibly past src+len) -- partial_fold is told how many bytes are valid
			partial_fold(len, &crc0, &crc1, _mm256_load_si256((__m256i *)src));
		}
	}
	
	// mask with the lowest 32-bit element cleared
	const __m128i xmm_mask = _mm_set_epi32(-1,-1,-1,0);
	__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
	
	// split the two 256-bit accumulators into four 128-bit parts for the final reduction
	__m128i xmm_crc0 = _mm256_castsi256_si128(crc0);
	__m128i xmm_crc1 = _mm256_extracti128_si256(crc0, 1);
	__m128i xmm_crc2 = _mm256_castsi256_si128(crc1);
	__m128i xmm_crc3 = _mm256_extracti128_si256(crc1, 1);

	/*
	 * k1
	 * (fold the four 128-bit parts down into xmm_crc3, one part at a time)
	 */
	crc_fold = _mm_load_si128((__m128i *)crc_k);

	x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10);
	xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01);
	xmm_crc1 = MM_2XOR(xmm_crc1, x_tmp0, xmm_crc0);

	x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10);
	xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01);
	xmm_crc2 = MM_2XOR(xmm_crc2, x_tmp1, xmm_crc1);

	x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10);
	xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01);
	xmm_crc3 = MM_2XOR(xmm_crc3, x_tmp2, xmm_crc2);

	/*
	 * k5
	 * (narrow the remaining 128-bit value using rk5/rk6)
	 */
	crc_fold = _mm_load_si128((__m128i *)crc_k + 1);

	xmm_crc0 = xmm_crc3;
	xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
	xmm_crc0 = _mm_srli_si128(xmm_crc0, 8);
	xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);

	xmm_crc0 = xmm_crc3;
	xmm_crc3 = _mm_slli_si128(xmm_crc3, 4);
	xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
#ifdef ENABLE_AVX512
	//xmm_crc3 = _mm_maskz_xor_epi32(14, xmm_crc3, xmm_crc0);
	// truth table 0x28 = (xmm_crc3 ^ xmm_crc0) & xmm_mask
	xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc0, xmm_mask, 0x28);
#else
	xmm_crc0 = _mm_and_si128(xmm_crc0, xmm_mask);
	xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);
#endif

	/*
	 * k7
	 * (final reduction to 32 bits using rk7/rk8; the result is also bit-inverted here)
	 */
	xmm_crc1 = xmm_crc3;
	crc_fold = _mm_load_si128((__m128i *)crc_k + 2);

	xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
	xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
#ifdef ENABLE_AVX512
	xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc1, xmm_crc1, 0xC3); // NOT(xmm_crc3 ^ xmm_crc1)
#else
	// XOR with xmm_mask inverts the upper three elements, which covers the element extracted below
	xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_mask);
	xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1);
#endif
	// the CRC ends up in 32-bit element 2
	return _mm_extract_epi32(xmm_crc3, 2);
}
208 | 
209 | static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint32_t init) {
210 | 	return crc_fold((const unsigned char*)data, (long)length, init);
211 | }
212 | 
// Installs the 256-bit CLMUL CRC implementation.
// The 128-bit CLMUL setup runs first, then the incremental CRC function and the reported ISA level are
// overridden with the 256-bit variants -- so the order of these statements matters.
void RapidYenc::crc_clmul256_set_funcs() {
	crc_clmul_set_funcs(); // set multiply/shift function
	_do_crc32_incremental = &do_crc32_incremental_clmul;
	_crc32_isa = ISA_LEVEL_VPCLMUL;
}
218 | #else
// Fallback when the 256-bit CLMUL kernel isn't compiled in: defer entirely to the 128-bit CLMUL setup.
void RapidYenc::crc_clmul256_set_funcs() {
	crc_clmul_set_funcs();
}
222 | #endif
223 | 
224 | 


--------------------------------------------------------------------------------
/src/yencode/crc_riscv.cc:
--------------------------------------------------------------------------------
  1 | #include "crc_common.h"
  2 | 
  3 | #if defined(__riscv) && defined(__GNUC__) && (defined(__riscv_zbkc) || defined(__riscv_zbc))
  4 | 
  5 | #if __has_include(<riscv_bitmanip.h>)
  6 | # include <riscv_bitmanip.h>
  7 | # if __riscv_xlen == 64
  8 | #  define rv_clmul __riscv_clmul_64
  9 | #  define rv_clmulh __riscv_clmulh_64
 10 | # else
 11 | #  define rv_clmul __riscv_clmul_32
 12 | #  define rv_clmulh __riscv_clmulh_32
 13 | # endif
 14 | #else
// Fallback for toolchains without <riscv_bitmanip.h>: emits the `clmul` instruction directly.
// Returns the low XLEN bits of the carry-less product of x and y.
static HEDLEY_ALWAYS_INLINE uintptr_t rv_clmul(uintptr_t x, uintptr_t y) {
	uintptr_t r;
	__asm__("clmul %0, %1, %2\n"
		: "=r"(r)
		: "r"(x), "r"(y)
		:);
	return r;
}
// Fallback for toolchains without <riscv_bitmanip.h>: emits the `clmulh` instruction directly.
// Returns the high XLEN bits of the carry-less product of x and y.
static HEDLEY_ALWAYS_INLINE uintptr_t rv_clmulh(uintptr_t x, uintptr_t y) {
	uintptr_t r;
	__asm__("clmulh %0, %1, %2\n"
		: "=r"(r)
		: "r"(x), "r"(y)
		:);
	return r;
}
 31 | #endif
 32 | 
 33 | // TODO: test big-endian
 34 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 35 | # if __riscv_xlen == 64
 36 | #  define SWAP __builtin_bswap64
 37 | # else
 38 | #  define SWAP __builtin_bswap32
 39 | # endif
 40 | #else
 41 | # define SWAP(d) (d)
 42 | #endif
 43 | static HEDLEY_ALWAYS_INLINE uintptr_t read_partial(const void* p, unsigned sz) {
 44 | 	uintptr_t data = 0;
 45 | 	memcpy(&data, p, sz);
 46 | 	return SWAP(data);
 47 | }
 48 | static HEDLEY_ALWAYS_INLINE uintptr_t read_full(const uintptr_t* p) {
 49 | 	return SWAP(*p);
 50 | }
 51 | #undef SWAP
 52 | 
// CRC32 kernel built on the scalar carry-less multiply instructions (clmul/clmulh).
//   crc - incoming state; the wrapper in this file passes the bitwise NOT of the CRC and inverts the result
//   src - input bytes
//   len - number of input bytes
// Four machine words (accum[0..3]) serve as the fold accumulator, with accum[3] holding the newest data.
static uint32_t rv_crc_calc(uint32_t crc, const unsigned char *src, long len) {
	uintptr_t accum[4] = {};
	
	// note: constants here are bit-reflected and shifted left by 1
	// Zbc does also have clmulr to avoid the shift, but:
	// - there's no clmulhr, so for XLEN=64, just shift the constant instead to get the same result
	// - it's unavailable in Zbkc
	// - for XLEN=32, 2x constants is likely worth it to avoid the additional XORs in the loop
	
#if __riscv_xlen == 64
	const uint64_t MUL_HI = 0x15a546366 /*2^224*/, MUL_LO = 0xf1da05aa /*2^288*/;
	#define CLMULL rv_clmul
	#define CLMULH rv_clmulh
	
	accum[3] = rv_clmul(crc, 0xb66b1fa6); // 2^-32
#elif __riscv_xlen == 32
	const uint64_t MUL_HI = 0x140d44a2e /*2^128*/,  MUL_LO = 0x1751997d0 /*2^160*/;
	// the constants are 33 bits wide: multiply by the low 32 bits, and for the high half add in the
	// contribution of the constant's 33rd bit (x itself) when that bit is set
	#define CLMULL(x, k) rv_clmul(x, k & 0xffffffff)
	#define CLMULH(x, k) (rv_clmulh(x, k & 0xffffffff) ^ (k > 0xffffffffULL ? (x) : 0))
	
	accum[2] = rv_clmul(crc, 0xb66b1fa6);
	accum[3] = rv_clmulh(crc, 0xb66b1fa6);
#else
	#error "Unknown __riscv_xlen"
#endif
	const size_t WS = sizeof(uintptr_t);
	
	// if src isn't word-aligned, process until it is so
	long initial_alignment = ((uintptr_t)src & (WS-1));
	long initial_process = WS - initial_alignment;
	if(initial_alignment && len >= initial_process) {
		// shift the accumulator along by the number of bytes consumed, bringing the new bytes in at the top
		unsigned shl = initial_alignment * 8, shr = initial_process * 8;
#if __riscv_xlen == 64
		accum[2] = accum[3] << shl;
#else
		accum[1] = accum[2] << shl;
		accum[2] = (accum[3] << shl) | (accum[2] >> shr);
#endif
		accum[3] = (read_partial(src, initial_process) << shl) | (accum[3] >> shr);
		src += initial_process;
		len -= initial_process;
	}
	
	// main processing loop
	// (4 words per iteration; len goes negative on exit, but since WS*4 is a power of two the low bits
	//  tested below still describe the number of bytes remaining)
	const uintptr_t* srcW = (const uintptr_t*)src;
	while((len -= WS*4) >= 0) {
		uintptr_t tmpHi, tmpLo;
		tmpLo = CLMULL(accum[0], MUL_LO) ^ CLMULL(accum[1], MUL_HI);
		tmpHi = CLMULH(accum[0], MUL_LO) ^ CLMULH(accum[1], MUL_HI);
		accum[0] = tmpLo ^ read_full(srcW++);
		accum[1] = tmpHi ^ read_full(srcW++);
		
		tmpLo = CLMULL(accum[2], MUL_LO) ^ CLMULL(accum[3], MUL_HI);
		tmpHi = CLMULH(accum[2], MUL_LO) ^ CLMULH(accum[3], MUL_HI);
		accum[2] = tmpLo ^ read_full(srcW++);
		accum[3] = tmpHi ^ read_full(srcW++);
	}
	
	// process trailing bytes
	// two whole words remaining
	if(len & (WS*2)) {
		uintptr_t tmpLo = CLMULL(accum[0], MUL_LO) ^ CLMULL(accum[1], MUL_HI);
		uintptr_t tmpHi = CLMULH(accum[0], MUL_LO) ^ CLMULH(accum[1], MUL_HI);
		accum[0] = accum[2];
		accum[1] = accum[3];
		accum[2] = tmpLo ^ read_full(srcW++);
		accum[3] = tmpHi ^ read_full(srcW++);
	}
	// one whole word remaining
	if(len & WS) {
		uintptr_t tmpLo = CLMULL(accum[0], MUL_HI);
		uintptr_t tmpHi = CLMULH(accum[0], MUL_HI);
		accum[0] = accum[1];
		accum[1] = accum[2];
		accum[2] = accum[3] ^ tmpLo;
		accum[3] = tmpHi ^ read_full(srcW++);
	}
	
	// fewer than WS bytes remaining: shift the accumulator by `tail` bytes and fold what falls off the bottom
	size_t tail = len & (WS-1);
	if(tail) {
		unsigned shl = ((WS - tail) * 8), shr = tail * 8;
		uintptr_t tmp = accum[0] << shl;
		uintptr_t tmpLo = CLMULL(tmp, MUL_HI);
		uintptr_t tmpHi = CLMULH(tmp, MUL_HI);
		accum[0] = (accum[0] >> shr) | (accum[1] << shl);
		accum[1] = (accum[1] >> shr) | (accum[2] << shl);
		accum[2] = (accum[2] >> shr) | (accum[3] << shl);
		accum[3] = (accum[3] >> shr) | (read_partial(srcW, tail) << shl);
		accum[2] ^= tmpLo;
		accum[3] ^= tmpHi;
	}
	
	
	// done processing: fold everything down
#if __riscv_xlen == 64
	// fold 0,1 -> 2,3
	accum[2] ^= rv_clmul(accum[0], 0x1751997d0) ^ rv_clmul(accum[1], 0xccaa009e);
	accum[3] ^= rv_clmulh(accum[0], 0x1751997d0) ^ rv_clmulh(accum[1], 0xccaa009e);
	
	// fold 2->3
	accum[0] = rv_clmulh(accum[2], 0xccaa009e);
	accum[3] ^= rv_clmul(accum[2], 0xccaa009e);
	
	// fold 64b->32b
	accum[1] = rv_clmul(accum[3] & 0xffffffff, 0x163cd6124);
	accum[0] ^= accum[1] >> 32;
	accum[3] = accum[1] ^ (accum[3] >> 32);
	accum[3] <<= 32;
#else
	// fold 0,1 -> 2,3
	accum[2] ^= rv_clmul(accum[0], 0xccaa009e) ^ CLMULL(accum[1], 0x163cd6124);
	accum[3] ^= rv_clmulh(accum[0], 0xccaa009e) ^ CLMULH(accum[1], 0x163cd6124);
	
	// fold 2->3
	accum[0] = CLMULH(accum[2], 0x163cd6124);
	accum[3] ^= CLMULL(accum[2], 0x163cd6124);
#endif
	
	// reduction
	accum[3] = CLMULL(accum[3], 0xf7011641);
	accum[3] = CLMULH(accum[3], 0x1db710640);  // maybe consider clmulr for XLEN=32
	// the result is truncated to 32 bits on assignment
	crc = accum[0] ^ accum[3];
	return crc;
	#undef CLMULL
	#undef CLMULH
}
177 | 
178 | static uint32_t do_crc32_incremental_rv_zbc(const void* data, size_t length, uint32_t init) {
179 | 	return ~rv_crc_calc(~init, (const unsigned char*)data, (long)length);
180 | }
181 | 
182 | 
183 | #if __riscv_xlen == 64
// Reduces a 64-bit carry-less product to a 32-bit CRC value (XLEN=64 only).
// note that prod is shifted by 1 place to the right, due to bit-reflection
static uint32_t crc32_reduce_rv_zbc(uint64_t prod) {
	// the shift by 33 places prod's low 31 bits at the top of the word; the two multiplies use the same
	// constants as the reduction step of rv_crc_calc
	uint64_t t = rv_clmul(prod << 33, 0xf7011641);
	t = rv_clmulh(t, 0x1db710640);
	// combine with the upper part of prod
	t ^= prod >> 31;
	// truncated to 32 bits on return
	return t;
}
191 | #endif
// Multiplies two CRC32 values: carry-less product of a and b, reduced back down to 32 bits.
static uint32_t crc32_multiply_rv_zbc(uint32_t a, uint32_t b) {
#if __riscv_xlen == 64
	// the full 64-bit product fits in one register; the shared helper handles the reduction
	uint64_t t = crc32_reduce_rv_zbc(rv_clmul(a, b));
#else
	// XLEN=32: the product is split across two registers
	uint32_t prodLo = rv_clmul(a, b);
	uint32_t prodHi = rv_clmulh(a, b);
	
	// fix prodHi for bit-reflection (clmulr would be ideal here)
	// (i.e. shift the 64-bit pair prodHi:prodLo left by one bit)
	prodHi += prodHi;
	prodHi |= prodLo >> 31;
	prodLo += prodLo;
	
	uint32_t t = rv_clmul(prodLo, 0xf7011641);
	t ^= rv_clmulh(t, 0xdb710640);
	t ^= prodHi;
#endif
	return t;
}
210 | 
211 | #if defined(__GNUC__) || defined(_MSC_VER)
// Applies a shift of `n` to crc1 by multiplying it with the crc_power[] entry for every set bit of n.
// NOTE(review): crc_power[i] is presumably the CRC-domain constant for a shift of 2^i -- confirm at its definition.
static uint32_t crc32_shift_rv_zbc(uint32_t crc1, uint32_t n) {
	// TODO: require Zbb for ctz
	uint32_t result = crc1;
#if __riscv_xlen == 64
	// for n<32, can shift directly
	uint64_t prod = result;
	prod <<= 31 ^ (n&31);
	n &= ~31;
	result = crc32_reduce_rv_zbc(prod);
#endif
	if(!n) return result;
	
	// Two independent multiplication chains (result, result2) consume the set bits of n alternately,
	// lowest bit first, and are combined at the end.
	uint32_t result2 = RapidYenc::crc_power[ctz32(n)];
	n &= n-1; // clear the lowest set bit
	
	while(n) {
		result = crc32_multiply_rv_zbc(result, RapidYenc::crc_power[ctz32(n)]);
		n &= n-1;
		
		if(n) {
			result2 = crc32_multiply_rv_zbc(result2, RapidYenc::crc_power[ctz32(n)]);
			n &= n-1;
		}
	}
	return crc32_multiply_rv_zbc(result, result2);
}
238 | #endif
239 | 
240 | 
// Installs the RISC-V carry-less multiply implementations of the CRC routines and records the ISA in use.
void RapidYenc::crc_riscv_set_funcs() {
	_do_crc32_incremental = &do_crc32_incremental_rv_zbc;
	_crc32_multiply = &crc32_multiply_rv_zbc;
#if defined(__GNUC__) || defined(_MSC_VER)
	// crc32_shift_rv_zbc is only compiled under this same condition
	_crc32_shift = &crc32_shift_rv_zbc;
#endif
	_crc32_isa = ISA_FEATURE_ZBC;
}
249 | #else
// Built without RISC-V Zbc/Zbkc support: nothing to install, the existing function pointers are left untouched.
void RapidYenc::crc_riscv_set_funcs() {}
251 | #endif
252 | 


--------------------------------------------------------------------------------
/src/yencode/decoder.h:
--------------------------------------------------------------------------------
 1 | #ifndef __YENC_DECODER_H
 2 | #define __YENC_DECODER_H
 3 | 
 4 | #include "hedley.h"
 5 | 
 6 | namespace RapidYenc {
 7 | 
 8 | 
// the last state that the decoder was in (i.e. last few characters processed)
// the state is needed for incremental decoders as its behavior is affected by what it processed last
// acronyms: CR = carriage return (\r), LF = line feed (\n), EQ = equals char, DT = dot char (.)
typedef enum {
	YDEC_STATE_CRLF, // default; last two characters were \r\n
	YDEC_STATE_EQ, // last character was '=', so the next character is escaped
	YDEC_STATE_CR, // last character was \r
	YDEC_STATE_NONE, // no special sequence pending
	YDEC_STATE_CRLFDT, // last characters were \r\n.
	YDEC_STATE_CRLFDTCR, // last characters were \r\n.\r
	YDEC_STATE_CRLFEQ // may actually be "\r\n.=" in raw decoder
} YencDecoderState;
21 | 
22 | // end result for incremental processing (whether the end of the yEnc data was reached)
23 | typedef enum {
24 | 	YDEC_END_NONE,    // end not reached
25 | 	YDEC_END_CONTROL, // \r\n=y sequence found, src points to byte after 'y'
26 | 	YDEC_END_ARTICLE  // \r\n.\r\n sequence found, src points to byte after last '\n'
27 | } YencDecoderEnd;
28 | 
29 | 
30 | extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
31 | extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
32 | extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*);
33 | extern int _decode_isa;
34 | 
35 | static inline size_t decode(int isRaw, const void* src, void* dest, size_t len, YencDecoderState* state) {
36 | 	unsigned char* ds = (unsigned char*)dest;
37 | 	(*(isRaw ? _do_decode_raw : _do_decode))((const unsigned char**)&src, &ds, len, state);
38 | 	return ds - (unsigned char*)dest;
39 | }
40 | 
41 | static inline YencDecoderEnd decode_end(const void** src, void** dest, size_t len, YencDecoderState* state) {
42 | 	return _do_decode_end_raw((const unsigned char**)src, (unsigned char**)dest, len, state);
43 | }
44 | 
45 | void decoder_init();
46 | 
// Returns the ISA level recorded in _decode_isa by whichever decoder_set_*_funcs() installed the current decoder.
static inline int decode_isa_level() {
	return _decode_isa;
}
50 | 
51 | 
52 | } // namespace
53 | #endif // defined(__YENC_DECODER_H)
54 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_avx.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include "decoder_common.h"
 4 | #if defined(__AVX__) && defined(__POPCNT__)
 5 | #include "decoder_sse_base.h"
// Installs the AVX build of the 128-bit SSE decoder kernels.
// The kernels are instantiated with the SSE4+POPCNT feature level (this file is compiled with AVX enabled),
// while the reported ISA level is AVX.
void RapidYenc::decoder_set_avx_funcs() {
	// set up the lookup tables the kernels read from
	decoder_sse_init(lookups);
	decoder_init_lut(lookups->compact);
	_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE4_POPCNT> >;
	_do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE4_POPCNT> >;
	_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE4_POPCNT> >;
	_decode_isa = ISA_LEVEL_AVX;
}
14 | #else
// AVX/POPCNT not enabled at compile time: fall back to the next lower level (SSSE3).
void RapidYenc::decoder_set_avx_funcs() {
	decoder_set_ssse3_funcs();
}
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_avx2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include "decoder_common.h"
 4 | #if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
 5 | #include "decoder_avx2_base.h"
// Installs the 256-bit AVX2 decoder kernels (processing sizeof(__m256i)*2 bytes per step) and records the ISA level.
void RapidYenc::decoder_set_avx2_funcs() {
	// allocate the lookup tables and fill the compact LUT the kernels read from
	ALIGN_ALLOC(lookups, sizeof(*lookups), 16);
	decoder_init_lut(lookups->compact);
	RapidYenc::_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_AVX2> >;
	RapidYenc::_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_AVX2> >;
	RapidYenc::_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_AVX2> >;
	RapidYenc::_decode_isa = ISA_LEVEL_AVX2;
}
14 | #else
// AVX2 not enabled at compile time (or 256-bit AVX disabled via YENC_DISABLE_AVX256): fall back to the AVX level.
void RapidYenc::decoder_set_avx2_funcs() {
	decoder_set_avx_funcs();
}
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_common.h:
--------------------------------------------------------------------------------
  1 | #include "decoder.h"
  2 | 
namespace RapidYenc {
	// Per-ISA setup functions: each installs its decoder kernels into the _do_decode* function pointers
	// (or defers to a lower level when that ISA isn't compiled in).
	void decoder_set_sse2_funcs();
	void decoder_set_ssse3_funcs();
	void decoder_set_avx_funcs();
	void decoder_set_avx2_funcs();
	void decoder_set_vbmi2_funcs();
	// compile-time constant, defined alongside decoder_set_vbmi2_funcs
	extern const bool decoder_has_avx10;
	void decoder_set_neon_funcs();
	void decoder_set_rvv_funcs();
	
	// scalar decoder; also used by the SIMD driver for short, unaligned and trailing data
	template<bool isRaw, bool searchEnd>
	YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state);
}
 16 | 
 17 | 
 18 | #if defined(PLATFORM_ARM) && !defined(__aarch64__)
 19 | #define YENC_DEC_USE_THINTABLE 1
 20 | #endif
 21 | 
 22 | // TODO: need to support max output length somehow
 23 | 
 24 | 
 25 | 
// Common driver around a SIMD decode kernel.
//   isRaw     - raw mode; dot sequences following \r\n are tracked (see nextMask handling below)
//   searchEnd - stop at, and report, end sequences (\r\n=y or \r\n.\r\n)
//   kernel    - the SIMD routine; receives a pointer to the END of its input region, the region length,
//               the output pointer, and the escFirst/nextMask carry-over flags (all but the first by reference)
//   width     - number of bytes the kernel processes per step; *src is aligned to this before the kernel runs
//   src/dest  - in/out: advanced past the bytes consumed/written
//   state     - decoder state carried between calls; may be NULL, in which case a local default is used
// Returns the kind of end found, or YDEC_END_NONE.
// Short inputs, the unaligned head and the tail of the data are handled by the scalar decoder.
template<bool isRaw, bool searchEnd, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
static inline RapidYenc::YencDecoderEnd _do_decode_simd(size_t width, const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
	using namespace RapidYenc;
	
	// too little data to be worth running the SIMD kernel
	if(len <= width*2) return do_decode_scalar<isRaw, searchEnd>(src, dest, len, state);
	
	YencDecoderState tState = YDEC_STATE_CRLF;
	YencDecoderState* pState = state ? state : &tState;
	if((uintptr_t)(*src) & ((width-1))) {
		// find source memory alignment
		unsigned char* aSrc = (unsigned char*)(((uintptr_t)(*src) + (width-1)) & ~(width-1));
		int amount = (int)(aSrc - *src);
		len -= amount;
		// decode the unaligned head with the scalar routine
		YencDecoderEnd ended = do_decode_scalar<isRaw, searchEnd>(src, dest, amount, pState);
		if(ended) return ended;
	}
	
	// number of trailing bytes kept back from the SIMD kernel (they are left for the scalar decoder below)
	size_t lenBuffer = width -1;
	if(searchEnd) lenBuffer += 3 + (isRaw?1:0);
	else if(isRaw) lenBuffer += 2;
	
	if(len > lenBuffer) {
		unsigned char *p = *dest; // destination pointer
		unsigned char escFirst = 0; // input character; first char needs escaping
		uint16_t nextMask = 0;
		// handle finicky case of special sequences straddled across initial boundary
		switch(*pState) {
			case YDEC_STATE_CRLF:
				if(isRaw && **src == '.') {
					nextMask = 1;
					if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('\r','\n')) {
						(*src) += 3;
						*pState = YDEC_STATE_CRLF;
						return YDEC_END_ARTICLE;
					}
					if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('=','y')) {
						(*src) += 3;
						*pState = YDEC_STATE_NONE;
						return YDEC_END_CONTROL;
					}
				}
				else if(searchEnd && *(uint16_t*)(*src) == UINT16_PACK('=','y')) {
					(*src) += 2;
					*pState = YDEC_STATE_NONE;
					return YDEC_END_CONTROL;
				}
				break;
			case YDEC_STATE_CR:
				if(isRaw && *(uint16_t*)(*src) == UINT16_PACK('\n','.')) {
					nextMask = 2;
					if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('\r','\n')) {
						(*src) += 4;
						*pState = YDEC_STATE_CRLF;
						return YDEC_END_ARTICLE;
					}
					if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('=','y')) {
						(*src) += 4;
						*pState = YDEC_STATE_NONE;
						return YDEC_END_CONTROL;
					}
				}
				else if(searchEnd && (*(uint32_t*)(*src) & 0xffffff) == UINT32_PACK('\n','=','y',0)) {
					(*src) += 3;
					*pState = YDEC_STATE_NONE;
					return YDEC_END_CONTROL;
				}
				break;
			case YDEC_STATE_CRLFDT:
				if(searchEnd && isRaw && *(uint16_t*)(*src) == UINT16_PACK('\r','\n')) {
					(*src) += 2;
					*pState = YDEC_STATE_CRLF;
					return YDEC_END_ARTICLE;
				}
				if(searchEnd && isRaw && *(uint16_t*)(*src) == UINT16_PACK('=','y')) {
					(*src) += 2;
					*pState = YDEC_STATE_NONE;
					return YDEC_END_CONTROL;
				}
				break;
			case YDEC_STATE_CRLFDTCR:
				if(searchEnd && isRaw && **src == '\n') {
					(*src) += 1;
					*pState = YDEC_STATE_CRLF;
					return YDEC_END_ARTICLE;
				}
				break;
			case YDEC_STATE_CRLFEQ:
				if(searchEnd && **src == 'y') {
					(*src) += 1;
					*pState = YDEC_STATE_NONE;
					return YDEC_END_CONTROL;
				}
				break;
			default: break; // silence compiler warning
		}
		// the previous data ended on an unconsumed '=', so the first byte here is an escaped character
		escFirst = (*pState == YDEC_STATE_EQ || *pState == YDEC_STATE_CRLFEQ);
		
		// our algorithm may perform an aligned load on the next part, of which we consider 2 bytes (for \r\n. sequence checking)
		long dLen = (long)(len - lenBuffer);
		dLen = (dLen + (width-1)) & ~(width-1); // round up to a multiple of width
		
		// dLen is passed by reference; its value after the call is the amount *src is advanced by below
		kernel((const uint8_t*)(*src) + dLen, dLen, p, escFirst, nextMask);
		
		// translate the kernel's carry-over flags back into a decoder state
		if(escFirst) *pState = YDEC_STATE_EQ; // escape next character
		else if(nextMask == 1) *pState = YDEC_STATE_CRLF; // next character is '.', where previous two were \r\n
		else if(nextMask == 2) *pState = YDEC_STATE_CR; // next characters are '\n.', previous is \r
		else *pState = YDEC_STATE_NONE;
		
		*src += dLen;
		len -= dLen;
		*dest = p;
	}
	
	// end alignment
	if(len)
		return do_decode_scalar<isRaw, searchEnd>(src, dest, len, pState);
	/** for debugging: ensure that the SIMD routine doesn't exit early
	if(len && !searchEnd) {
		const uint8_t* s = *src;
		unsigned char* p = *dest;
		int ended = do_decode_scalar<isRaw, searchEnd>(src, dest, len, pState);
		if(*src - s > width*2) {
			// this shouldn't happen, corrupt some data to fail the test
			while(p < *dest)
				*p++ = 0;
		}
		return ended;
	}
	*/
	return YDEC_END_NONE;
}
157 | 
// Decoder entry point for kernels whose processing width is a compile-time constant; the resulting
// instantiation matches the signature of the _do_decode* function pointers.
template<bool isRaw, bool searchEnd, size_t width, void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
	return _do_decode_simd<isRaw, searchEnd, kernel>(width, src, dest, len, state);
}
// Decoder entry point for kernels whose processing width is only known at run time: getWidth() is queried
// on every call.
template<bool isRaw, bool searchEnd, size_t(&getWidth)(), void(&kernel)(const uint8_t*, long&, unsigned char*&, unsigned char&, uint16_t&)>
static RapidYenc::YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, RapidYenc::YencDecoderState* state) {
	return _do_decode_simd<isRaw, searchEnd, kernel>(getWidth(), src, dest, len, state);
}
166 | 
167 | 
168 | #if defined(PLATFORM_X86) || defined(PLATFORM_ARM)
169 | namespace RapidYenc {
170 | 	void decoder_init_lut(void* compactLUT);
171 | }
172 | #endif
173 | 
// Recomputes nextMask by looking back at the bytes around `src`:
//   1 if src[0] is a '.' directly following \r\n
//   2 if src[1] is a '.' directly following \r\n (with the \n at src[0])
//   0 otherwise
// Non-raw mode always yields 0. In raw mode with len == 0, nextMask is left untouched.
template<bool isRaw>
static inline void decoder_set_nextMask(const uint8_t* src, size_t len, uint16_t& nextMask) {
	if(!isRaw) {
		nextMask = 0;
		return;
	}
	// must have gone through at least one loop cycle, otherwise the incoming value is still current
	if(len == 0) return;
	
	nextMask = (src[-2] == '\r' && src[-1] == '\n' && src[0] == '.') ? 1
	         : (src[-1] == '\r' && src[0] == '\n' && src[1] == '.') ? 2
	         : 0;
}
188 | 
// Variant without backtracking: the caller supplies the candidate bits in `mask`, and a bit is only kept
// if the corresponding byte really is a '.': bit 0 for src[0], otherwise bit 1 for src[1].
// Always 0 in non-raw mode.
template<bool isRaw>
static inline uint16_t decoder_set_nextMask(const uint8_t* src, unsigned mask) {
	if(!isRaw) return 0;
	
	unsigned keep = 0;
	if(src[0] == '.')
		keep = 1;
	else if(src[1] == '.')
		keep = 2;
	return (uint16_t)(mask & keep);
}
200 | 
// Resolves runs of consecutive '=' (e.g. '====') so that only the characters that really act as an escape
// stay set: within each run of set bits, every second bit is cleared.
//   mask       - one bit per '=' found
//   maskShift1 - mask shifted up by one position (its lowest bit may carry the escFirst flag)
// bit hack inspired from simdjson: https://youtu.be/wlvKAT7SZIQ?t=33m38s
template<typename T>
static inline T fix_eqMask(T mask, T maskShift1) {
	const T evenBits = (T)0x5555555555555555; // 0101...01
	
	// lowest bit of every run (e.g. 01011101 -> 01000101)
	const T runStarts = mask & ~maskShift1;
	
	// Adding a run's own start bit carries through the whole run, wiping it and leaving one stray bit just
	// above it. Doing so only for runs starting on an even bit leaves the odd-starting runs intact.
	// (Even-starting runs are the ones removed because the escFirst run starts at index -1, an odd position
	// that can't be represented -- so odd-starting runs are the ones that have to survive.)
	const T oddRuns = mask + (runStarts & evenBits);
	
	// toggling the even bits clears them in the surviving odd runs, and re-creates them in the wiped even runs;
	// the final AND with mask drops the stray carry bits
	return (oddRuns ^ evenBits) & mask;
}
221 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_sse2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include "decoder_common.h"
 4 | #ifdef __SSE2__
 5 | #include "decoder_sse_base.h"
 6 | 
 7 | void RapidYenc::decoder_sse_init(RapidYenc::SSELookups* HEDLEY_RESTRICT& lookups) {
 8 | 	ALIGN_ALLOC(lookups, sizeof(SSELookups), 16);
 9 | 	for(int i=0; i<256; i++) {
10 | 		lookups->BitsSetTable256inv[i] = 8 - (
11 | 			(i & 1) + ((i>>1) & 1) + ((i>>2) & 1) + ((i>>3) & 1) + ((i>>4) & 1) + ((i>>5) & 1) + ((i>>6) & 1) + ((i>>7) & 1)
12 | 		);
13 | 		
14 | 		#define _X(n, k) ((((n) & (1<<k)) ? 192ULL : 0ULL) << (k*8))
15 | 		lookups->eqAdd[i] = _X(i, 0) | _X(i, 1) | _X(i, 2) | _X(i, 3) | _X(i, 4) | _X(i, 5) | _X(i, 6) | _X(i, 7);
16 | 		#undef _X
17 | 	}
18 | 	for(int i=0; i<32; i++) {
19 | 		for(int j=0; j<16; j++) {
20 | 			if(i >= 16) // only used for LZCNT
21 | 				lookups->unshufMask[i*16 + j] = ((31-i)>j ? -1 : 0);
22 | 			else // only used for BSR
23 | 				lookups->unshufMask[i*16 + j] = (i>j ? -1 : 0);
24 | 		}
25 | 	}
26 | }
27 | 
// Installs the SSE2 decoder kernels (processing sizeof(__m128i)*2 bytes per step) and records the ISA level.
void RapidYenc::decoder_set_sse2_funcs() {
	// set up the lookup tables the kernels read from
	decoder_sse_init(lookups);
	decoder_init_lut(lookups->compact);
	_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSE2> >;
	_do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSE2> >;
	_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSE2> >;
	_decode_isa = ISA_LEVEL_SSE2;
}
36 | #else
// SSE2 not enabled at compile time: nothing to install, the existing decoder function pointers are left untouched.
void RapidYenc::decoder_set_sse2_funcs() {}
38 | #endif
39 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_ssse3.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include "decoder_common.h"
 4 | #ifdef __SSSE3__
 5 | #include "decoder_sse_base.h"
// Installs the SSSE3 decoder kernels (processing sizeof(__m128i)*2 bytes per step) and records the ISA level.
void RapidYenc::decoder_set_ssse3_funcs() {
	// set up the lookup tables the kernels read from
	decoder_sse_init(lookups);
	decoder_init_lut(lookups->compact);
	_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_SSSE3> >;
	_do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_SSSE3> >;
	_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_SSSE3> >;
	_decode_isa = ISA_LEVEL_SSSE3;
}
14 | #else
// SSSE3 not enabled at compile time: fall back to the SSE2 level.
void RapidYenc::decoder_set_ssse3_funcs() {
	decoder_set_sse2_funcs();
}
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/yencode/decoder_vbmi2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | # include "decoder_common.h"
 3 | 
 4 | #if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
 5 | const bool RapidYenc::decoder_has_avx10 = true;
 6 | #else
 7 | const bool RapidYenc::decoder_has_avx10 = false;
 8 | #endif
 9 | 
10 | #if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
11 | # ifndef YENC_DISABLE_AVX256
12 | #  include "decoder_avx2_base.h"
// Installs the VBMI2 decoder built on the 256-bit (AVX2-width) kernels.
// Unlike the lower ISA levels' setup functions, no lookup tables are initialised here.
void RapidYenc::decoder_set_vbmi2_funcs() {
	_do_decode = &do_decode_simd<false, false, sizeof(__m256i)*2, do_decode_avx2<false, false, ISA_LEVEL_VBMI2> >;
	_do_decode_raw = &do_decode_simd<true, false, sizeof(__m256i)*2, do_decode_avx2<true, false, ISA_LEVEL_VBMI2> >;
	_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m256i)*2, do_decode_avx2<true, true, ISA_LEVEL_VBMI2> >;
	_decode_isa = ISA_LEVEL_VBMI2;
}
19 | # else
20 | #  include "decoder_sse_base.h"
// YENC_DISABLE_AVX256 build: installs the VBMI2 decoder built on the 128-bit (SSE-width) kernels instead.
// Unlike the lower ISA levels' setup functions, no lookup tables are initialised here.
void RapidYenc::decoder_set_vbmi2_funcs() {
	_do_decode = &do_decode_simd<false, false, sizeof(__m128i)*2, do_decode_sse<false, false, ISA_LEVEL_VBMI2> >;
	_do_decode_raw = &do_decode_simd<true, false, sizeof(__m128i)*2, do_decode_sse<true, false, ISA_LEVEL_VBMI2> >;
	_do_decode_end_raw = &do_decode_simd<true, true, sizeof(__m128i)*2, do_decode_sse<true, true, ISA_LEVEL_VBMI2> >;
	_decode_isa = ISA_LEVEL_VBMI2;
}
27 | # endif
28 | #else
29 | void RapidYenc::decoder_set_vbmi2_funcs() {
30 | 	decoder_set_avx2_funcs();
31 | }
32 | #endif
33 | 


--------------------------------------------------------------------------------
/src/yencode/encoder.cc:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include "encoder_common.h"
  3 | #include "encoder.h"
  4 | 
  5 | 
// lookup tables for scalar processing
// The _B1.._BX helpers expand a per-index macro _B(n) for n = 0..255, so each table
// below only has to define _B(n) for a single input byte value n.
#define _B1(n) _B(n), _B(n+1), _B(n+2), _B(n+3)
#define _B2(n) _B1(n), _B1(n+4), _B1(n+8), _B1(n+12)
#define _B3(n) _B2(n), _B2(n+16), _B2(n+32), _B2(n+48)
#define _BX _B3(0), _B3(64), _B3(128), _B3(192)

// escapeLUT[n]: the encoded byte (n+42 mod 256) for input byte n, or 0 when the encoded
// byte would be NUL, CR, LF or '=' and therefore must always be written as an escape sequence.
const unsigned char RapidYenc::escapeLUT[256] = { // whether or not the character is critical
#define _B(n) ((n == 214 || n == '\r'+214 || n == '\n'+214 || n == '='-42) ? 0 : (n+42) & 0xff)
	_BX
#undef _B
};
// escapedLUT[n]: the two-byte escape sequence ('=' followed by n+42+64 mod 256), packed with
// UINT16_PACK, for every input byte that may need escaping; 0 otherwise. Besides the four
// always-critical bytes this also covers inputs that encode to TAB, SPACE and '.', which
// the encoders only escape at particular line positions.
const uint16_t RapidYenc::escapedLUT[256] = { // escaped sequences for characters that need escaping
#define _B(n) ((n == 214 || n == 214+'\r' || n == 214+'\n' || n == '='-42 || n == 214+'\t' || n == 214+' ' || n == '.'-42) ? UINT16_PACK('=', ((n+42+64)&0xff)) : 0)
	_BX
#undef _B
};

#undef _B1
#undef _B2
#undef _B3
#undef _BX
 27 | 
 28 | 
 29 | 
 30 | size_t RapidYenc::do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
 31 | 	unsigned char* es = (unsigned char*)src + len;
 32 | 	unsigned char *p = dest; // destination pointer
 33 | 	long i = -(long)len; // input position
 34 | 	unsigned char c, escaped; // input character; escaped input character
 35 | 	int col = *colOffset;
 36 | 	
 37 | 	if (col == 0) {
 38 | 		c = es[i++];
 39 | 		if (RapidYenc::escapedLUT[c]) {
 40 | 			memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
 41 | 			p += 2;
 42 | 			col = 2;
 43 | 		} else {
 44 | 			*(p++) = c + 42;
 45 | 			col = 1;
 46 | 		}
 47 | 	}
 48 | 	while(i < 0) {
 49 | 		// main line
 50 | 		unsigned char* sp = NULL;
 51 | 		while (i < -1-8 && line_size-col-1 > 8) {
 52 | 			// 8 cycle unrolled version
 53 | 			sp = p;
 54 | 			#define DO_THING(n) \
 55 | 				c = es[i+n], escaped = RapidYenc::escapeLUT[c]; \
 56 | 				if (escaped) \
 57 | 					*(p++) = escaped; \
 58 | 				else { \
 59 | 					memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t)); \
 60 | 					p += 2; \
 61 | 				}
 62 | 			DO_THING(0);
 63 | 			DO_THING(1);
 64 | 			DO_THING(2);
 65 | 			DO_THING(3);
 66 | 			DO_THING(4);
 67 | 			DO_THING(5);
 68 | 			DO_THING(6);
 69 | 			DO_THING(7);
 70 | 			
 71 | 			i += 8;
 72 | 			col += (int)(p - sp);
 73 | 		}
 74 | 		if(sp && col >= line_size-1) {
 75 | 			// TODO: consider revert optimisation from SIMD code
 76 | 			// we overflowed - need to revert and use slower method :(
 77 | 			col -= (int)(p - sp);
 78 | 			p = sp;
 79 | 			i -= 8;
 80 | 		}
 81 | 		// handle remaining chars
 82 | 		while(col < line_size-1) {
 83 | 			c = es[i++], escaped = RapidYenc::escapeLUT[c];
 84 | 			if (escaped) {
 85 | 				*(p++) = escaped;
 86 | 				col++;
 87 | 			}
 88 | 			else {
 89 | 				memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
 90 | 				p += 2;
 91 | 				col += 2;
 92 | 			}
 93 | 			/* experimental branchless version 
 94 | 			*p = '=';
 95 | 			c = (es[i++] + 42) & 0xFF;
 96 | 			int cond = (c=='\0' || c=='=' || c=='\r' || c=='\n');
 97 | 			*(p+cond) = c + (cond << 6);
 98 | 			p += 1+cond;
 99 | 			col += 1+cond;
100 | 			*/
101 | 			if (i >= 0) goto end;
102 | 		}
103 | 		
104 | 		// last line char
105 | 		if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases
106 | 			c = es[i++];
107 | 			if (RapidYenc::escapedLUT[c] && c != '.'-42) {
108 | 				memcpy(p, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
109 | 				p += 2;
110 | 			} else {
111 | 				*(p++) = c + 42;
112 | 			}
113 | 		}
114 | 		
115 | 		if (i >= 0) break;
116 | 		
117 | 		c = es[i++];
118 | 		if (RapidYenc::escapedLUT[c]) {
119 | 			uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
120 | 			memcpy(p, &w, sizeof(w));
121 | 			p += 4;
122 | 			col = 2;
123 | 		} else {
124 | 			// another option may be to just write the EOL and let the first char be handled by the faster methods above, but it appears that writing the extra byte here is generally faster...
125 | 			uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
126 | 			memcpy(p, &w, sizeof(w));
127 | 			p += 3;
128 | 			col = 1;
129 | 		}
130 | 	}
131 | 	
132 | 	end:
133 | 	if(doEnd) {
134 | 		// special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line
135 | 		unsigned char lc = *(p-1);
136 | 		if(lc == '\t' || lc == ' ') {
137 | 			*(p-1) = '=';
138 | 			*p = lc+64;
139 | 			p++;
140 | 			col++;
141 | 		}
142 | 	}
143 | 	*colOffset = col;
144 | 	return p - dest;
145 | }
146 | 
147 | 
namespace RapidYenc {
	// Currently selected encoder implementation; starts out as the scalar encoder and
	// is replaced by the encoder_*_init() functions when a SIMD variant is chosen.
	size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic;
	// ISA identifier matching the implementation stored in _do_encode
	int _encode_isa = ISA_GENERIC;
}
152 | 
// encoder_native_init(): only compiled for x86 builds with YENC_BUILD_NATIVE set to a
// non-zero value. Installs a single encoder instantiated for ISA_NATIVE - the AVX2-based
// kernel when the compiler targets AVX2 (and 256-bit code isn't disabled), otherwise the
// SSE-based kernel - initialises that kernel's lookup tables and records the ISA level.
#if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
# if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
#  include "encoder_avx_base.h"
static inline void encoder_native_init() {
	RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_avx2<ISA_NATIVE> >;
	encoder_avx2_lut<ISA_NATIVE>();
	RapidYenc::_encode_isa = ISA_NATIVE;
}
# else
#  include "encoder_sse_base.h"
static inline void encoder_native_init() {
	RapidYenc::_do_encode = &do_encode_simd< RapidYenc::do_encode_sse<ISA_NATIVE> >;
	encoder_sse_lut<ISA_NATIVE>();
	RapidYenc::_encode_isa = ISA_NATIVE;
}
# endif
#endif
170 | 
171 | 
// Selects the encoder implementation for the current CPU.
// If no branch below applies, _do_encode keeps its initial value (the scalar encoder).
void RapidYenc::encoder_init() {
#ifdef PLATFORM_X86
# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0
	// native build: the implementation was fixed at compile time
	encoder_native_init();
# else
	// runtime dispatch: try the highest ISA level first; each init function itself falls
	// back to the next lower one if its implementation wasn't compiled in
	int use_isa = cpu_supports_isa();
	// VBMI2 path is taken if this build's VBMI2 code is the AVX10 variant, or the CPU reports 512-bit EVEX support
	if(use_isa >= ISA_LEVEL_VBMI2 && (encoder_has_avx10 || (use_isa & ISA_FEATURE_EVEX512)))
		encoder_vbmi2_init();
	else if(use_isa >= ISA_LEVEL_AVX2)
		encoder_avx2_init();
	else if(use_isa >= ISA_LEVEL_AVX)
		encoder_avx_init();
	else if(use_isa >= ISA_LEVEL_SSSE3)
		encoder_ssse3_init();
	else
		encoder_sse2_init();
# endif
#endif
#ifdef PLATFORM_ARM
	if(cpu_supports_neon())
		encoder_neon_init();
#endif
#ifdef __riscv
	if(cpu_supports_rvv())
		encoder_rvv_init();
#endif
}
199 | 


--------------------------------------------------------------------------------
/src/yencode/encoder.h:
--------------------------------------------------------------------------------
 1 | #ifndef __YENC_ENCODER_H
 2 | #define __YENC_ENCODER_H
 3 | 
 4 | #include "hedley.h"
 5 | 
 6 | namespace RapidYenc {
 7 | 
 8 | 
 9 | 
10 | extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int);
11 | extern int _encode_isa;
12 | static inline size_t encode(int line_size, int* colOffset, const void* HEDLEY_RESTRICT src, void* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
13 | 	return (*_do_encode)(line_size, colOffset, (const unsigned char* HEDLEY_RESTRICT)src, (unsigned char*)dest, len, doEnd);
14 | }
15 | void encoder_init();
16 | static inline int encode_isa_level() {
17 | 	return _encode_isa;
18 | }
19 | 
20 | 
21 | 
22 | }
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_avx.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include "encoder_common.h"
 3 | 
// encoder_avx_init(): when the compiler targets AVX and POPCNT, install the SSE-based
// kernel instantiated for ISA_LEVEL_SSE4_POPCNT (reported as ISA_LEVEL_AVX);
// otherwise defer to the SSSE3 setup.
#if defined(__AVX__) && defined(__POPCNT__)
#include "encoder_sse_base.h"

void RapidYenc::encoder_avx_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE4_POPCNT> >;
	encoder_sse_lut<ISA_LEVEL_SSE4_POPCNT>();
	_encode_isa = ISA_LEVEL_AVX;
}
#else
void RapidYenc::encoder_avx_init() {
	encoder_ssse3_init();
}
#endif
17 | 
18 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_avx2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include "encoder_common.h"
 3 | 
// encoder_avx2_init(): when the compiler targets AVX2 and 256-bit code isn't disabled,
// install the AVX2 kernel and initialise its lookup tables; otherwise defer to the AVX setup.
#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256)
#include "encoder_avx_base.h"

void RapidYenc::encoder_avx2_init() {
	_do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_AVX2> >;
	encoder_avx2_lut<ISA_LEVEL_AVX2>();
	_encode_isa = ISA_LEVEL_AVX2;
}
#else
void RapidYenc::encoder_avx2_init() {
	encoder_avx_init();
}
#endif
17 | 
18 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_common.h:
--------------------------------------------------------------------------------
  1 | #ifndef __YENC_ENCODER_COMMON
  2 | #define __YENC_ENCODER_COMMON
  3 | 
namespace RapidYenc {
	// per-ISA setup functions; each installs its encoder into _do_encode, or hands over
	// to a lower level (or does nothing) if its implementation wasn't compiled in
	void encoder_sse2_init();
	void encoder_ssse3_init();
	void encoder_avx_init();
	void encoder_avx2_init();
	void encoder_vbmi2_init();
	// whether the VBMI2 encoder was compiled as the AVX10 (no 512-bit EVEX) variant
	extern const bool encoder_has_avx10;
	void encoder_neon_init();
	void encoder_rvv_init();
	
	// lookup tables for scalar processing
	// escapeLUT: encoded byte, or 0 if the byte must be escaped
	extern const unsigned char escapeLUT[256];
	// escapedLUT: packed 2-byte escape sequence, or 0 if the byte never needs escaping
	extern const uint16_t escapedLUT[256];
	
	// scalar fallback encoder; same signature as the function pointer used for dispatch
	size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd);
}
 20 | 
 21 | 
 22 | 
// Common driver wrapped around a SIMD encoding kernel.
//
// `kernel` receives the line size, the column offset pointer, a pointer to the END of the
// input, and - by reference - the output pointer and the remaining length. It encodes as
// much as it can, advancing the output pointer and reducing the length to the number of
// input bytes it left unprocessed; the scalar loop below then encodes that tail.
//
// Parameters and return value are the same as for RapidYenc::do_encode_generic:
// returns the number of bytes written to `dest` and updates `*colOffset`.
template<void(&kernel)(int, int*, const uint8_t* HEDLEY_RESTRICT, uint8_t* HEDLEY_RESTRICT&, size_t&)>
static size_t do_encode_simd(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) {
	if(len < 1) return 0;
	if(line_size < 12) { // short lines probably not worth processing in a SIMD way
		// we assume at least the first and last char exist in the line, and since the first char could be escaped, and SIMD encoder assumes at least one non-first/last char, assumption means that line size has to be >= 4
		return RapidYenc::do_encode_generic(line_size, colOffset, src, dest, len, doEnd);
	}
	
	const uint8_t* es = src + len;
	uint8_t* p = dest;
	
	if(*colOffset < 0) *colOffset = 0; // sanity check
	
	kernel(line_size, colOffset, es, p, len);
	
	// scalar loop to process remaining
	// (input is addressed with a negative index relative to the end of the input)
	long i = -(long)len;
	// at the start of a line, tab/space/'.' are escaped as well (escapedLUT covers them)
	if(*colOffset == 0 && i < 0) {
		uint8_t c = es[i++];
		if (LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c] != 0)) {
			memcpy(p, RapidYenc::escapedLUT + c, 2);
			p += 2;
			*colOffset = 2;
		} else {
			*(p++) = c + 42;
			*colOffset = 1;
		}
	}
	while(i < 0) {
		uint8_t c = es[i++];
		if(*colOffset < line_size-1) {
			// middle of the line: only the always-critical characters are escaped
			if(!RapidYenc::escapeLUT[c]) {
				p[0] = '=';
				p[1] = c+42+64;
				p += 2;
				(*colOffset) += 2;
			} else {
				*(p++) = RapidYenc::escapeLUT[c];
				(*colOffset) += 1;
			}
		} else {
			if(*colOffset < line_size) {
				// last character of the line: tab/space are escaped too, '.' is not
				if (RapidYenc::escapedLUT[c] && c != '.'-42) {
					memcpy(p, RapidYenc::escapedLUT + c, 2);
					p += 2;
				} else {
					*(p++) = c + 42;
				}
				if(i == 0) break;
				c = es[i++];
			}
			
			// handle EOL
			// (the line break is written together with the first character of the next line)
			if (RapidYenc::escapedLUT[c]) {
				uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
				memcpy(p, &w, sizeof(w));
				p += 4;
				*colOffset = 2;
			} else {
				uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
				memcpy(p, &w, sizeof(w));
				p += 3; // 4 bytes are stored, but only 3 are kept
				*colOffset = 1;
			}
		}
	}
	
	if(doEnd) {
		// special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line
		unsigned char lc = *(p-1);
		if(lc == '\t' || lc == ' ') {
			p[-1] = '=';
			*p = lc+64;
			p++;
			(*colOffset)++;
		}
	}
	return p - dest;
}
102 | 
103 | #endif /* __YENC_ENCODER_COMMON */
104 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_rvv.cc:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #include "encoder_common.h"
  3 | 
  4 | #ifdef __riscv_vector
  5 | #include "encoder.h"
  6 | 
  7 | 
  8 | static HEDLEY_ALWAYS_INLINE void encode_eol_handle_pre(const uint8_t* HEDLEY_RESTRICT _src, long& inpos, uint8_t*& outp, long& col, long lineSizeOffset) {
  9 | 	// TODO: vectorize
 10 | 	uint8_t c = _src[inpos++];
 11 | 	if(HEDLEY_UNLIKELY(RapidYenc::escapedLUT[c] && c != '.'-42)) {
 12 | 		memcpy(outp, &RapidYenc::escapedLUT[c], sizeof(uint16_t));
 13 | 		outp += 2;
 14 | 	} else {
 15 | 		*(outp++) = c + 42;
 16 | 	}
 17 | 	
 18 | 	c = _src[inpos++];
 19 | 	if(LIKELIHOOD(0.0273, RapidYenc::escapedLUT[c]!=0)) {
 20 | 		uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)RapidYenc::escapedLUT[c]);
 21 | 		memcpy(outp, &w, sizeof(w));
 22 | 		outp += 4;
 23 | 		col = lineSizeOffset + 2;
 24 | 	} else {
 25 | 		uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
 26 | 		memcpy(outp, &w, sizeof(w));
 27 | 		outp += 3;
 28 | 		col = lineSizeOffset + 1;
 29 | 	}
 30 | }
 31 | 
namespace RapidYenc {

// RISC-V Vector encoding kernel, used as the `kernel` argument of do_encode_simd.
//
//   line_size  maximum encoded line length
//   colOffset  in/out: column within the current output line
//   srcEnd     pointer to the END of the input; input is addressed with negative offsets
//   dest       in/out: output pointer, advanced past everything written
//   len        in/out: on entry the number of input bytes, on return the number of bytes
//              left unprocessed (to be handled by the caller's scalar loop)
//
// Does nothing if the input is not longer than INPUT_OFFSET bytes or if a line is
// shorter than two LMUL=2 vectors.
// Internally `col` is kept relative to (line_size - 1), so `col >= 0` means the last
// position of the line has been reached or passed.
HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) {
	size_t vl2 = RV(vsetvlmax_e8m2)(); // TODO: limit to line length
	// TODO: have a LMUL=1 variant if line_size < vl
	
	// offset position to enable simpler loop condition checking
	const int INPUT_OFFSET = vl2*2 -1; // extra chars for EOL handling, -1 to change <= to <
	if((intptr_t)len <= INPUT_OFFSET || line_size < (int)vl2*2) return;
	
	uint8_t *outp = dest;
	long inpos = -(long)len;
	long lineSizeOffset = -line_size +1;
	long col = *colOffset - line_size +1;
	
	inpos += INPUT_OFFSET;
	const uint8_t* _src = srcEnd - INPUT_OFFSET;
	
	// at the very start of a line: encode the first character on its own, as it has the
	// extended escaping rules (escapedLUT)
	if (HEDLEY_LIKELY(col == -line_size+1)) {
		uint8_t c = _src[inpos++];
		if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) {
			memcpy(outp, escapedLUT + c, 2);
			outp += 2;
			col += 2;
		} else {
			*(outp++) = c + 42;
			col += 1;
		}
	}
	// already at (or past) the last position of the line: emit the line break before
	// entering the vector loop
	if(HEDLEY_UNLIKELY(col >= 0)) {
		if(col == 0)
			encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset);
		else {
			uint8_t c = _src[inpos++];
			if(LIKELIHOOD(0.0273, escapedLUT[c]!=0)) {
				uint32_t v = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
				memcpy(outp, &v, sizeof(v));
				outp += 4;
				col = 2-line_size + 1;
			} else {
				uint32_t v = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
				memcpy(outp, &v, sizeof(v));
				outp += 3;
				col = 2-line_size;
			}
		}
	}
	
	// vector constants
	const vuint8mf2_t ALT_SHIFT = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vmv_v_x_u16mf2)(4, vl2));
	const uint8_t _MASK_EXPAND[] = {0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 0xFA, 0xFB, 0xFE, 0xFF};
	const vuint8m1_t MASK_EXPAND = RV(vle8_v_u8m1)(_MASK_EXPAND, 16);
	
	
	// TODO: consider exploiting partial vector capability
	while(inpos < 0) {
		vuint8m2_t data = RV(vle8_v_u8m2)(_src + inpos, vl2);
		inpos += vl2;
		
		// search for special chars
		// TODO: vrgather strat
		
		// tmpData = data + 42 (the plain encoding); cmp marks the bytes which encode to
		// NUL, '=', CR or LF and hence need escaping
		vuint8m2_t tmpData = RV(vsub_vx_u8m2)(data, -42, vl2);
		vbool4_t cmp = RV(vmor_mm_b4)(
			RV(vmor_mm_b4)(
				RV(vmseq_vx_u8m2_b4)(data, -42, vl2),
				RV(vmseq_vx_u8m2_b4)(tmpData, '=', vl2),
				vl2
			),
			RV(vmor_mm_b4)(
				RV(vmseq_vx_u8m2_b4)(data, '\r'-42, vl2),
				RV(vmseq_vx_u8m2_b4)(data, '\n'-42, vl2),
				vl2
			),
			vl2
		);
		
		// set bit 6 (value 64) on the bytes which need escaping; other bytes keep tmpData
#ifdef __riscv_v_intrinsic
		data = RV(vor_vx_u8m2_mu)(cmp, tmpData, tmpData, 64, vl2);
#else
		data = RV(vor_vx_u8m2_m)(cmp, tmpData, tmpData, 64, vl2);
#endif
		
		int idx;
		size_t count = RV(vcpop_m_b4)(cmp, vl2); // number of characters needing escaping
		if(count > 1) {
			// widen mask: 4b->8b
			vuint8mf4_t vcmp = RV_VEC_U8MF4_CAST(cmp);
			// TODO: use vwsll instead if available
			//    -  is clmul useful here?
			vuint8mf2_t xcmp = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vwmulu_vx_u16mf2)(vcmp, 16, vl2));
			xcmp = RV(vsrl_vv_u8mf2)(xcmp, ALT_SHIFT, vl2);
			
			// expand mask by inserting '1' between each bit (0000abcd -> 1a1b1c1d)
			vuint8m1_t xcmpTmp = RV(vrgather_vv_u8m1)(MASK_EXPAND, RV(vlmul_ext_v_u8mf2_u8m1)(xcmp), vl2);
			vbool2_t cmpmask = RV_MASK_CAST(2, 8, xcmpTmp);
			
			// expand data and insert =
			// TODO: use vwsll instead if available
			vuint16m4_t data2 = RV(vzext_vf2_u16m4)(data, vl2);
			data2 = RV(vsll_vx_u16m4)(data2, 8, vl2);
			data2 = RV(vor_vx_u16m4)(data2, '=', vl2);
			
			// prune unneeded =
			vuint8m4_t dataTmp = RV(vreinterpret_v_u16m4_u8m4)(data2);
			vuint8m4_t final_data = RV(vcompress_vm_u8m4)(
#ifdef __riscv_v_intrinsic
				dataTmp, cmpmask, vl2*2
#else
				cmpmask, dataTmp, dataTmp, vl2*2
#endif
			);
			
			// a full 2*vl2 bytes are stored, but only vl2+count of them are kept
			RV(vse8_v_u8m4)(outp, final_data, vl2*2);
			outp += vl2 + count;
			col += vl2 + count;
			
			if(col >= 0) {
				// we overflowed - find correct position to revert back to
				// TODO: stick with u8 type for vlmax <= 2048 (need to check if ok if vlmax == 2048)
				//   - considering that it's rare for colWidth > 128, maybe just don't support vectors that long
				vuint16m8_t xidx = RV(viota_m_u16m8)(cmpmask, vl2*2);
				vbool2_t discardmask = RV(vmsgeu_vx_u16m8_b2)(xidx, vl2 + count - col, vl2*2);
				long idx_revert = RV(vcpop_m_b2)(discardmask, vl2*2);
				
				outp -= col + (idx_revert & 1);
				inpos -= ((idx_revert+1) >> 1);
				
				goto _encode_eol_handle_pre;
			}
		} else {
			// 0 or 1 special characters
			{
				// mask = elements before the special character (all elements if there is none);
				// store those, write '=' after them, then store the rest shifted up by one byte
				vbool4_t mask = RV(vmsbf_m_b4)(cmp, vl2);
				// TODO: is it better to shuffle this into two stores, instead of three?
				RV(vse8_v_u8m2_m)(mask, outp, data, vl2);
				idx = RV(vcpop_m_b4)(mask, vl2);
				outp[idx] = '=';
				RV(vse8_v_u8m2_m)(RV(vmnot_m_b4)(mask, vl2), outp+1, data, vl2);
				
				outp += vl2 + count;
				col += vl2 + count;
			}
			
			if(col >= 0) {
				// line end reached within this vector: step back to the line's last position
				if(count > 0) {
					idx = vl2 - idx;
					if(HEDLEY_UNLIKELY(col == idx)) {
						// this is an escape character, so line will need to overflow
						outp--;
					} else {
						inpos += (col > idx);
					}
				}
				outp -= col;
				inpos -= col;
				
				_encode_eol_handle_pre:
				encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset);
			}
		}
	}
	
	// convert back to the caller's representation
	*colOffset = col + line_size -1;
	dest = outp;
	len = -(inpos - INPUT_OFFSET);
}
} // namespace
200 | 
// Installs the RVV encoder and records its ISA level.
void RapidYenc::encoder_rvv_init() {
	_do_encode = &do_encode_simd<do_encode_rvv>;
	_encode_isa = ISA_LEVEL_RVV;
}
#else
// built without the RISC-V vector extension: leave the current encoder untouched
void RapidYenc::encoder_rvv_init() {}
207 | #endif /* defined(__riscv_vector) */
208 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_sse2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include "encoder_common.h"
 3 | 
// encoder_sse2_init(): installs the SSE2 encoder and initialises its lookup tables.
// When the compiler doesn't target SSE2, this is a no-op, so whichever encoder is
// already selected stays in place.
#ifdef __SSE2__
#include "encoder_sse_base.h"

void RapidYenc::encoder_sse2_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE2> >;
	encoder_sse_lut<ISA_LEVEL_SSE2>();
	_encode_isa = ISA_LEVEL_SSE2;
}
#else
void RapidYenc::encoder_sse2_init() {}
#endif
15 | 
16 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_ssse3.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include "encoder_common.h"
 3 | 
// Slightly faster version which significantly improves the worst-case scenario; since the worst case doesn't happen often, the overall speedup is relatively minor.
// Requires the PSHUFB (SSSE3) instruction, but will use POPCNT (SSE4.2, or AMD's ABM - though Phenom doesn't support SSSE3, so that doesn't matter) if available (these only seem to give minor speedups, so are considered optional).
// If the compiler doesn't target SSSE3, the setup is handed over to the SSE2 version.
#ifdef __SSSE3__
#include "encoder_sse_base.h"

void RapidYenc::encoder_ssse3_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSSE3> >;
	encoder_sse_lut<ISA_LEVEL_SSSE3>();
	_encode_isa = ISA_LEVEL_SSSE3;
}
#else
void RapidYenc::encoder_ssse3_init() {
	encoder_sse2_init();
}
#endif
19 | 
20 | 


--------------------------------------------------------------------------------
/src/yencode/encoder_vbmi2.cc:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | #include "encoder_common.h"
 3 | 
// Compile-time flag: true only when this file is built with __EVEX512__ undefined,
// one of __AVX10_1__ / __EVEX256__ defined, and AVX512VL + AVX512VBMI2 + AVX512BW enabled.
// encoder_init() consults it when deciding whether the VBMI2 encoder may be used.
#if !defined(__EVEX512__) && (defined(__AVX10_1__) || defined(__EVEX256__)) && defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
const bool RapidYenc::encoder_has_avx10 = true;
#else
const bool RapidYenc::encoder_has_avx10 = false;
#endif
 9 | 
// encoder_vbmi2_init(): installs the VBMI2 encoder. Three build variants exist:
//   - compiler targets AVX512VL+VBMI2+BW, 256-bit allowed  -> AVX2-based kernel
//   - same target, but YENC_DISABLE_AVX256 is defined      -> SSE-based kernel
//   - compiler does not target those extensions            -> defer to the AVX2 setup
#if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__)
# ifndef YENC_DISABLE_AVX256
#  include "encoder_avx_base.h"

void RapidYenc::encoder_vbmi2_init() {
	_do_encode = &do_encode_simd< do_encode_avx2<ISA_LEVEL_VBMI2> >;
	encoder_avx2_lut<ISA_LEVEL_VBMI2>();
	_encode_isa = ISA_LEVEL_VBMI2;
}
# else
#  include "encoder_sse_base.h"
void RapidYenc::encoder_vbmi2_init() {
	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_VBMI2> >;
	encoder_sse_lut<ISA_LEVEL_VBMI2>();
	_encode_isa = ISA_LEVEL_VBMI2;
}
# endif
#else
void RapidYenc::encoder_vbmi2_init() {
	encoder_avx2_init();
}
#endif
32 | 


--------------------------------------------------------------------------------
/src/yencode/platform.cc:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | #ifdef PLATFORM_ARM
  3 | # ifdef __ANDROID__
  4 | #  include <cpu-features.h>
  5 | # elif defined(_WIN32)
  6 | #  define WIN32_LEAN_AND_MEAN
  7 | #  define NOMINMAX
  8 | #  include <Windows.h>
  9 | # elif defined(__APPLE__)
 10 | #  include <sys/types.h>
 11 | #  include <sys/sysctl.h>
 12 | # elif defined(__has_include)
 13 | #  if __has_include(<sys/auxv.h>)
 14 | #   include <sys/auxv.h>
 15 | #   if __has_include(<asm/hwcap.h>)
 16 | #    include <asm/hwcap.h>
 17 | #   endif
 18 | #  endif
 19 | # endif
 20 | bool RapidYenc::cpu_supports_neon() {
 21 | # if defined(AT_HWCAP)
 22 | #  ifdef __FreeBSD__
 23 | 	unsigned long supported;
 24 | 	elf_aux_info(AT_HWCAP, &supported, sizeof(supported));
 25 | #   ifdef __aarch64__
 26 | 	return supported & HWCAP_ASIMD;
 27 | #   else
 28 | 	return supported & HWCAP_NEON;
 29 | #   endif
 30 | #  else
 31 | #   ifdef __aarch64__
 32 | 	return getauxval(AT_HWCAP) & HWCAP_ASIMD;
 33 | #   else
 34 | 	return getauxval(AT_HWCAP) & HWCAP_NEON;
 35 | #   endif
 36 | #  endif
 37 | # elif defined(ANDROID_CPU_FAMILY_ARM)
 38 | #  ifdef __aarch64__
 39 | 	return android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD;
 40 | #  else
 41 | 	return android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON;
 42 | #  endif
 43 | # elif defined(_WIN32)
 44 | 	return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
 45 | # elif defined(__APPLE__)
 46 | 	int supported = 0;
 47 | 	size_t len = sizeof(supported);
 48 | 	if(sysctlbyname("hw.optional.neon", &supported, &len, NULL, 0))
 49 | 		return false;
 50 | 	return (bool)supported;
 51 | # endif
 52 | # ifdef __aarch64__
 53 | 	return true; // assume NEON support on AArch64
 54 | # else
 55 | 	return false;
 56 | # endif
 57 | }
 58 | #endif
 59 | 
 60 | 
 61 | #ifdef PLATFORM_X86
 62 | #ifdef _MSC_VER
 63 | # define _cpuid1(ar) __cpuid(ar, 1)
 64 | # define _cpuid1x(ar) __cpuid(ar, 0x80000001)
 65 | # if _MSC_VER >= 1600
 66 | #  define _cpuidX __cpuidex
 67 | #  include <immintrin.h>
 68 | #  define _GET_XCR() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
 69 | # else
 70 | // not supported
 71 | #  define _cpuidX(ar, eax, ecx) ar[0]=0, ar[1]=0, ar[2]=0, ar[3]=0
 72 | #  define _GET_XCR() 0
 73 | # endif
 74 | #else
 75 | # include <cpuid.h>
 76 | # define _cpuid1(ar) __cpuid(1, ar[0], ar[1], ar[2], ar[3])
 77 | # define _cpuid1x(ar) __cpuid(0x80000001, ar[0], ar[1], ar[2], ar[3])
 78 | # define _cpuidX(ar, eax, ecx) __cpuid_count(eax, ecx, ar[0], ar[1], ar[2], ar[3])
// Executes XGETBV with ECX=0 and returns the value left in EAX (the low 32 bits of
// extended control register 0). EDX, which receives the upper half, is declared as clobbered.
static inline int _GET_XCR() {
	int xcr0;
	__asm__ __volatile__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
	return xcr0;
}
 84 | #endif
 85 | // checks if CPU has 128-bit AVX units; currently not used as AVX2 is beneficial even on Zen1
 86 | // static bool cpu_has_slow_avx(cpuid1flag0) {
 87 | 	// int family = ((cpuid1flag0>>8) & 0xf) + ((cpuid1flag0>>16) & 0xff0),
 88 | 		// model = ((cpuid1flag0>>4) & 0xf) + ((cpuid1flag0>>12) & 0xf0);
 89 | 	// return (
 90 | 		   // family == 0x6f // AMD Bulldozer family
 91 | 		// || family == 0x7f // AMD Jaguar/Puma family
 92 | 		// || (family == 0x8f && (model == 0 /*Summit Ridge ES*/ || model == 1 /*Zen*/ || model == 8 /*Zen+*/ || model == 0x11 /*Zen APU*/ || model == 0x18 /*Zen+ APU*/ || model == 0x50 /*Subor Z+*/)) // AMD Zen1 family
 93 | 		// || (family == 6 && model == 0xf) // Centaur/Zhaoxin; overlaps with Intel Core 2, but they don't support AVX
 94 | 	// );
 95 | // }
 96 | 
 97 | 
// Determines which x86 instruction set level to use on the running CPU.
// Returns one ISA_LEVEL_* value, ORed with any ISA_FEATURE_* flags (POPCNT, LZCNT,
// EVEX512) that were detected. Some CPU models are deliberately reported at a lower
// level than they support, because the relevant instructions are slow on them.
int RapidYenc::cpu_supports_isa() {
	int flags[4]; // CPUID leaf 1: EAX, EBX, ECX, EDX
	_cpuid1(flags);
	int ret = 0;
	
	if(flags[2] & 0x800000)
		ret |= ISA_FEATURE_POPCNT;
	int flags2[4]; // CPUID leaf 0x80000001
	_cpuid1x(flags2);
	if(flags2[2] & 0x20) // ABM
		ret |= ISA_FEATURE_LZCNT | ISA_FEATURE_POPCNT;
	
	// family/model as used below: base value in the low nibble, extended value above it
	int family = ((flags[0]>>8) & 0xf) + ((flags[0]>>16) & 0xff0);
	int model = ((flags[0]>>4) & 0xf) + ((flags[0]>>12) & 0xf0);
	
	if(family == 6 && (
		model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x37 || model == 0x4A || model == 0x4C || model == 0x4D || model == 0x5A || model == 0x5D
	))
		// Intel Bonnell/Silvermont CPU with very slow PSHUFB and PBLENDVB - pretend SSSE3 doesn't exist
		return ret | ISA_LEVEL_SSE2;
	
	if(family == 0x5f && (model == 0 || model == 1 || model == 2))
		// AMD Bobcat with slow SSSE3 instructions - pretend it doesn't exist
		return ret | ISA_LEVEL_SSE2;
	
	if((flags[2] & 0x200) == 0x200) { // SSSE3
		if(family == 6 && (model == 0x5c || model == 0x5f || model == 0x7a || model == 0x9c))
			// Intel Goldmont/plus / Tremont with slow PBLENDVB
			return ret | ISA_LEVEL_SSSE3;
		
		if(flags[2] & 0x80000) { // SSE4.1
			if((flags[2] & 0x1C800000) == 0x1C800000) { // POPCNT + OSXSAVE + XSAVE + AVX
				int xcr = _GET_XCR() & 0xff; // ignore unused bits
				if((xcr & 6) == 6) { // AVX enabled
					int cpuInfo[4]; // CPUID leaf 7, subleaf 0
					_cpuidX(cpuInfo, 7, 0);
					if((cpuInfo[1] & 0x128) == 0x128 && (ret & ISA_FEATURE_LZCNT)) { // BMI2 + AVX2 + BMI1
						if((xcr & 0xE0) == 0xE0) { // AVX512 XSTATE (also applies to AVX10)
							// check AVX10
							int cpuInfo2[4]; // CPUID leaf 7, subleaf 1; then reused for leaf 0x24
							_cpuidX(cpuInfo2, 7, 1);
							if(cpuInfo2[3] & 0x80000) {
								_cpuidX(cpuInfo2, 0x24, 0);
								// required vector width depends on whether 256-bit code was disabled at build time
								if((cpuInfo2[1] & 0xff) >= 1 && ( // minimum AVX10.1
#ifdef YENC_DISABLE_AVX256
									cpuInfo2[1] & 0x10000 // AVX10/128
#else
									cpuInfo2[1] & 0x20000 // AVX10/256
#endif
								)) {
									if(cpuInfo2[1] & 0x40000) ret |= ISA_FEATURE_EVEX512;
									return ret | ISA_LEVEL_VBMI2;
								}
							}
							
							if((cpuInfo[1] & 0xC0010000) == 0xC0010000) { // AVX512BW + AVX512VL + AVX512F
								ret |= ISA_FEATURE_EVEX512;
								if(cpuInfo[2] & 0x40) // AVX512-VBMI2
									return ret | ISA_LEVEL_VBMI2;
								return ret | ISA_LEVEL_AVX3;
							}
						}
						// AVX2 is beneficial even on Zen1
						return ret | ISA_LEVEL_AVX2;
					}
					return ret | ISA_LEVEL_AVX;
				}
			}
			return ret | ISA_LEVEL_SSE41;
		}
		return ret | ISA_LEVEL_SSSE3;
	}
	return ret | ISA_LEVEL_SSE2;
}
172 | 
173 | int RapidYenc::cpu_supports_crc_isa() {
174 | 	int flags[4];
175 | 	_cpuid1(flags);
176 | 	
177 | 	if((flags[2] & 0x80202) == 0x80202) { // SSE4.1 + SSSE3 + CLMUL
178 | 		if((flags[2] & 0x1C000000) == 0x1C000000) { // AVX + OSXSAVE + XSAVE
179 | 			int xcr = _GET_XCR() & 0xff; // ignore unused bits
180 | 			if((xcr & 6) == 6) { // AVX enabled
181 | 				int cpuInfo[4];
182 | 				_cpuidX(cpuInfo, 7, 0);
183 | 				if((cpuInfo[1] & 0x20) == 0x20 && (cpuInfo[2] & 0x400) == 0x400) { // AVX2 + VPCLMULQDQ
184 | 					return 2;
185 | 				}
186 | 			}
187 | 		}
188 | 		return 1;
189 | 	}
190 | 	return 0;
191 | }
192 | 
193 | #endif // PLATFORM_X86
194 | 
195 | #ifdef __riscv
196 | # if defined(__has_include)
197 | #  if __has_include(<sys/auxv.h>)
198 | #   include <sys/auxv.h>
199 | #   if __has_include(<asm/hwcap.h>)
200 | #    include <asm/hwcap.h>
201 | #   endif
202 | #  endif
203 | # endif
204 | bool RapidYenc::cpu_supports_rvv() {
205 | # if defined(AT_HWCAP)
206 | 	unsigned long ret;
207 | #  ifdef __FreeBSD__
208 | 	elf_aux_info(AT_HWCAP, &ret, sizeof(ret));
209 | #  else
210 | 	ret = getauxval(AT_HWCAP);
211 | #  endif
212 | 	return (ret & 0x20112D) == 0x20112D; // IMAFDCV; TODO: how to detect Z* features of 'G'?
213 | # endif
214 | 	return false;
215 | }
216 | #endif
217 | 
218 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | chardet
3 | jaraco.functools
4 | portend


--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import ssl
 3 | import sabctools
 4 | import socket
 5 | import time
 6 | 
 7 | 
 8 | hostname = "eunews.frugalusenet.com"
 9 | context = ssl.create_default_context()
10 | 
11 | print(sabctools.openssl_linked)
12 | 
13 | buffer = bytearray(100)
14 | bufferview = memoryview(buffer)
15 | 
16 | with socket.create_connection((hostname, 563)) as sock:
17 |     with context.wrap_socket(sock, server_hostname=hostname) as ssock:
18 |         ssock.setblocking(False)
19 |         time.sleep(1)
20 |         print(ssock.version())
21 |         print(sabctools.unlocked_ssl_recv_into(ssock._sslobj, bufferview[99:]))
22 |         print(buffer)
23 | 


--------------------------------------------------------------------------------
/tests/test_crc32.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sabctools
 3 | 
 4 | 
 5 | @pytest.mark.parametrize(
 6 |     "crc1,crc2,len2,expected",
 7 |     [
 8 |         (0, 0, 0, 0),
 9 |         (4294967295, 0, 0, 4294967295),
10 |         (0, 4294967295, 0, 4294967295),
11 |         (4294967295, 4294967295, 0, 0),
12 |         (4, 16, 256, 2385497022),
13 |         (100, 200, 300, 1009376567),
14 |         (0, 0, 18446744073709551615, 0),
15 |         (4294967295, 4294967295, 18446744073709551615, 0),
16 |         (0, 100, 1234567890123, 100),
17 |         (100, 0, 1234567890123, 1829446317),
18 |     ],
19 | )
20 | def test_crc32_combine_expected(crc1, crc2, len2, expected):
21 |     assert sabctools.crc32_combine(crc1, crc2, len2) == expected
22 | 
23 | 
@pytest.mark.parametrize(
    "crc1,crc2,expected",
    [
        (0, 0, 0),
        (0xFFFF_FFFF, 0, 0),
        (0, 0xFFFF_FFFF, 0),
        (0xFFFF_FFFF, 0xFFFF_FFFF, 1048090088),
        (100, 200, 4155012749),
    ],
)
def test_crc32_multiply_expected(crc1, crc2, expected):
    """crc32_multiply(crc1, crc2) must return the precomputed value for each case."""
    product = sabctools.crc32_multiply(crc1, crc2)
    assert product == expected
36 | 
37 | 
@pytest.mark.parametrize(
    "crc1,zeroes,expected",
    [
        (0, 0, 0),
        (0xFFFF_FFFF, 0, 0xFFFF_FFFF),
        (0xFFFF_FFFF, 0xFFFF_FFFF, 0xFFFF_FFFF),
        (100, 200, 1523530880),
        # Largest 64-bit count
        (0, 0xFFFF_FFFF_FFFF_FFFF, 0),
        (0xFFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF),
        # Count that does not fit in 32 bits
        (100, 1234567890123, 980217485),
    ],
)
def test_crc32_zero_unpad_expected(crc1, zeroes, expected):
    """crc32_zero_unpad(crc1, zeroes) must return the precomputed value for each case."""
    unpadded = sabctools.crc32_zero_unpad(crc1, zeroes)
    assert unpadded == expected
52 | 
53 | 
@pytest.mark.parametrize(
    "n,expected",
    [
        (0, 1 << 31),
        (1, 1 << 30),
        (8, 1 << 23),
        (30, 1 << 1),
        (31, 1 << 0),
        (0xFFFF_FFFF, 1 << 31),  # same expected value as n=0
        (0x1_0000_0000, 1 << 30),  # same expected value as n=1
    ],
)
def test_crc32_xpown_expected(n, expected):
    """crc32_xpown(n) must return the precomputed value for each case."""
    power = sabctools.crc32_xpown(n)
    assert power == expected
68 | 
69 | 
@pytest.mark.parametrize(
    "n,expected",
    [
        (0, 1 << 31),
        (1, 1 << 23),
        (0xFFFF_FFFF, 1 << 31),  # same expected value as n=0
        (0x1_0000_0000, 1 << 23),  # same expected value as n=1
    ],
)
def test_crc32_xpow8n_expected(n, expected):
    """crc32_xpow8n(n) must return the precomputed value for each case."""
    power = sabctools.crc32_xpow8n(n)
    assert power == expected
81 | 


--------------------------------------------------------------------------------
/tests/test_decoder.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import pytest
  3 | import glob
  4 | from tests.testsupport import *
  5 | 
  6 | 
  7 | def test_regular():
  8 |     data_plain = read_plain_yenc_file("test_regular.yenc")
  9 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 10 |     data_plain = read_plain_yenc_file("test_regular_2.yenc")
 11 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 12 | 
 13 | 
 14 | def test_bytes_compat():
 15 |     data_plain = read_plain_yenc_file("test_regular.yenc")
 16 |     assert python_yenc(data_plain) == sabctools.yenc_decode(memoryview(bytes(data_plain)))
 17 | 
 18 | 
 19 | def test_partial():
 20 |     data_plain = read_plain_yenc_file("test_partial.yenc")
 21 |     decoded_data, filename, filesize, begin, size, crc_correct = sabctools_yenc_wrapper(data_plain)
 22 |     assert filename == "90E2Sdvsmds0801dvsmds90E.part06.rar"
 23 |     assert filesize == 49152000
 24 |     assert begin == 15360000
 25 |     assert size == 384000
 26 |     assert crc_correct is None
 27 |     assert len(decoded_data) == 549
 28 | 
 29 | 
 30 | def test_special_chars():
 31 |     data_plain = read_plain_yenc_file("test_special_chars.yenc")
 32 |     # We only compare the data and the filename
 33 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 34 | 
 35 |     data_plain = read_plain_yenc_file("test_special_utf8_chars.yenc")
 36 |     # We only compare the data and the filename
 37 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 38 | 
 39 | 
 40 | def test_bad_crc():
 41 |     data_plain = read_plain_yenc_file("test_bad_crc.yenc")
 42 |     # We only compare the data and the filename
 43 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 44 | 
 45 | 
 46 | def test_bad_crc_end():
 47 |     data_plain = read_plain_yenc_file("test_bad_crc_end.yenc")
 48 |     with pytest.raises(ValueError) as excinfo:
 49 |         sabctools_yenc_wrapper(data_plain)
 50 |     assert "Invalid CRC in footer" in str(excinfo.value)
 51 | 
 52 | 
 53 | def test_no_filename():
 54 |     data_plain = read_plain_yenc_file("test_no_name.yenc")
 55 |     with pytest.raises(ValueError) as excinfo:
 56 |         sabctools_yenc_wrapper(data_plain)
 57 |     assert "Could not find yEnc filename" in str(excinfo.value)
 58 | 
 59 | 
 60 | def test_padded_crc():
 61 |     data_plain = read_plain_yenc_file("test_padded_crc.yenc")
 62 |     assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)
 63 | 
 64 | 
 65 | def test_end_after_filename():
 66 |     data_plain = read_plain_yenc_file("test_end_after_filename.yenc")
 67 |     with pytest.raises(ValueError):
 68 |         sabctools_yenc_wrapper(data_plain)
 69 | 
 70 | 
 71 | def test_empty():
 72 |     with pytest.raises(ValueError) as excinfo:
 73 |         sabctools.yenc_decode(memoryview(bytearray(b"")))
 74 |     assert "Invalid data length" in str(excinfo.value)
 75 | 
 76 | 
def test_ref_counts():
    """Note that sys.getrefcount itself adds another reference!

    Checks the reference counts of the decoder's input and outputs after a
    successful decode and after decodes that raise ValueError. An expected
    count of 2 therefore means: the local variable plus the temporary
    reference held by sys.getrefcount.

    Do not introduce extra temporaries or aliases here; every additional
    reference to one of the checked objects changes the counts.
    """
    # Test regular case
    data_plain = read_plain_yenc_file("test_regular.yenc")
    data_out, filename, filesize, begin, end, crc_correct = sabctools_yenc_wrapper(data_plain)

    # Input buffer and each checked output must only be held by this frame
    # (filesize is not checked)
    assert sys.getrefcount(data_plain) == 2
    assert sys.getrefcount(data_out) == 2
    assert sys.getrefcount(filename) == 2
    assert sys.getrefcount(begin) == 2
    assert sys.getrefcount(end) == 2
    assert sys.getrefcount(crc_correct) == 2

    # Test simple error case
    fake_inp = memoryview(bytearray(b"1234"))
    assert sys.getrefcount(fake_inp) == 2
    with pytest.raises(ValueError):
        sabctools.yenc_decode(fake_inp)
    # The failed decode must not leave a reference to the input behind
    assert sys.getrefcount(fake_inp) == 2

    # Test further processing
    data_plain = read_plain_yenc_file("test_bad_crc_end.yenc")
    with pytest.raises(ValueError):
        sabctools_yenc_wrapper(data_plain)
    # Same check for an input that raises ValueError via the wrapper
    assert sys.getrefcount(data_plain) == 2
102 | 
103 | 
def test_crc_pickles():
    """Both decoders must agree on every pickled article matching tests/yencfiles/crc_*."""
    for pickle_path in glob.glob("tests/yencfiles/crc_*"):
        data_plain = read_pickle(pickle_path)
        reference = python_yenc(data_plain)
        assert reference == sabctools_yenc_wrapper(data_plain)
109 | 
110 | 
def test_small_file_pickles():
    """Both decoders must agree on every pickled article matching tests/yencfiles/small_file*."""
    for pickle_path in glob.glob("tests/yencfiles/small_file*"):
        data_plain = read_pickle(pickle_path)
        reference = python_yenc(data_plain)
        assert reference == sabctools_yenc_wrapper(data_plain)
116 | 


--------------------------------------------------------------------------------
/tests/test_encoder.py:
--------------------------------------------------------------------------------
1 | from tests.testsupport import *
2 | 
3 | 
def test_encoder():
    """yenc_encode must return the expected encoded bytes and CRC for a short ASCII input."""
    encoded, checksum = sabctools.yenc_encode(b"Hello world!")
    expected_bytes = b"r\x8f\x96\x96\x99J\xa1\x99\x9c\x96\x8eK"
    assert encoded == expected_bytes
    assert checksum == 0x1B851995
8 | 


--------------------------------------------------------------------------------
/tests/test_sparse.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import sys
 4 | import tempfile
 5 | import pytest
 6 | from typing import IO
 7 | 
 8 | from tests.testsupport import *
 9 | 
10 | 
def test_sparse():
    """sabctools.sparse must size a fresh temp file to 100 bytes and leave it sparse."""
    # delete=False: the file is removed by hand below, after it has been closed
    handle = tempfile.NamedTemporaryFile(delete=False)
    try:
        sabctools.sparse(handle, 100)
        reported_size = os.path.getsize(handle.name)
        assert reported_size == 100
        assert is_sparse(handle) is True
    finally:
        handle.close()
        os.unlink(handle.name)
20 | 
@pytest.mark.parametrize(
    "length,position",
    [
        (1024, 0),
        (1024, 512),
        (1024, 4096),
    ],
)
def test_sparse_position_expected(length, position):
    """sabctools.sparse must leave the file position where it was before the call."""
    with tempfile.TemporaryFile() as handle:
        handle.seek(position)
        sabctools.sparse(handle, length)
        position_after = handle.tell()
        assert position_after == position
34 | 
def is_sparse(file: IO) -> bool:
    """Tell whether the named file behind `file` is sparse.

    On Windows the file is closed first and `fsutil sparse queryflag` is
    asked; elsewhere the file counts as sparse when its allocated blocks
    (st_blocks * 512) add up to less than its reported size.
    """
    if sys.platform != "win32":
        allocated_bytes = os.stat(file.name).st_blocks * 512
        return allocated_bytes < os.path.getsize(file.name)

    file.close()
    query = subprocess.run(
        ["fsutil", "sparse", "queryflag", file.name],
        capture_output=True
    )
    return b"This file is set as sparse" in query.stdout
46 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from tests.testsupport import *
 4 | 
 5 | 
 6 | def test_bytearray_malloc():
 7 |     assert len(sabctools.bytearray_malloc(10)) == 10
 8 | 
 9 | 
def test_bytearray_malloc_bad_inputs():
    """bytearray_malloc must reject a float, a negative size and a string with the expected exception."""
    rejected = (
        (10.0, TypeError),
        (-1, SystemError),
        ("foo", TypeError),
    )
    for bad_size, expected_error in rejected:
        with pytest.raises(expected_error):
            sabctools.bytearray_malloc(bad_size)
17 | 


--------------------------------------------------------------------------------
/tests/testsupport.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3 -OO
  2 | # Copyright 2007-2019 The SABnzbd-Team (sabnzbd.org)
  3 | #
  4 | # This program is free software; you can redistribute it and/or
  5 | # modify it under the terms of the GNU General Public License
  6 | # as published by the Free Software Foundation; either version 2
  7 | # of the License, or (at your option) any later version.
  8 | #
  9 | # This program is distributed in the hope that it will be useful,
 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | # GNU General Public License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with this program; if not, write to the Free Software
 16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 17 | 
 18 | ###################
 19 | # SUPPORT FUNCTIONS
 20 | ###################
 21 | import binascii
 22 | import re
 23 | import pickle
 24 | from typing import Tuple, Optional
 25 | 
 26 | import chardet
 27 | import sabctools
 28 | 
 29 | 
 30 | def correct_unknown_encoding(str_or_bytes_in):
 31 |     """Files created on Windows but unpacked/repaired on
 32 |     linux can result in invalid filenames. Try to fix this
 33 |     encoding by going to bytes and then back to unicode again.
 34 |     Last resort we use chardet package
 35 |     """
 36 |     # If already string, back to bytes
 37 |     if not isinstance(str_or_bytes_in, bytes):
 38 |         str_or_bytes_in = str_or_bytes_in.encode("utf-8", "surrogateescape")
 39 | 
 40 |     # Try simple bytes-to-string
 41 |     try:
 42 |         return str_or_bytes_in.decode("utf-8")
 43 |     except UnicodeDecodeError:
 44 |         try:
 45 |             # Try using 8-bit ASCII, if came from Windows
 46 |             return str_or_bytes_in.decode("ISO-8859-1")
 47 |         except ValueError:
 48 |             # Last resort we use the slow chardet package
 49 |             return str_or_bytes_in.decode(chardet.detect(str_or_bytes_in)["encoding"])
 50 | 
 51 | 
 52 | def read_plain_yenc_file(filename: str) -> bytearray:
 53 |     with open("tests/yencfiles/%s" % filename, "rb") as yencfile:
 54 |         return bytearray(yencfile.read())
 55 | 
 56 | 
 57 | def read_pickle(filename):
 58 |     with open(filename, "rb") as yencfile:
 59 |         try:
 60 |             data_chunks, data_bytes = pickle.load(yencfile, encoding="bytes")
 61 |         except:
 62 |             # Reset the pointer and try again
 63 |             yencfile.seek(0)
 64 |             data_chunks, data_bytes, lines = pickle.load(yencfile, encoding="bytes")
 65 |     return bytearray(b"".join(data_chunks))
 66 | 
 67 | 
 68 | def sabctools_yenc_wrapper(data: bytearray) -> Tuple[bytearray, str, int, int, int, Optional[int]]:
 69 |     decoded_data, filename, filesize, begin, size, crc_correct = sabctools.yenc_decode(memoryview(data))
 70 |     return decoded_data, correct_unknown_encoding(filename), filesize, begin, size, crc_correct
 71 | 
 72 | 
 73 | def python_yenc(data_plain):
 74 |     """Use the older decoder to verify the new one"""
 75 |     data = []
 76 | 
 77 |     # Remove the NNTP-double-dot style
 78 |     new_lines = data_plain.split(b"\r\n")
 79 |     for i in range(len(new_lines)):
 80 |         if new_lines[i][:2] == b"..":
 81 |             new_lines[i] = new_lines[i][1:]
 82 |     if new_lines[-1] == b".":
 83 |         new_lines = new_lines[1:-1]
 84 |     data.extend(new_lines)
 85 | 
 86 |     # Parse the yEnc headers
 87 |     yenc, data = parse_yenc_data(data)
 88 |     ybegin, ypart, yend = yenc
 89 | 
 90 |     # Now we get the true flat data
 91 |     flat_yenc_data = b"".join(data)
 92 | 
 93 |     # Remove the escaped-chars
 94 |     for i in (0, 9, 10, 13, 27, 32, 46, 61):
 95 |         j = b"=%c" % (i + 64)
 96 |         flat_yenc_data = flat_yenc_data.replace(j, b"%c" % i)
 97 | 
 98 |     # Use the much faster translate function to do fast-subtract of 42
 99 |     from_bytes = b"".join([b"%c" % i for i in range(256)])
100 |     to_bytes = b"".join([b"%c" % ((i + 256 - 42) % 256) for i in range(256)])
101 |     translate_table = bytes.maketrans(from_bytes, to_bytes)
102 |     decoded_data = flat_yenc_data.translate(translate_table)
103 | 
104 |     # Detect begin and ending
105 |     begin = end = size = 0
106 |     if ypart:
107 |         if begin := ypart.get("begin"):
108 |             begin = int(begin)
109 |         if end := ypart.get("end"):
110 |             end = int(end)
111 |         if end and begin:
112 |             size = end - begin + 1
113 |             begin -= 1
114 | 
115 |     return decoded_data, ybegin["name"], int(ybegin["size"]), begin, size, binascii.crc32(decoded_data)
116 | 
117 | 
def parse_yenc_data(data):
    """Strip the yEnc header and footer lines from a list of article lines.

    Returns ((ybegin, ypart, yend), remaining_lines). Each of the three is
    the result of get_yenc_data for that line, or None when no such line was
    found.
    """
    ybegin = ypart = yend = None

    # Check head: only the first 40 lines are searched for =ybegin
    for pos in range(min(40, len(data))):
        header = data[pos]
        if not header.startswith(b"=ybegin "):
            continue

        # Limit the number of key=value splits to the fields present; going by
        # the example in get_yenc_data, the name value may itself contain "="
        splits = 3
        for optional_key in (b" part=", b" total="):
            if header.find(optional_key) > 0:
                splits += 1
        ybegin = get_yenc_data(header, splits)

        try:
            next_is_ypart = data[pos + 1].startswith(b"=ypart ")
        except IndexError:
            # =ybegin was the very last line: leave data untouched
            break

        if next_is_ypart:
            ypart = get_yenc_data(data[pos + 1])
            data = data[pos + 2 :]
        else:
            data = data[pos + 1 :]
        break

    # Check tail: only the last 10 lines are searched for =yend
    for back in range(1, 11):
        try:
            trailer = data[-back]
        except IndexError:
            # Fewer lines than we tried to look back
            break
        if trailer.startswith(b"=yend "):
            yend = get_yenc_data(trailer)
            data = data[:-back]
            break

    return ((ybegin, ypart, yend), data)
156 | 
157 | 
def get_yenc_data(line, splits=None):
    """Parse the key=value fields of one yEnc control line into a dict of str.

    Example: =ybegin part=1 line=128 size=123 name=-=DUMMY=- abc.par

    `splits`, when truthy, caps how many key= separators are split on;
    everything after the last split stays part of the final value.
    Returns an empty dict when the pieces do not pair up.
    """
    key_pattern = re.compile(b"([a-zA-Z0-9]+)=")

    # maxsplit=0 means "no limit"; drop the text in front of the first key
    tokens = key_pattern.split(line, splits or 0)[1:]

    if len(tokens) % 2 != 0:
        return {}

    # tokens alternate key, value, key, value, ...
    return {
        correct_unknown_encoding(key): correct_unknown_encoding(value.strip())
        for key, value in zip(tokens[0::2], tokens[1::2])
    }
177 | 
178 | 
def yenc_subtract(char, subtract):
    """Return char - subtract, adding 256 once when the difference is negative."""
    difference = char - subtract
    return difference + 256 if difference < 0 else difference
185 | 


--------------------------------------------------------------------------------
/tests/yencfiles/small_file.pickle:
--------------------------------------------------------------------------------
 1 | ((lp0
 2 | S'222 0 <TuPzQaVfAqTbLzQgZgMtAfDx-newzNZB-1521805632716@PRIVATE>\r\n=ybegin part=1 total=1 line=128 size=13 name=newz[NZB].nfo\r\n=ypart begin=1 end=13\r\n\xa8\x88\x98\x8f\xa1\xa4\x85x\x84l\x87\xa84\r\n=yend size=13 part=1 pcrc32=64217fe2 crc32=64217fe2\r\n'
 3 | p1
 4 | aS'.\r\n'
 5 | p2
 6 | aI154
 7 | (lp3
 8 | tp4
 9 | .
10 | 


--------------------------------------------------------------------------------
/tests/yencfiles/small_file_2.pickle:
--------------------------------------------------------------------------------
 1 | ((lp0
 2 | S'222 0 <SvEjHdEiXpCjHpKwIlUvRzVy-newzNZB-1521805632706@PRIVATE>\r\n=ybegin part=1 total=1 line=128 size=30 name=RARBG.txt\r\n=ypart begin=1 end=30\r\n~\x99\x9c\x9c\x8f\x98\x9eJ\x8e\x99\xa1\x98\x96\x99\x8b\x8e\x8f\x8eJ\x90\x9c\x99\x97J|k|lq4\r\n=yend size=30 part=1 pcrc32=b6327787 crc32=b6327787\r\n'
 3 | p1
 4 | aS'.\r\n'
 5 | p2
 6 | aI167
 7 | (lp3
 8 | tp4
 9 | .
10 | 


--------------------------------------------------------------------------------
/tests/yencfiles/test_bad_crc.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_bad_crc.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_bad_crc_end.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_bad_crc_end.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_end_after_filename.yenc:
--------------------------------------------------------------------------------
1 | =ybegin part=41 line=128 size=49152000 name=90E2Sdvsmds080


--------------------------------------------------------------------------------
/tests/yencfiles/test_no_name.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_no_name.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_padded_crc.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_padded_crc.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_partial.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_partial.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_regular.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_regular.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_regular_2.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_regular_2.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_special_chars.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_special_chars.yenc


--------------------------------------------------------------------------------
/tests/yencfiles/test_special_utf8_chars.yenc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sabnzbd/sabctools/4bf805ad210136fdc70f022141ff363d0ef67e5b/tests/yencfiles/test_special_utf8_chars.yenc


--------------------------------------------------------------------------------