├── docs └── images │ └── qatzstdplugin.png ├── SECURITY.md ├── .gitignore ├── test ├── fuzzing │ ├── README.md │ ├── Makefile │ └── qatseqprodfuzzer.c ├── Makefile ├── test.c └── benchmark.c ├── LICENSE.ZSTD ├── LICENSE ├── qat-zstd-plugin.spec ├── CONTRIBUTING.md ├── third-party-programs.txt ├── Makefile ├── src ├── Makefile ├── qatseqprod.h └── qatseqprod.c ├── CODE_OF_CONDUCT.md └── README.md /docs/images/qatzstdplugin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/QAT-ZSTD-Plugin/HEAD/docs/images/qatzstdplugin.png -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Security Vulnerability 5 | 6 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 
7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | *.o 3 | *.a 4 | *.so 5 | *.dylib 6 | *.dll 7 | *.exe 8 | *.out 9 | 10 | # Static and shared libraries 11 | lib*.a 12 | lib*.so* 13 | 14 | # Executables 15 | test/test 16 | test/benchmark 17 | 18 | # Core dumps 19 | core 20 | core.* 21 | vgcore.* 22 | 23 | # Debug files 24 | *.dSYM/ 25 | *.su 26 | *.idb 27 | *.pdb 28 | 29 | # Temporary files 30 | *.tmp 31 | *.temp 32 | *~ 33 | *.swp 34 | *.swo 35 | .DS_Store 36 | Thumbs.db 37 | 38 | # Log files 39 | *.log 40 | 41 | # Coverage files 42 | *.gcno 43 | *.gcda 44 | *.gcov 45 | lcov.info 46 | coverage/ 47 | 48 | # Profiling files 49 | gmon.out 50 | *.prof 51 | 52 | # Static analysis outputs 53 | cppcheck-*.xml 54 | scan-build-*/ 55 | 56 | # IDE/Editor files 57 | .vscode/ 58 | .idea/ 59 | *.sublime-* 60 | tags 61 | TAGS 62 | .clangd/ 63 | 64 | # OS generated files 65 | *.pid 66 | *.seed 67 | *.pid.lock 68 | 69 | # Package files 70 | *.tar.gz 71 | *.tgz 72 | *.zip 73 | *.deb 74 | *.rpm 75 | 76 | # Test results 77 | test_results/ 78 | *.junit.xml 79 | -------------------------------------------------------------------------------- /test/fuzzing/README.md: -------------------------------------------------------------------------------- 1 | Fuzzing test for QAT ZSTD Plugin 2 | ============================== 3 | 4 | Zstandard* (ZSTD*) provides an interface for fuzz testing an external sequence producer, [`fuzz_third_party_seq_prod.h`][1], available since release [1.5.5][2]. 
5 | 6 | **Steps to run fuzzing test for QAT ZSTD Plugin** 7 | 8 | ```bash 9 | # Compile qatseqprodfuzzer.o with clang 10 | cd test/fuzzing 11 | make qatseqprodfuzzer.o 12 | 13 | # Build and run fuzzing targets 14 | git clone https://github.com/facebook/zstd.git 15 | cd zstd/tests/fuzz 16 | make corpora 17 | python3 ./fuzz.py build all --custom-seq-prod=*/qatseqprodfuzzer.o --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ --ldflags=-lqat_s 18 | python3 ./fuzz.py libfuzzer simple_round_trip 19 | python3 ./fuzz.py libfuzzer stream_round_trip 20 | python3 ./fuzz.py libfuzzer dictionary_round_trip 21 | python3 ./fuzz.py libfuzzer block_round_trip 22 | python3 ./fuzz.py libfuzzer decompress_dstSize_tooSmall 23 | python3 ./fuzz.py libfuzzer dictionary_decompress 24 | python3 ./fuzz.py libfuzzer dictionary_loader 25 | python3 ./fuzz.py libfuzzer dictionary_stream_round_trip 26 | python3 ./fuzz.py libfuzzer raw_dictionary_round_trip 27 | python3 ./fuzz.py libfuzzer sequence_compression_api 28 | python3 ./fuzz.py libfuzzer simple_compress 29 | ``` 30 | 31 | [1]:https://github.com/facebook/zstd/blob/dev/tests/fuzz/fuzz_third_party_seq_prod.h 32 | [2]:https://github.com/facebook/zstd/releases/tag/v1.5.5 -------------------------------------------------------------------------------- /LICENSE.ZSTD: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Zstandard software 4 | 5 | Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 
12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook, nor Meta, nor the names of its contributors may 18 | be used to endorse or promote products derived from this software without 19 | specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD-3-Clause License 2 | 3 | Copyright(c) 2016-2025 Intel Corporation. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 
12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | * Neither the name of Intel Corporation nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /qat-zstd-plugin.spec: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | Name: qat-zstd-plugin 4 | Version: 1.0.0 5 | Release: %autorelease 6 | Summary: Intel QuickAssist Technology ZSTD Plugin 7 | 8 | License: BSD-3-Clause 9 | URL: https://github.com/intel/QAT-ZSTD-Plugin 10 | Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz 11 | 12 | Patch0: test.patch 13 | 14 | BuildRequires: automake 15 | BuildRequires: gcc 16 | BuildRequires: libzstd-devel 17 | BuildRequires: qatlib-devel 18 | BuildRequires: numactl-devel 19 | 20 | # Upstream only supports x86_64 21 | ExclusiveArch: x86_64 22 | 23 | %description 24 | Intel QuickAssist Technology ZSTD is a plugin to Zstandard for accelerating 25 | compression by QAT. ZSTD* is a fast lossless compression algorithm, targeting 26 | real-time compression scenarios at zlib-level and better compression ratios. 27 | 28 | %package devel 29 | Summary: Development files for %{name} 30 | Requires: %{name}%{?_isa} = %{version}-%{release} 31 | 32 | %description devel 33 | The %{name}-devel package contains libraries and header files for 34 | applications that use %{name}. 35 | 36 | %package static 37 | Summary: Static library for %{name} 38 | Requires: %{name}-devel%{?_isa} = %{version}-%{release} 39 | 40 | %description static 41 | The %{name}-static package contains the static %{name} library. 
42 | 43 | %prep 44 | %autosetup -p1 -n QAT-ZSTD-Plugin-%{version} 45 | 46 | # fedora/rhel path fixes 47 | sed -i -e 's|/usr/local|%{_prefix}|g' src/Makefile 48 | sed -i -e 's|$(PREFIX)/lib|%{_libdir}|g' src/Makefile 49 | sed -i -e 's|$(PREFIX)/include|%{_includedir}|g' src/Makefile 50 | 51 | %build 52 | %make_build LDFLAGS="$LDFLAGS -lzstd" 53 | make test 54 | 55 | %install 56 | %make_install 57 | 58 | %check 59 | LD_LIBRARY_PATH=%{buildroot}%{_libdir} ./test/test README.md 60 | 61 | %files 62 | %license LICENSE 63 | %{_libdir}/libqatseqprod.so.1 64 | %{_libdir}/libqatseqprod.so.%{version} 65 | 66 | %files devel 67 | %{_includedir}/qatseqprod.h 68 | %{_libdir}/libqatseqprod.so 69 | 70 | %files static 71 | %license LICENSE 72 | %{_libdir}/libqatseqprod.a 73 | 74 | %changelog 75 | %autochangelog 76 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ### License 4 | 5 | QAT ZSTD Plugin is licensed under the terms in [LICENSE](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. 6 | 7 | ### Sign your work 8 | 9 | Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify 10 | the below (from [developercertificate.org](http://developercertificate.org/)): 11 | 12 | ``` 13 | Developer Certificate of Origin 14 | Version 1.1 15 | 16 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 17 | 660 York Street, Suite 102, 18 | San Francisco, CA 94110 USA 19 | 20 | Everyone is permitted to copy and distribute verbatim copies of this 21 | license document, but changing it is not allowed. 
22 | 23 | Developer's Certificate of Origin 1.1 24 | 25 | By making a contribution to this project, I certify that: 26 | 27 | (a) The contribution was created in whole or in part by me and I 28 | have the right to submit it under the open source license 29 | indicated in the file; or 30 | 31 | (b) The contribution is based upon previous work that, to the best 32 | of my knowledge, is covered under an appropriate open source 33 | license and I have the right under that license to submit that 34 | work with modifications, whether created in whole or in part 35 | by me, under the same open source license (unless I am 36 | permitted to submit under a different license), as indicated 37 | in the file; or 38 | 39 | (c) The contribution was provided directly to me by some other 40 | person who certified (a), (b) or (c) and I have not modified 41 | it. 42 | 43 | (d) I understand and agree that this project and the contribution 44 | are public and that a record of the contribution (including all 45 | personal information I submit with it, including my sign-off) is 46 | maintained indefinitely and may be redistributed consistent with 47 | this project or the open source license(s) involved. 48 | ``` 49 | 50 | Then you just add a line to every git commit message: 51 | 52 | Signed-off-by: Joe Smith <joe.smith@email.com> 53 | 54 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 55 | 56 | If you set your `user.name` and `user.email` git configs, you can sign your 57 | commit automatically with `git commit -s`. 58 | -------------------------------------------------------------------------------- /third-party-programs.txt: -------------------------------------------------------------------------------- 1 | QAT ZSTD Plugin Third Party Programs File 2 | 3 | This file contains the list of third party software (“third party programs”) contained in the Intel software and their required notices and/or license terms. 
This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the “third-party-programs.txt” or other similarly-named text file. 4 | 5 | Third party programs and their corresponding required notices and/or license terms are listed below. 6 | 7 | ------------------------------------------------------------- 8 | 1. Software Released under the BSD License: 9 | 10 | Zstandard 11 | Copyright (c) Meta Platforms, Inc. and affiliates. 12 | 13 | BSD License 14 | 15 | Redistribution and use in source and binary forms, with or without modification, 16 | are permitted provided that the following conditions are met: 17 | 18 | * Redistributions of source code must retain the above copyright notice, this 19 | list of conditions and the following disclaimer. 20 | 21 | * Redistributions in binary form must reproduce the above copyright notice, 22 | this list of conditions and the following disclaimer in the documentation 23 | and/or other materials provided with the distribution. 24 | 25 | * Neither the name Facebook, nor Meta, nor the names of its contributors may 26 | be used to endorse or promote products derived from this software without 27 | specific prior written permission. 28 | 29 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 30 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 31 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 32 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 33 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 34 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 35 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 36 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 | 40 | ------------------------------------------------------------- 41 | Other names and brands may be claimed as the property of others. 42 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # ####################################################################### 2 | # 3 | # BSD LICENSE 4 | # 5 | # Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above copyright 15 | # notice, this list of conditions and the following disclaimer in 16 | # the documentation and/or other materials provided with the 17 | # distribution. 18 | # * Neither the name of Intel Corporation nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 
21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # ####################################################################### 35 | 36 | SRCDIR = src 37 | TESTDIR = test 38 | 39 | .PHONY: default 40 | default: lib 41 | 42 | .PHONY: lib 43 | lib: 44 | $(Q)$(MAKE) -C $(SRCDIR) $@ 45 | 46 | .PHONY: test 47 | test: 48 | $(Q)$(MAKE) -C $(TESTDIR) $@ 49 | 50 | .PHONY: benchmark 51 | benchmark: 52 | $(Q)$(MAKE) -C $(TESTDIR) $@ 53 | 54 | .PHONY: install 55 | install: 56 | $(Q)$(MAKE) -C $(SRCDIR) $@ 57 | 58 | .PHONY: uninstall 59 | uninstall: 60 | $(Q)$(MAKE) -C $(SRCDIR) $@ 61 | 62 | clean: 63 | $(Q)$(MAKE) -C $(SRCDIR) $@ 64 | $(Q)$(MAKE) -C $(TESTDIR) $@ 65 | 66 | ######################## 67 | # RPM package building # 68 | ######################## 69 | rpm: 70 | mkdir -p rpmbuild/BUILD rpmbuild/RPMS rpmbuild/SOURCES rpmbuild/SPECS rpmbuild/SRPMS 71 | rpmbuild --undefine=_disable_source_fetch --define "_topdir $(PWD)/rpmbuild" -ba qat-zstd-plugin.spec 72 | 73 | rpmclean: 74 | @rm -fr rpmbuild 75 | 76 | .PHONY: rpm rpmclean 77 | -------------------------------------------------------------------------------- /test/fuzzing/Makefile: 
-------------------------------------------------------------------------------- 1 | # ####################################################################### 2 | # 3 | # BSD LICENSE 4 | # 5 | # Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above copyright 15 | # notice, this list of conditions and the following disclaimer in 16 | # the documentation and/or other materials provided with the 17 | # distribution. 18 | # * Neither the name of Intel Corporation nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | # 34 | # ####################################################################### 35 | LIB = ../../src 36 | 37 | CC = clang 38 | 39 | default: qatseqprodfuzzer.o 40 | 41 | all: qatseqprodfuzzer.o 42 | 43 | ifneq ($(ICP_ROOT), ) 44 | QATFLAGS = -I$(ICP_ROOT)/quickassist/include \ 45 | -I$(ICP_ROOT)/quickassist/include/dc \ 46 | -I$(ICP_ROOT)/quickassist/lookaside/access_layer/include \ 47 | -I$(ICP_ROOT)/quickassist/utilities/libusdm_drv 48 | else 49 | QATFLAGS = -I/usr/local/include/qat -I/usr/include/qat 50 | endif 51 | 52 | ifdef ZSTDLIB 53 | CFLAGS += -I$(ZSTDLIB) 54 | endif 55 | 56 | 57 | CFLAGS += -g -fno-omit-frame-pointer -fsanitize=undefined,address,fuzzer 58 | DEBUGLEVEL ?=0 59 | DEBUGFLAGS += -DDEBUGLEVEL=$(DEBUGLEVEL) 60 | 61 | qatseqprodfuzzer.o: $(LIB)/qatseqprod.c 62 | $(CC) -c $(CFLAGS) $(QATFLAGS) $(DEBUGFLAGS) $^ -o qatseqprod.o 63 | $(CC) -c $(CFLAGS) qatseqprodfuzzer.c -o _qatseqprodfuzzer.o 64 | ld -r qatseqprod.o _qatseqprodfuzzer.o -o $@ 65 | 66 | clean: 67 | $(RM) *.o 68 | -------------------------------------------------------------------------------- /test/fuzzing/qatseqprodfuzzer.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * BSD LICENSE 4 | * 5 | * Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 
18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | * 34 | ***************************************************************************/ 35 | 36 | #include 37 | #include 38 | #include 39 | #include "qatseqprod.h" 40 | 41 | size_t FUZZ_seqProdSetup(void) 42 | { 43 | return QZSTD_startQatDevice(); 44 | } 45 | 46 | size_t FUZZ_seqProdTearDown(void) 47 | { 48 | return 0; 49 | } 50 | 51 | void *FUZZ_createSeqProdState(void) 52 | { 53 | return QZSTD_createSeqProdState(); 54 | } 55 | 56 | size_t FUZZ_freeSeqProdState(void *state) 57 | { 58 | QZSTD_freeSeqProdState(state); 59 | return 0; 60 | } 61 | 62 | size_t FUZZ_thirdPartySeqProd( 63 | void *sequenceProducerState, 64 | ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, 65 | const void *src, size_t srcSize, 66 | const void *dict, size_t dictSize, 67 | int compressionLevel, 68 | size_t windowSize 69 | ) 70 | { 71 | return qatSequenceProducer(sequenceProducerState, outSeqs, 72 | outSeqsCapacity, src, srcSize, dict, dictSize, 73 | compressionLevel, windowSize); 74 | } -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # ####################################################################### 2 | # 3 | # BSD LICENSE 4 | # 5 | # Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above copyright 15 | # notice, this list of conditions and the following disclaimer in 16 | # the documentation and/or other materials provided with the 17 | # distribution. 
18 | # * Neither the name of Intel Corporation nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # ####################################################################### 35 | LIB = ../src 36 | 37 | LDFLAGS = $(LIB)/libqatseqprod.a -I$(LIB) 38 | 39 | ifneq ($(ICP_ROOT), ) 40 | LDFLAGS += -lqat_s -lusdm_drv_s -lnuma -Wl,-rpath,$(ICP_ROOT)/build -L$(ICP_ROOT)/build 41 | else 42 | LDFLAGS += -lqat -lusdm -lnuma 43 | endif 44 | 45 | ifdef ZSTDLIB 46 | CFLAGS += -I$(ZSTDLIB) 47 | LDFLAGS += $(ZSTDLIB)/libzstd.a 48 | else 49 | ZSTDLIB := $(shell find /usr /lib /local -name 'libzstd.a' 2>/dev/null | head -n 1| xargs dirname) 50 | LDFLAGS += $(ZSTDLIB)/libzstd.a 51 | endif 52 | 53 | default: test benchmark 54 | 55 | all: test benchmark 56 | 57 | check_zstdlib: 58 | @echo checking zstd static libaray '$(ZSTDLIB)/libzstd.a' 59 | @if [ ! 
-e $(ZSTDLIB)/libzstd.a ]; then \ 60 | echo libzstd.a is not found, please install libzstd or specify the path manually; \ 61 | exit 1; \ 62 | fi 63 | 64 | test: test.c check_zstdlib 65 | $(Q)$(MAKE) -C $(LIB) 66 | $(CC) $< $(CFLAGS) $(LDFLAGS) -o $@ 67 | 68 | benchmark: benchmark.c check_zstdlib 69 | $(Q)$(MAKE) -C $(LIB) 70 | $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ -lpthread 71 | 72 | clean: 73 | $(Q)$(MAKE) -C $(LIB) $@ 74 | $(RM) test benchmark 75 | .PHONY: clean check_zstdlib 76 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # ####################################################################### 2 | # 3 | # BSD LICENSE 4 | # 5 | # Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above copyright 15 | # notice, this list of conditions and the following disclaimer in 16 | # the documentation and/or other materials provided with the 17 | # distribution. 18 | # * Neither the name of Intel Corporation nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # #######################################################################

# Build, install and clean rules for the QAT sequence producer library
# (libqatseqprod.a / libqatseqprod.so.<version>).

# None of these targets produce a file of the same name, so all are phony.
.PHONY: default lib install uninstall clean
default: lib

# Installation layout; every variable can be overridden on the command line.
DESTDIR ?=
PREFIX ?= /usr/local
LIBDIR ?= $(PREFIX)/lib
INCLUDEDIR ?= $(PREFIX)/include

CP ?= cp
INSTALL ?= install

INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_DATA ?= $(INSTALL) -m 644

ifneq ($(ICP_ROOT), )
    # out-of-tree: headers and libraries come from the driver tree at ICP_ROOT
    QATFLAGS = -I$(ICP_ROOT)/quickassist/include \
               -I$(ICP_ROOT)/quickassist/include/dc \
               -I$(ICP_ROOT)/quickassist/lookaside/access_layer/include \
               -I$(ICP_ROOT)/quickassist/utilities/libusdm_drv
    LDFLAGS = -Wl,-rpath,$(ICP_ROOT)/build -L$(ICP_ROOT)/build -lqat_s \
              -lusdm_drv_s -lnuma
else
    # In-tree - look for headers in standard locations
    QAT_INCLUDE_PATH := $(shell if [ -d /usr/local/include/qat ]; then echo /usr/local/include/qat; elif [ -d /usr/include/qat ]; then echo /usr/include/qat; fi)
    ifneq ($(QAT_INCLUDE_PATH), )
        QATFLAGS = -I$(QAT_INCLUDE_PATH)
    else
        # Headers are only needed to compile. Do not abort when every requested
        # goal is 'clean' or 'uninstall', so those keep working on machines
        # without the QAT development package installed.
        ifneq ($(filter-out clean uninstall,$(if $(MAKECMDGOALS),$(MAKECMDGOALS),default)),)
            $(error QAT headers not found in Standard path, Please install QATLib development package or set ICP_ROOT for out-of-tree driver)
        endif
    endif
    LDFLAGS = -lqat -lusdm -lnuma
endif

# Optional path to the zstd library sources (for zstd.h).
ifdef ZSTDLIB
    CFLAGS += -I$(ZSTDLIB)
endif

CFLAGS += -Wall -Werror -Wextra -Wcast-align -Wshadow -Wstrict-aliasing=1 \
          -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
          -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self \
          -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wc++-compat \
          -pedantic -fstack-protector-strong -fPIE -fPIC \
          -fno-delete-null-pointer-checks -fwrapv -fno-strict-overflow

# DEBUGLEVEL=0 builds optimised; any other value builds with -g -O0.
DEBUGLEVEL ?= 0

DEBUGFLAGS += -DDEBUGLEVEL=$(DEBUGLEVEL)
ifneq ($(DEBUGLEVEL), 0)
    QATFLAGS += -g -O0
else
    QATFLAGS += -O3
endif

# Extract version from header file
VERSION_MAJOR := $(shell grep 'define QZSTD_VERSION_MAJOR' qatseqprod.h | awk '{print $$3}')
VERSION_MINOR := $(shell grep 'define QZSTD_VERSION_MINOR' qatseqprod.h | awk '{print $$3}')
VERSION_PATCH := $(shell grep 'define QZSTD_VERSION_RELEASE' qatseqprod.h | awk '{print $$3}')

VERSION := $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
# Fail if ANY component is missing: $(and ...) expands to empty as soon as one
# argument is empty, so a partially extracted version such as "1..0" is caught.
ifeq ($(and $(strip $(VERSION_MAJOR)),$(strip $(VERSION_MINOR)),$(strip $(VERSION_PATCH))),)
    $(error Version extraction failed: check qatseqprod.h for QZSTD_VERSION macros)
endif

# The object also depends on the public header so edits to it trigger a
# rebuild; $< keeps the header off the compiler command line.
qatseqprod.o: qatseqprod.c qatseqprod.h
	$(CC) -c $(CFLAGS) $(QATFLAGS) $(DEBUGFLAGS) $< -o $@

# Static archive plus versioned shared object with the usual symlink chain:
# libqatseqprod.so -> libqatseqprod.so.<major> -> libqatseqprod.so.<version>
lib: qatseqprod.o
	$(AR) rc libqatseqprod.a $^
	$(CC) -shared $^ $(LDFLAGS) -Wl,-soname,libqatseqprod.so.$(VERSION_MAJOR) -o libqatseqprod.so.$(VERSION)
	ln -sf libqatseqprod.so.$(VERSION) libqatseqprod.so.$(VERSION_MAJOR)
	ln -sf libqatseqprod.so.$(VERSION_MAJOR) libqatseqprod.so

install: lib
	[ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
	$(INSTALL_PROGRAM) libqatseqprod.so.$(VERSION) $(DESTDIR)$(LIBDIR)
	ln -sf libqatseqprod.so.$(VERSION) $(DESTDIR)$(LIBDIR)/libqatseqprod.so.$(VERSION_MAJOR)
	ln -sf libqatseqprod.so.$(VERSION_MAJOR) $(DESTDIR)$(LIBDIR)/libqatseqprod.so
	$(INSTALL_DATA) libqatseqprod.a $(DESTDIR)$(LIBDIR)
	$(INSTALL_DATA) qatseqprod.h $(DESTDIR)$(INCLUDEDIR)
	@echo qatseqprod library successfully installed

uninstall:
	$(RM) $(DESTDIR)$(LIBDIR)/libqatseqprod.a
	$(RM) $(DESTDIR)$(LIBDIR)/libqatseqprod.so.$(VERSION)
	$(RM) $(DESTDIR)$(LIBDIR)/libqatseqprod.so.$(VERSION_MAJOR)
	$(RM) $(DESTDIR)$(LIBDIR)/libqatseqprod.so
	$(RM) $(DESTDIR)$(INCLUDEDIR)/qatseqprod.h
	@echo qatseqprod library successfully uninstalled

clean:
	$(RM) *.o
	$(RM) libqatseqprod.a libqatseqprod.so libqatseqprod.so.$(VERSION) libqatseqprod.so.$(VERSION_MAJOR)
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | CommunityCodeOfConduct AT intel DOT com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | -------------------------------------------------------------------------------- /test/test.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * BSD LICENSE 4 | * 5 | * Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 
6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | * 34 | ***************************************************************************/ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "qatseqprod.h" 46 | 47 | #ifndef ZSTD_STATIC_LINKING_ONLY 48 | #define ZSTD_STATIC_LINKING_ONLY 49 | #endif 50 | #include "zstd.h" 51 | #include "zstd_errors.h" 52 | 53 | int main(int argc, char *argv[]) 54 | { 55 | char *inputFileName = NULL; 56 | int inputFile = -1; 57 | struct stat inputFileStat; 58 | long inputFileSize = 0; 59 | long dstBufferSize = 0; 60 | unsigned char *srcBuffer = NULL; 61 | unsigned char *dstBuffer = NULL; 62 | unsigned char *decompBuffer = NULL; 63 | ssize_t bytesRead = 0; 64 | size_t cSize = 0; 65 | size_t res = 0; 66 | ZSTD_CCtx *const zc = ZSTD_createCCtx(); 67 | QZSTD_startQatDevice(); 68 | void *sequenceProducerState = QZSTD_createSeqProdState(); 69 | 70 | if (argc != 2) { 71 | printf("Usage: test \n"); 72 | return 1; 73 | } 74 | 75 | inputFileName = argv[1]; 76 | inputFile = open(inputFileName, O_RDONLY); 77 | if (inputFile < 0) { 78 | printf("Cannot open input file: %s\n", inputFileName); 79 | return 1; 80 | } 81 | if (fstat(inputFile, &inputFileStat)) { 82 | printf("Cannot get file stat\n"); 83 | close(inputFile); 84 | return 1; 85 | } 86 | 87 | /* get input file size */ 88 | inputFileSize = lseek(inputFile, 0, SEEK_END); 89 | if (inputFileSize < 0) { 90 | printf("Cannot get file size\n"); 91 | close(inputFile); 92 | return 1; 93 | } 94 | 95 | if (lseek(inputFile, 0, SEEK_SET) < 0) { 96 | printf("Cannot seek to beginning of file\n"); 97 | close(inputFile); 98 | return 1; 99 | } 100 | 101 | dstBufferSize = ZSTD_compressBound(inputFileSize); 102 | 103 | srcBuffer = (unsigned char *)malloc(inputFileSize); 104 | assert(srcBuffer != NULL); 105 | dstBuffer = (unsigned char *)malloc(dstBufferSize); 106 | assert(dstBuffer != NULL); 107 | 108 | bytesRead = read(inputFile, srcBuffer, inputFileSize); 109 | 110 | /* Check 
for read errors */ 111 | if (bytesRead < 0) { 112 | printf("Error reading file: %s\n", inputFileName); 113 | close(inputFile); 114 | free(srcBuffer); 115 | free(dstBuffer); 116 | return 1; 117 | } 118 | 119 | /* Check for empty file or no data read */ 120 | if (bytesRead == 0) { 121 | printf("No data read from file: %s\n", inputFileName); 122 | close(inputFile); 123 | free(srcBuffer); 124 | free(dstBuffer); 125 | return 1; 126 | } 127 | 128 | decompBuffer = malloc(bytesRead); 129 | assert(decompBuffer); 130 | 131 | /* register qatSequenceProducer */ 132 | ZSTD_registerSequenceProducer( 133 | zc, 134 | sequenceProducerState, 135 | qatSequenceProducer 136 | ); 137 | 138 | res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1); 139 | if ((int)res <= 0) { 140 | printf("Failed to set fallback\n"); 141 | goto exit; 142 | } 143 | 144 | /* compress */ 145 | cSize = ZSTD_compress2(zc, dstBuffer, dstBufferSize, srcBuffer, 146 | (size_t)bytesRead); 147 | if ((int)cSize <= 0) { 148 | printf("Compress failed\n"); 149 | goto exit; 150 | } 151 | 152 | /* decompress */ 153 | res = ZSTD_decompress(decompBuffer, (size_t)bytesRead, dstBuffer, cSize); 154 | if (res != (size_t)bytesRead) { 155 | printf("Decompressed size is not equal to source size\n"); 156 | goto exit; 157 | } 158 | 159 | /* compare original buffer with decompressed output */ 160 | /* Ensure bytesRead is valid before comparison */ 161 | if (bytesRead <= 0) { 162 | printf("ERROR: Invalid bytes read value for comparison\n"); 163 | goto exit; 164 | } 165 | 166 | if (memcmp(decompBuffer, srcBuffer, (size_t)bytesRead) == 0) { 167 | printf("Compression and decompression were successful!\n"); 168 | printf("Source size: %ld\n", (long)bytesRead); 169 | printf("Compressed size: %lu\n", cSize); 170 | } else { 171 | printf("ERROR: input and validation buffers don't match!\n"); 172 | } 173 | 174 | exit: 175 | ZSTD_freeCCtx(zc); 176 | QZSTD_freeSeqProdState(sequenceProducerState); 177 | QZSTD_stopQatDevice(); 178 | 
free(srcBuffer); 179 | free(dstBuffer); 180 | free(decompBuffer); 181 | return 0; 182 | } 183 | -------------------------------------------------------------------------------- /src/qatseqprod.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * BSD LICENSE 4 | * 5 | * Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | * 34 | ***************************************************************************/ 35 | #if defined (__cplusplus) 36 | extern "C" { 37 | #endif 38 | 39 | #ifndef QATSEQPROD_H 40 | #define QATSEQPROD_H 41 | 42 | #ifndef ZSTD_STATIC_LINKING_ONLY 43 | #define ZSTD_STATIC_LINKING_ONLY 44 | #endif 45 | #include "zstd.h" 46 | 47 | /** 48 | * Version 49 | */ 50 | #define QZSTD_VERSION "1.0.0" 51 | #define QZSTD_VERSION_MAJOR 1 52 | #define QZSTD_VERSION_MINOR 0 53 | #define QZSTD_VERSION_RELEASE 0 54 | #define QZSTD_VERSION_NUMBER (QZSTD_VERSION_MAJOR *100*100 + QZSTD_VERSION_MINOR *100 \ 55 | + QZSTD_VERSION_RELEASE) 56 | 57 | /** QZSTD_Status_e: 58 | * Error code indicates status 59 | */ 60 | typedef enum { 61 | QZSTD_OK = 0, /* Success */ 62 | QZSTD_STARTED = 1, /* QAT device started */ 63 | QZSTD_FAIL = -1, /* Unspecified error */ 64 | QZSTD_UNSUPPORTED = -2 /* Unsupported */ 65 | } QZSTD_Status_e; 66 | 67 | /** QZSTD_version: 68 | * Return the version of QAT Zstd Plugin. 69 | * 70 | * @retval const char* Version string. 71 | */ 72 | const char *QZSTD_version(void); 73 | 74 | /** qatSequenceProducer: 75 | * Block-level sequence producer with QAT 76 | * This implementation can be registered to zstd by ZSTD_registerSequenceProducer 77 | * for replacing internal block-level sequence producer. With this sequence producer, 78 | * zstd can offload the process of producing block-level sequences to QAT device. 
79 | * 80 | * @param sequenceProducerState A pointer to a user-managed state for QAT sequence producer, 81 | * users need to call QZSTD_createSeqProdState to create it, and 82 | * call QZSTD_freeSeqProdState to free it. 83 | * @param outSeqs The output buffer for QAT sequence producer. The memory backing 84 | * outSeqs is managed by the CCtx. 85 | * @param outSeqsCapacity outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). 86 | * @param src An input buffer for the sequence producer to parse. 87 | * @param srcSize The size of input buffer which is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. 88 | * @param dict Dict buffer for sequence producer to reference. Currently, it's a NULL pointer, 89 | * and will be supported in the future. 90 | * @param dictSize The size of dict. Currently, zstd will always pass zero into sequence producer. 91 | * @param compressionLevel Zstd compression level, only support L1-L12. 92 | * @param windowSize Representing the maximum allowed offset for sequences 93 | * 94 | * @retval size_t Return number of sequences QAT sequence producer produced 95 | * or error code: ZSTD_SEQUENCE_PRODUCER_ERROR. 96 | * *** LIMITATIONS *** 97 | * - Only support compression level from L1 to L12. 98 | * - ZSTD sequence producer only support zstd compression API which respect advanced parameters. 99 | * - The ZSTD_c_enableLongDistanceMatching cParam is not currently supported. Compression will fail 100 | * if it is enabled and tries to compress with qatsequenceproducer. 101 | * - Dictionaries are not currently supported. Compression will not fail if the user references 102 | * a dictionary, but the dictionary won't have any effect. 103 | * - Stream history is not currently supported. All advanced ZSTD compression APIs, including 104 | * streaming APIs, work with qatsequenceproducer, but each block is treated as an independent 105 | * chunk without history from previous blocks. 
106 | * - Multi-threading within a single compression is not currently supported. In other words, 107 | * compression will fail if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. 108 | * Multi-threading across compressions is fine: simply create one CCtx per thread. 109 | */ 110 | size_t qatSequenceProducer( 111 | void *sequenceProducerState, ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, 112 | const void *src, size_t srcSize, 113 | const void *dict, size_t dictSize, 114 | int compressionLevel, 115 | size_t windowSize 116 | ); 117 | 118 | /** QZSTD_startQatDevice: 119 | * Start QAT device 120 | * This function is used to initialize the QAT hardware. If qatSequenceProducer 121 | * is registered, the QAT device must also be started before the compression 122 | * work starts. 123 | * 124 | * @retval QZSTD_OK QAT device is fully successfully been started. 125 | * @retval QZSTD_STARTED QAT device is started, but the capability does not 126 | * meet the requirements. 127 | * @retval QZSTD_FAIL Failed to start QAT device. 128 | * @retval QZSTD_UNSUPPORTED QAT device or current configuration didn't support LZ4s and postprocessing. 129 | */ 130 | int QZSTD_startQatDevice(void); 131 | 132 | /** QZSTD_stopQatDevice: 133 | * Stop QAT device 134 | * This function is used to free hardware resources. Users need to call this 135 | * function after all compression jobs are finished. 136 | */ 137 | void QZSTD_stopQatDevice(void); 138 | 139 | /** QZSTD_createSeqProdState: 140 | * Create sequence producer state for qatSequenceProducer 141 | * The pointer returned by this function is required for registering qatSequenceProducer. 142 | * One ZSTD CCtx can share one sequence producer state, no need to reallocate for every 143 | * compression job. 
144 | */ 145 | void *QZSTD_createSeqProdState(void); 146 | 147 | /** QZSTD_freeSeqProdState: 148 | * Free sequence producer state qatSequenceProducer used 149 | * After all compression jobs are finished, users must free the sequence producer state. 150 | */ 151 | void QZSTD_freeSeqProdState(void *sequenceProducerState); 152 | 153 | #endif /* QATSEQPROD_H */ 154 | 155 | #if defined (__cplusplus) 156 | } 157 | #endif 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) 2 | 3 | 4 | ## Table of Contents 5 | 6 | - [Introduction](#introduction) 7 | - [Licensing](#licensing) 8 | - [Hardware Requirements](#hardware-requirements) 9 | - [Software Requirements](#software-requirements) 10 | - [Limitations](#limitations) 11 | - [Installation](#installation) 12 | - [Integration Guide](#integration-guide) 13 | - [Legal](#legal) 14 | 15 | 16 | ## Introduction 17 | 18 | The Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a high-performance plugin for Zstandard (ZSTD), designed to accelerate compression using Intel® QAT hardware.ZSTD\* is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level and better compression ratios. From version [v1.5.4](https://github.com/facebook/zstd/releases/tag/v1.5.4), ZSTD provides a block-level sequence producer API, enabling users to register custom sequence producers. The QAT sequence producer leverages Intel® QAT to offload the generation of block-level sequences (literals and matches), resulting in significant performance improvements for compression levels L1–L12. The produced list of sequences is then post-processed by ZSTD to generate valid compressed blocks, ensuring compatibility and optimal compression results. 
19 | 20 | Intel® QAT provides cryptographic and compression acceleration, enhancing data center efficiency and throughput. The QAT ZSTD Plugin integrates seamlessly with ZSTD, allowing applications to benefit from hardware-accelerated compression with minimal code changes. 21 | 22 |

23 | QAT ZSTD Plugin Architecture 24 |

25 | 26 | 27 | ## Hardware Requirements 28 | 29 | - Intel® QuickAssist Technology Gen 4 (Intel® 4xxx series), available on 4th Gen Intel® Xeon® Scalable processors and newer platforms 30 | 31 | 32 | ## Software Requirements 33 | 34 | - [Zstandard v1.5.7](https://github.com/facebook/zstd) 35 | - [Intel® QAT Driver for Linux\* Hardware v2.0 - QAT20.L.1.2.30-00090](https://www.intel.com/content/www/us/en/download/765501.html) (out-of-tree) or [Intel® QATlib 25.08](https://github.com/intel/qatlib) (intree) 36 | 37 | 38 | ## Limitations 39 | 40 | 1. Supports compression levels L1–L12. 41 | 2. Only ZSTD compression APIs that respect advanced parameters are supported (e.g., `ZSTD_compress2`, `ZSTD_compressStream2`). 42 | 3. The `ZSTD_c_enableLongDistanceMatching` parameter is not supported; enabling it will cause compression to fail with QAT sequence producer. 43 | 4. Dictionaries are not supported; Compression will succeed if the dictionary is referenced, but the dictionary will have no effect. 44 | 5. Stream history is not supported; each block is treated as an independent chunk without history from previous blocks. 45 | 6. Multi-threading within a single compression is not supported. Compression will fail if `ZSTD_c_nbWorkers > 0` and an external sequence producer is registered. Each thread must use its own context (`CCtx`). 46 | 47 | For further details, see [zstd.h](https://github.com/facebook/zstd/blob/dev/lib/zstd.h). 48 | 49 | 50 | ## Installation 51 | 52 | ### 1. Install Intel® QAT Driver (out-of-tree) or QATlib (in-tree) 53 | 54 | Choose either the [Intel® QAT Driver for Linux\* Hardware v2.0](https://www.intel.com/content/www/us/en/download/765501.html) (out-of-tree) or [QATlib](https://github.com/intel/qatlib) (in-tree) based on your requirements. 55 | 56 | #### out-of-tree: 57 | 1. 
Download and install the driver using the [Getting Started Guide](https://www.intel.com/content/www/us/en/content-details/632506/intel-quickassist-technology-intel-qat-software-for-linux-getting-started-guide-hardware-version-2-0.html). 58 | 2. For virtual environments, refer to [Intel® VT with QAT](https://www.intel.com/content/www/us/en/content/details/709210/using-intel-virtualization-technology-intel-vt-with-intel-quickassist-technology-application-note.html). 59 | 3. Update the QAT configuration files `/etc/.conf` as described in the [Programmer's Guide](https://intel.github.io/quickassist/PG/configuration_files_generalsection.html). 60 | 4. Ensure the configuration file contains a `[SHIM]` section (required by QAT ZSTD Plugin) with `dc` services enabled. You may: 61 | - Add a `[SHIM]` section manually, or 62 | - Set the environment variable `QAT_SECTION_NAME` to modify the default section name. 63 | ```ini 64 | [GENERAL] 65 | ServicesEnabled = dc 66 | 67 | # ... Other details 68 | 69 | [SHIM] 70 | NumberCyInstances = 0 71 | NumberDcInstances = 64 72 | NumProcesses = 1 73 | LimitDevAccess = 0 74 | 75 | # Data Compression - User instance #0 76 | Dc1Name = "Dc0" 77 | Dc1IsPolled = 1 78 | Dc1CoreAffinity = 0 79 | 80 | # Data Compression - User instance #1 81 | Dc2Name = "Dc1" 82 | Dc2IsPolled = 1 83 | Dc2CoreAffinity = 1 84 | 85 | # ... repeat for each DC instance up to Dc63 ... 86 | 87 | # Data Compression - User instance #63 88 | Dc63Name = "Dc63" 89 | Dc63IsPolled = 1 90 | Dc63CoreAffinity = 63 91 | ``` 92 | 93 | 5. Restart the QAT service: 94 | ```bash 95 | service qat_service restart 96 | ``` 97 | 98 | #### QATlib (in-tree) Installation 99 | QATlib is available as an RPM package for RHEL, Fedora, Ubuntu, Debian, and SUSE distributions, or can be installed from source ([qatlib/INSTALL](https://github.com/intel/qatlib/blob/main/INSTALL)). Refer to the [QATlib](https://intel.github.io/quickassist/qatlib/index.html) User Guide for additional information.
100 | 101 | #### USDM or SVM Support 102 | 103 | By default, the QAT sequence producer library uses Shared Virtual Memory (SVM), allowing direct submission of application buffers to QAT hardware for optimal performance. If SVM is not enabled, memory passed to QAT hardware must be DMA-enabled. In this case, Intel's User Space DMA-able Memory (USDM) component provides the necessary support, and the QAT ZSTD Plugin automatically switches to USDM mode. To enable SVM, update the BIOS and driver configuration as described in the [Intel® VT with QAT](https://www.intel.com/content/www/us/en/content/details/709210/using-intel-virtualization-technology-intel-vt-with-intel-quickassist-technology-application-note.html) and the [Programmer's Guide](https://intel.github.io/quickassist/PG/index.html). 104 | 105 | ### 2. Build QAT Sequence Producer Library 106 | 107 | out-of-tree: 108 | Set the `ICP_ROOT` environment variable to the root directory of the QAT driver source tree. 109 | 110 | QATlib (in-tree): 111 | Set the QATlib installation path environment variables before building (default shown below): 112 | ```bash 113 | export LIBRARY_PATH=/usr/local/lib 114 | export LD_LIBRARY_PATH=/usr/local/lib 115 | ``` 116 | 117 | Build the library: 118 | ```bash 119 | make 120 | ``` 121 | If ZSTD 1.5.4+ is not installed system-wide, specify the path to the ZSTD source: 122 | ```bash 123 | make ZSTDLIB=[PATH_TO_ZSTD_LIB_SOURCE] 124 | ``` 125 | Install the library: 126 | ```bash 127 | make install 128 | ``` 129 | 130 | ### 3. Build and Run Test Program 131 | 132 | ```bash 133 | make test 134 | ./test/test [TEST_FILENAME] 135 | ``` 136 | 137 | ### 4. Build and Run Benchmark Tool 138 | 139 | The `benchmark` tool evaluates QAT sequence producer performance. 
Supported options: 140 | ```bash 141 | -t# Set maximum threads [1–128] (default: 1) 142 | -l# Set iteration loops [1–1,000,000] (default: 1) 143 | -c# Set chunk size (default: 32K) 144 | -E# Auto/enable/disable searchForExternalRepcodes (0: auto; 1: enable; 2: disable; default: auto) 145 | -L# Set compression level [1–12] (default: 1) 146 | -m# Benchmark mode (0: software; 1: QAT; default: 1) 147 | ``` 148 | For optimal performance, increase the number of threads (`-t`). Ensure the number of test threads does not exceed the number of DC instances configured in `/etc/4xxx_devx.conf`. 149 | 150 | Example benchmark usage with [Silesia compression corpus](https://sun.aei.polsl.pl/~sdeor/index.php?page=silesia): 151 | ```bash 152 | ./benchmark -m1 -l100 -c64K -t64 -E2 Silesia 153 | ``` 154 | 155 | 156 | ## Integration Guide 157 | 158 | ### Integrating QAT Sequence Producer with `zstd` 159 | 160 | To accelerate compression in the `zstd` command-line tool, modify `FIO_compressZstdFrame` in `zstd/programs/fileio.c` to include `qatseqprod.h` and link with `-lqatseqprod`.
161 | 162 | **Initialization (before compression):** 163 | ```c 164 | /* Initialize and start the QAT device before beginning compression */ 165 | QZSTD_startQatDevice(); 166 | /* Create a state object for the QAT sequence producer */ 167 | void *sequenceProducerState = QZSTD_createSeqProdState(); 168 | /* Register the QAT sequence producer with the ZSTD compression context */ 169 | ZSTD_registerSequenceProducer( 170 | ress.cctx, 171 | sequenceProducerState, 172 | qatSequenceProducer 173 | ); 174 | /* Enable fallback to the default software sequence producer if QAT is unavailable */ 175 | ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableSeqProducerFallback, 1); 176 | ``` 177 | 178 | **Cleanup (after compression):** 179 | ```c 180 | QZSTD_freeSeqProdState(sequenceProducerState); 181 | QZSTD_stopQatDevice(); // Call before process exit or when QAT is no longer needed 182 | ``` 183 | 184 | Recompile `zstd` with `-lqatseqprod`. Only single-threaded mode is supported; use `--single-thread`: 185 | ```bash 186 | ./zstd --single-thread [TEST_FILENAME] 187 | ``` 188 | Note: Some `zstd` parameters are not compatible with sequence producer. Refer to the [zstd manual](https://github.com/facebook/zstd/blob/dev/doc/zstd_manual.html) for details. 
189 | 190 | ### Integrating QAT Sequence Producer in Applications 191 | 192 | **Initialization:** 193 | ```c 194 | /* Create a ZSTD compression context */ 195 | ZSTD_CCtx* const zc = ZSTD_createCCtx(); 196 | /* Start and initialize the QAT device before compression */ 197 | QZSTD_startQatDevice(); 198 | /* Create sequence producer state for QAT sequence producer */ 199 | void *sequenceProducerState = QZSTD_createSeqProdState(); 200 | /* Register the QAT sequence producer with the ZSTD context */ 201 | ZSTD_registerSequenceProducer(zc, sequenceProducerState, qatSequenceProducer); 202 | /* Enable fallback to software sequence producer if QAT is unavailable */ 203 | ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1); 204 | ``` 205 | 206 | **Compression:** 207 | ```c 208 | /* Perform compression using standard ZSTD APIs */ 209 | /* You may use ZSTD_compress2, ZSTD_compressStream2, or ZSTD_compressStream as needed */ 210 | ZSTD_compress2(zc, dstBuffer, dstBufferSize, srcBuffer, srcbufferSize); 211 | ``` 212 | 213 | **Cleanup:** 214 | ```c 215 | /* Free the sequence producer state to release resources */ 216 | QZSTD_freeSeqProdState(sequenceProducerState); 217 | /* Stop and clean up the QAT device before process exit or when QAT is no longer needed */ 218 | QZSTD_stopQatDevice(); 219 | ``` 220 | 221 | Link your application to both `libzstd` and `libqatseqprod` as demonstrated in the test program. See the example in `test/test.c`. 222 | 223 | 224 | ## Licensing 225 | 226 | This project is licensed under the BSD License. Please refer to the `LICENSE` file in the root directory for details. Additional licensing information is available in the file headers of individual source files. 227 | 228 | ## Legal 229 | 230 | Intel, Intel Atom, and Xeon are trademarks of Intel Corporation in the U.S. and/or other countries. 231 | 232 | \*Other names and brands may be claimed as the property of others. 233 | 234 | Copyright © 2016-2025, Intel Corporation. 
All rights reserved. 235 | -------------------------------------------------------------------------------- /test/benchmark.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * BSD LICENSE 4 | * 5 | * Copyright(c) 2025 Intel Corporation. All rights reserved. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | * 34 | ***************************************************************************/ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | #ifndef ZSTD_STATIC_LINKING_ONLY 48 | #define ZSTD_STATIC_LINKING_ONLY 49 | #endif 50 | #include "zstd.h" 51 | #include "zstd_errors.h" 52 | #include "qatseqprod.h" 53 | 54 | #define NANOSEC (1000000000ULL) /* 1 second */ 55 | #define NANOUSEC (1000) /* 1 usec */ 56 | #define MB (1000000) /* 1MB */ 57 | #define BUCKET_NUM 200 58 | #define DEFAULT_CHUNK_SIZE (32 * 1024) 59 | #define ZSTD_AUTO 0 60 | #define ZSTD_ENABLED 1 61 | #define ZSTD_DISABLED 2 62 | 63 | 64 | #ifndef MIN 65 | #define MIN(a,b) ((a)<(b)?(a):(b)) 66 | #endif 67 | 68 | #define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) 69 | 70 | #define GETTIME(now) {clock_gettime(CLOCK_MONOTONIC, &now);}; 71 | #define GETDIFFTIME(start_ticks, end_ticks) (1000000000ULL*( end_ticks.tv_sec - start_ticks.tv_sec ) + ( end_ticks.tv_nsec - start_ticks.tv_nsec )) 72 | 73 | 74 | typedef struct { 75 | size_t chunkSize; /* the max chunk size of ZSTD_compress2 */ 76 | size_t srcSize; /* Input file size */ 77 | unsigned cLevel; /* Compression Level 1 - 12 */ 78 | unsigned nbIterations; /* Number test loops, default is 1 */ 79 | char benchMode; /* 0: software compression, 1: QAT compression*/ 80 | char searchForExternalRepcodes; /* 0: auto 1: enable, 2: disable */ 81 | const unsigned char *srcBuffer; /* Input data point */ 82 | } threadArgs_t; 83 | 84 | typedef struct { 85 | size_t bucketValue[BUCKET_NUM]; 86 | size_t bucket[BUCKET_NUM]; 87 | size_t maxBucketValue; 88 | size_t minBucketValue; 89 | int bucketCount; 90 | size_t num; 91 | size_t sum; 92 | size_t min; 93 | size_t max; 94 | } HistogramStat_t; 95 | 96 | static HistogramStat_t compHistogram; 97 | static pthread_barrier_t g_threadBarrier1, g_threadBarrier2; 98 | static size_t g_threadNum = 0; 99 | static double g_compSpeed = 0, g_decompSpeed = 0; 100 | pthread_mutex_t g_lock; 101 | /* initHistorgram: fill bucketValue[] with upper bounds that start at 1000 and grow by a factor of 1.05 per bucket, then reset the per-bucket counts and the num/sum/min/max statistics. */ 102 | static void initHistorgram(HistogramStat_t *historgram) 103 | { 104 | historgram->bucketValue[0] = 1000; 105 | historgram->minBucketValue = historgram->bucketValue[0]; 106 | historgram->bucketCount = 1; 107 | for (int i = 1; i < BUCKET_NUM; i++) { 108 | /* stop growing once the previous bound passes 32 bits; bucketValue[i] itself has not been written yet, so it must not be the value tested */ if (historgram->bucketValue[i - 1] > 0XFFFFFFFF) { 109 | break; 110 | } 111 | historgram->bucketValue[i] = historgram->bucketValue[i - 1] * 1.05; 112 | historgram->maxBucketValue = historgram->bucketValue[i]; 113 | historgram->bucketCount++; 114 | } 115 | memset(historgram->bucket, 0, sizeof(size_t) * BUCKET_NUM); 116 | historgram->num = 0; 117 | historgram->sum = 0; 118 | historgram->min = 0xFFFFFFFF; 119 | historgram->max = 0; 120 | } 121 | 122 | static int getBucketIndex(HistogramStat_t 
*historgram, size_t value) 123 | { 124 | for (int bucketIndex = 0; bucketIndex < BUCKET_NUM; bucketIndex++) { 125 | if (value < historgram->bucketValue[bucketIndex]) { 126 | return bucketIndex; 127 | } 128 | } 129 | return BUCKET_NUM - 1; 130 | } 131 | 132 | static void bucketAdd(HistogramStat_t *historgram, size_t value) 133 | { 134 | int bucketIndex = getBucketIndex(historgram, value); 135 | __sync_fetch_and_add(&historgram->bucket[bucketIndex], 1); 136 | 137 | __sync_fetch_and_add(&historgram->sum, value); 138 | __sync_fetch_and_add(&historgram->num, 1); 139 | if (value < historgram->min) 140 | __sync_lock_test_and_set(&historgram->min, value); 141 | if (value > historgram->max) 142 | __sync_lock_test_and_set(&historgram->max, value); 143 | } 144 | 145 | static double percentile(HistogramStat_t *historgram, double p) 146 | { 147 | double threshold = historgram->num * (p / 100.0); 148 | size_t cumulative_sum = 0; 149 | for (int index = 0; index < historgram->bucketCount; index++) { 150 | size_t bucket_count = historgram->bucket[index]; 151 | cumulative_sum += bucket_count; 152 | if (cumulative_sum >= threshold) { 153 | size_t left_point = (index == 0) ? 
0 : historgram->bucketValue[index - 1]; 154 | size_t right_point = historgram->bucketValue[index]; 155 | size_t left_sum = cumulative_sum - bucket_count; 156 | size_t right_sum = cumulative_sum; 157 | double pos = 0; 158 | size_t right_left_diff = right_sum - left_sum; 159 | if (right_left_diff != 0) { 160 | pos = (threshold - left_sum) / right_left_diff; 161 | } 162 | double r = left_point + (right_point - left_point) * pos; 163 | if (r < historgram->min) 164 | r = historgram->min; 165 | if (r > historgram->max) 166 | r = historgram->max; 167 | return r; 168 | } 169 | } 170 | return historgram->max; 171 | } 172 | /* usage: print the command-line help to stderr (via DISPLAY) and return 0 so callers can `return usage(...)`. The thread range shown here must match the limit enforced in main(). */ 173 | static int usage(const char *exe) 174 | { 175 | DISPLAY("Usage:\n"); 176 | DISPLAY(" %s [arg] filename\n", exe); 177 | DISPLAY("Options:\n"); 178 | DISPLAY(" -t# Set maximum threads [1 - 2048] (default: 1)\n"); 179 | DISPLAY(" -l# Set iteration loops [1 - 1000000](default: 1)\n"); 180 | DISPLAY(" -c# Set chunk size (default: 32K)\n"); 181 | DISPLAY(" -E# Auto/enable/disable searchForExternalRepcodes(0: auto; 1: enable; 2: disable; default: auto)\n"); 182 | DISPLAY(" -L# Set compression level [1 - 12] (default: 1)\n"); 183 | DISPLAY(" -m# Benchmark mode, 0: software compression; 1:QAT compression(default: 1) \n"); 184 | DISPLAY(" -h/H Print this help message\n"); 185 | return 0; 186 | } 187 | 188 | /* This function converts a string to an unsigned int. 189 | * The string MUST start with digits and may 190 | * end with "K" or "M". For example: 191 | * if the input string is "128K", the output will be 131072; 192 | * if the input string is "65536", the output will be 65536. 
193 | */ 194 | static unsigned stringToU32(const char **s) 195 | { 196 | unsigned value = 0; 197 | while ((**s >= '0') && (**s <= '9')) { 198 | if (value > ((((unsigned)(-1)) / 10) - 1)) { 199 | DISPLAY("ERROR: numeric value is too large\n"); 200 | exit(1); 201 | } 202 | value *= 10; 203 | value += (unsigned)(**s - '0'); 204 | (*s)++ ; 205 | } 206 | if ((**s == 'K') || (**s == 'M')) { 207 | if (value > ((unsigned)(-1)) >> 10) { 208 | DISPLAY("ERROR: numeric value is too large\n"); 209 | exit(1); 210 | } 211 | value <<= 10; 212 | if (**s == 'M') { 213 | if (value > ((unsigned)(-1)) >> 10) { 214 | DISPLAY("ERROR: numeric value is too large\n"); 215 | exit(1); 216 | } 217 | value <<= 10; 218 | } 219 | (*s)++; 220 | } 221 | return value; 222 | } 223 | 224 | void *benchmark(void *args) 225 | { 226 | threadArgs_t *threadArgs = (threadArgs_t *)args; 227 | size_t rc = 0, threadNum; 228 | unsigned loops; 229 | int verifyResult = 0; /* 1: pass, 0: fail */ 230 | size_t *chunkSizes = NULL; /* The array of chunk size */ 231 | size_t *compSizes = NULL; /* The array of compressed size */ 232 | size_t nanosec = 0; 233 | size_t compNanosecSum = 0, decompNanosecSum = 0; 234 | double compSpeed = 0, decompSpeed = 0, ratio = 0; 235 | size_t csCount, nbChunk, destSize, cSize, dcSize; 236 | struct timespec startTicks, endTicks; 237 | unsigned char *destBuffer = NULL, *decompBuffer = NULL; 238 | const unsigned char *srcBuffer = threadArgs->srcBuffer; 239 | size_t srcSize = threadArgs->srcSize; 240 | size_t chunkSize = threadArgs->chunkSize; 241 | unsigned nbIterations = threadArgs->nbIterations; 242 | unsigned cLevel = threadArgs->cLevel; 243 | ZSTD_CCtx *const zc = ZSTD_createCCtx(); 244 | ZSTD_DCtx *const zdc = ZSTD_createDCtx(); 245 | void *matchState = NULL; 246 | int setUpStatus = 0, compressStatus = 0; 247 | 248 | csCount = srcSize / chunkSize + (srcSize % chunkSize ? 
1 : 0); 249 | chunkSizes = (size_t *)malloc(csCount * sizeof(size_t)); 250 | compSizes = (size_t *)malloc(csCount * sizeof(size_t)); 251 | assert(chunkSizes && compSizes); 252 | size_t tmpSize = srcSize; 253 | for (nbChunk = 0; nbChunk < csCount; nbChunk++) { 254 | chunkSizes[nbChunk] = MIN(tmpSize, chunkSize); 255 | tmpSize -= chunkSizes[nbChunk]; 256 | } 257 | 258 | destSize = ZSTD_compressBound(srcSize); 259 | destBuffer = (unsigned char *)malloc(destSize); 260 | decompBuffer = (unsigned char *)malloc(srcSize); 261 | assert(destBuffer != NULL); 262 | 263 | if (threadArgs->benchMode == 1) { 264 | QZSTD_startQatDevice(); 265 | matchState = QZSTD_createSeqProdState(); 266 | ZSTD_registerSequenceProducer(zc, matchState, qatSequenceProducer); 267 | } else { 268 | ZSTD_registerSequenceProducer(zc, NULL, NULL); 269 | } 270 | 271 | if (threadArgs->searchForExternalRepcodes == ZSTD_ENABLED) { 272 | rc = ZSTD_CCtx_setParameter(zc, ZSTD_c_searchForExternalRepcodes, 273 | ZSTD_ps_enable); 274 | } else if (threadArgs->searchForExternalRepcodes == ZSTD_DISABLED) { 275 | rc = ZSTD_CCtx_setParameter(zc, ZSTD_c_searchForExternalRepcodes, 276 | ZSTD_ps_disable); 277 | } else { 278 | rc = ZSTD_CCtx_setParameter(zc, ZSTD_c_searchForExternalRepcodes, ZSTD_ps_auto); 279 | } 280 | if (ZSTD_isError(rc)) { 281 | DISPLAY("Fail to set parameter ZSTD_c_searchForExternalRepcodes\n"); 282 | goto setupend; 283 | } 284 | 285 | rc = ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, cLevel); 286 | if (ZSTD_isError(rc)) { 287 | DISPLAY("Fail to set parameter ZSTD_c_compressionLevel\n"); 288 | goto setupend; 289 | } 290 | 291 | setUpStatus = 1; 292 | 293 | setupend: 294 | 295 | /* Waiting all threads */ 296 | pthread_barrier_wait(&g_threadBarrier1); 297 | if (!setUpStatus) { 298 | goto compressend; 299 | } 300 | 301 | /* Start compression benchmark */ 302 | for (loops = 0; loops < nbIterations; loops++) { 303 | unsigned char *tmpDestBuffer = destBuffer; 304 | const unsigned char *tmpSrcBuffer = 
srcBuffer; 305 | size_t tmpDestSize = destSize; 306 | for (nbChunk = 0; nbChunk < csCount; nbChunk++) { 307 | GETTIME(startTicks); 308 | cSize = ZSTD_compress2(zc, tmpDestBuffer, tmpDestSize, tmpSrcBuffer, 309 | chunkSizes[nbChunk]); 310 | GETTIME(endTicks); 311 | if (ZSTD_isError(cSize)) { 312 | DISPLAY("Compress failed\n"); 313 | goto compressend; 314 | } 315 | tmpDestBuffer += cSize; 316 | tmpDestSize -= cSize; 317 | tmpSrcBuffer += chunkSizes[nbChunk]; 318 | compSizes[nbChunk] = cSize; 319 | nanosec = GETDIFFTIME(startTicks, endTicks); 320 | bucketAdd(&compHistogram, nanosec); 321 | compNanosecSum += nanosec; 322 | } 323 | } 324 | 325 | cSize = 0; 326 | for (nbChunk = 0; nbChunk < csCount; nbChunk++) { 327 | cSize += compSizes[nbChunk]; 328 | } 329 | 330 | /* Verify the compression result */ 331 | rc = ZSTD_decompress(decompBuffer, srcSize, destBuffer, cSize); 332 | if (rc != srcSize) { 333 | DISPLAY("Decompressed size is not equal to source size\n"); 334 | goto compressend; 335 | } 336 | /* Compare original buffer with decompress output */ 337 | if (!memcmp(decompBuffer, srcBuffer, srcSize)) { 338 | verifyResult = 1; 339 | } else { 340 | verifyResult = 0; 341 | } 342 | 343 | compressStatus = 1; 344 | 345 | compressend: 346 | 347 | pthread_barrier_wait(&g_threadBarrier2); 348 | if (!setUpStatus || !compressStatus) { 349 | goto exit; 350 | } 351 | /* Start decompression benchmark */ 352 | for (loops = 0; loops < nbIterations; loops++) { 353 | unsigned char *tmpDestBuffer = decompBuffer; 354 | const unsigned char *tmpSrcBuffer = destBuffer; 355 | size_t tmpDestSize = srcSize; 356 | for (nbChunk = 0; nbChunk < csCount; nbChunk++) { 357 | GETTIME(startTicks); 358 | dcSize = ZSTD_decompressDCtx(zdc, tmpDestBuffer, tmpDestSize, tmpSrcBuffer, 359 | compSizes[nbChunk]); 360 | GETTIME(endTicks); 361 | if (ZSTD_isError(dcSize)) { 362 | DISPLAY("Decompress failed\n"); 363 | goto exit; 364 | } 365 | tmpDestBuffer += dcSize; 366 | tmpDestSize -= dcSize; 367 | tmpSrcBuffer 
+= compSizes[nbChunk]; 368 | nanosec = GETDIFFTIME(startTicks, endTicks); 369 | decompNanosecSum += nanosec; 370 | } 371 | } 372 | 373 | /*Get current thread num */ 374 | threadNum = __sync_add_and_fetch(&g_threadNum, 1); 375 | 376 | ratio = (double) cSize / (double)srcSize; 377 | compSpeed = (double)(srcSize * nbIterations) / ((double)compNanosecSum / 378 | NANOSEC); 379 | decompSpeed = (double)(srcSize * nbIterations) / ((double)decompNanosecSum / 380 | NANOSEC); 381 | DISPLAY("Thread %lu: Compression: %lu -> %lu, Throughput: Comp: %5.f MB/s, Decomp: %5.f MB/s, Compression Ratio: %2.2f%%, %s\n", 382 | threadNum, srcSize, cSize, (double) compSpeed / MB, (double) decompSpeed / MB, 383 | ratio * 100, 384 | verifyResult ? "PASS" : "FAIL"); 385 | pthread_mutex_lock(&g_lock); 386 | g_compSpeed += compSpeed; 387 | g_decompSpeed += decompSpeed; 388 | pthread_mutex_unlock(&g_lock); 389 | exit: 390 | ZSTD_freeCCtx(zc); 391 | ZSTD_freeDCtx(zdc); 392 | if (threadArgs->benchMode == 1 && matchState) { 393 | QZSTD_freeSeqProdState(matchState); 394 | } 395 | if (chunkSizes) { 396 | free(chunkSizes); 397 | } 398 | if (compSizes) { 399 | free(compSizes); 400 | } 401 | if (destBuffer) { 402 | free(destBuffer); 403 | } 404 | if (decompBuffer) { 405 | free(decompBuffer); 406 | } 407 | return NULL; 408 | } 409 | 410 | int main(int argc, const char **argv) 411 | { 412 | int argNb, threadNb; 413 | int nbThreads = 1; 414 | pthread_t threads[2048]; 415 | size_t srcSize, bytesRead; 416 | unsigned char *srcBuffer = NULL; 417 | const char *fileName = NULL; 418 | int inputFile = -1; 419 | threadArgs_t threadArgs; 420 | 421 | if (argc < 2) 422 | return usage(argv[0]); 423 | 424 | /* Set default value */ 425 | threadArgs.chunkSize = DEFAULT_CHUNK_SIZE; 426 | threadArgs.nbIterations = 1; 427 | threadArgs.cLevel = 1; 428 | threadArgs.benchMode = 1; 429 | threadArgs.searchForExternalRepcodes = ZSTD_AUTO; 430 | 431 | for (argNb = 1; argNb < argc; argNb++) { 432 | const char *arg = argv[argNb]; 433 
| if (arg[0] == '-') { 434 | arg++; 435 | while (arg[0] != 0) { 436 | switch (arg[0]) { 437 | /* Display help message */ 438 | case 'h': 439 | case 'H': 440 | return usage(argv[0]); 441 | /* Set maximum threads */ 442 | case 't': 443 | arg++; 444 | nbThreads = stringToU32(&arg); 445 | /* 0 threads would make pthread_barrier_init fail; values above INT_MAX wrap negative in the int */ if (nbThreads < 1 || nbThreads > 2048) { 446 | DISPLAY("Invalid thread parameter, valid range is 1 - 2048\n"); 447 | return -1; 448 | } 449 | break; 450 | /* Set chunk size */ 451 | case 'c': 452 | arg++; 453 | threadArgs.chunkSize = stringToU32(&arg); /* benchmark() divides the source size by chunkSize, so 0 must be rejected here */ if (threadArgs.chunkSize == 0) { DISPLAY("Invalid chunk size parameter, must be greater than 0\n"); return -1; } 454 | break; 455 | /* Set iterations */ 456 | case 'l': 457 | arg++; 458 | threadArgs.nbIterations = stringToU32(&arg); /* with 0 iterations benchmark() never fills compSizes[] before summing and decompressing from it */ if (threadArgs.nbIterations == 0) { DISPLAY("Invalid iteration parameter, must be greater than 0\n"); return -1; } 459 | break; 460 | /* Set benchmark mode */ 461 | case 'm': 462 | arg++; 463 | threadArgs.benchMode = stringToU32(&arg); 464 | break; 465 | /* Set searchForExternalRepcodes */ 466 | case 'E': 467 | arg++; 468 | threadArgs.searchForExternalRepcodes = stringToU32(&arg); 469 | if (threadArgs.searchForExternalRepcodes != ZSTD_AUTO && 470 | threadArgs.searchForExternalRepcodes != ZSTD_ENABLED && 471 | threadArgs.searchForExternalRepcodes != ZSTD_DISABLED) { 472 | DISPLAY("Invalid searchForExternalRepcodes parameter\n"); 473 | return usage(argv[0]); 474 | } 475 | break; 476 | /* Set compression level */ 477 | case 'L': 478 | arg++; 479 | threadArgs.cLevel = stringToU32(&arg); 480 | break; 481 | /* Unknown argument */ 482 | default : 483 | return usage(argv[0]); 484 | } 485 | } 486 | continue; 487 | } 488 | if (!fileName) { 489 | fileName = arg; 490 | continue; 491 | } 492 | } 493 | if (!fileName) { 494 | return usage(argv[0]); 495 | } 496 | 497 | /* Load input file */ 498 | inputFile = open(fileName, O_RDONLY); 499 | if (inputFile < 0) { 500 | DISPLAY("Cannot open input file: %s\n", fileName); 501 | return -1; 502 | } 503 | /* lseek returns (off_t)-1 on failure; check it before it is converted to size_t and used as an allocation size */ off_t fileSize = lseek(inputFile, 0, SEEK_END); 504 | if (fileSize <= 0 || lseek(inputFile, 0, SEEK_SET) != 0) { DISPLAY("Input file is empty or not seekable: %s\n", fileName); close(inputFile); return -1; } srcSize = (size_t)fileSize; 505 | srcBuffer = (unsigned char *)malloc(srcSize); 506 | assert(srcBuffer != NULL); 507 | 508 | bytesRead = 0; 509 | while (bytesRead != srcSize) { 
510 | /* read() returns -1 on error and 0 at EOF; adding either to bytesRead unchecked would wrap the counter or spin forever */ ssize_t nRead = read(inputFile, srcBuffer + bytesRead, srcSize - bytesRead); if (nRead <= 0) { DISPLAY("Failed to read input file: %s\n", fileName); close(inputFile); free(srcBuffer); return -1; } bytesRead += (size_t)nRead; 511 | } 512 | assert(bytesRead == srcSize); 513 | 514 | threadArgs.srcBuffer = srcBuffer; 515 | threadArgs.srcSize = srcSize; 516 | initHistorgram(&compHistogram); 517 | pthread_mutex_init(&g_lock, NULL); 518 | 519 | pthread_barrier_init(&g_threadBarrier1, NULL, nbThreads); 520 | pthread_barrier_init(&g_threadBarrier2, NULL, nbThreads); 521 | for (threadNb = 0; threadNb < nbThreads; threadNb++) { 522 | pthread_create(&threads[threadNb], NULL, benchmark, &threadArgs); 523 | } 524 | 525 | for (threadNb = 0; threadNb < nbThreads; threadNb++) { 526 | pthread_join(threads[threadNb], NULL); 527 | } 528 | 529 | if (compHistogram.num != 0) { 530 | /* Display Latency statistics */ 531 | DISPLAY("-----------------------------------------------------------\n"); 532 | DISPLAY("Latency Percentiles: P25: %4.2f us, P50: %4.2f us, P75: %4.2f us, P99: %4.2f us, Avg: %4.2f us\n", 533 | percentile(&compHistogram, 25) / NANOUSEC, 534 | percentile(&compHistogram, 50) / NANOUSEC, 535 | percentile(&compHistogram, 75) / NANOUSEC, 536 | percentile(&compHistogram, 99) / NANOUSEC, 537 | /* convert to double before dividing so the fractional microseconds printed by %4.2f are not truncated */ (double)compHistogram.sum / (double)compHistogram.num / NANOUSEC); 538 | 539 | DISPLAY("ALL Compression Speed: %5.f MB/s, ALL Decompression Speed: %5.f MB/s\n", 540 | g_compSpeed / MB, g_decompSpeed / MB); 541 | 542 | #ifdef DISPLAY_HISTOGRAM 543 | DISPLAY("Latency histogram(nanosec): count: %lu\n", compHistogram.num); 544 | size_t cumulativeSum = 0; 545 | for (int i = 0; i < compHistogram.bucketCount; i++) { 546 | if (compHistogram.bucket[i] != 0) { 547 | cumulativeSum += compHistogram.bucket[i]; 548 | DISPLAY("[%10lu, %10lu] %10lu %7.2f%% %7.2f%%\n", 549 | i == 0 ? 
0 : compHistogram.bucketValue[i - 1], compHistogram.bucketValue[i], 550 | compHistogram.bucket[i], 551 | (double)compHistogram.bucket[i] * 100 / compHistogram.num, 552 | (double)cumulativeSum * 100 / compHistogram.num); 553 | } 554 | } 555 | #endif 556 | } 557 | 558 | pthread_barrier_destroy(&g_threadBarrier1); 559 | pthread_barrier_destroy(&g_threadBarrier2); 560 | pthread_mutex_destroy(&g_lock); 561 | QZSTD_stopQatDevice(); 562 | close(inputFile); 563 | free(srcBuffer); 564 | return 0; 565 | } 566 | -------------------------------------------------------------------------------- /src/qatseqprod.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * BSD LICENSE 4 | * 5 | * Copyright(c) 2007-2025 Intel Corporation. All rights reserved. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | * 34 | ***************************************************************************/ 35 | 36 | /** 37 | ***************************************************************************** 38 | * Dependencies 39 | *****************************************************************************/ 40 | #define _GNU_SOURCE 41 | #ifndef ZSTD_STATIC_LINKING_ONLY 42 | #define ZSTD_STATIC_LINKING_ONLY 43 | #endif 44 | #include "zstd.h" 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | #include /* INT_MAX */ 63 | #include 64 | #include /* memset */ 65 | #include 66 | 67 | #include "cpa.h" 68 | #include "cpa_dc.h" 69 | #include "icp_sal_poll.h" 70 | #include "icp_sal_user.h" 71 | #include "qae_mem.h" 72 | 73 | #include "qatseqprod.h" 74 | 75 | #define KB (1024) 76 | 77 | #define COMP_LVL_MINIMUM (1) 78 | #define COMP_LVL_MAXIMUM (12) 79 | #define NUM_BLOCK_OF_RETRY_INTERVAL (1000) 80 | 81 | #define MAX_GRAB_RETRY (10) 82 | #define MAX_SEND_REQUEST_RETRY (5) 83 | #define MAX_DEVICES (512) 84 | 85 | #define SECTION_NAME_SIZE (32) 86 | 87 | #define INTER_SZ(src_sz) (2 * (src_sz)) 88 | #define COMPRESS_SRC_BUFF_SZ (ZSTD_BLOCKSIZE_MAX) 89 | 90 | #define ML_BITS 4 91 | #define ML_MASK ((1U << ML_BITS) - 1) 92 | #define RUN_BITS (8 - ML_BITS) 93 | #define 
RUN_MASK ((1U << RUN_BITS) - 1) 94 | 95 | #define LZ4MINMATCH 2 96 | 97 | /* Max latency of polling in the worst condition (nanoseconds) */ 98 | #define MAXTIMEOUT 2000000000ULL 99 | 100 | #define TIMESPENT_NS(a, b) (((a.tv_sec * 1000000000ULL) + a.tv_nsec) - ((b.tv_sec * 1000000000ULL) + b.tv_nsec)) 101 | 102 | /* Branch prediction hints for optimization */ 103 | #ifdef __GNUC__ 104 | #define LIKELY(x) __builtin_expect(!!(x), 1) 105 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 106 | #else 107 | #define LIKELY(x) (x) 108 | #define UNLIKELY(x) (x) 109 | #endif 110 | 111 | /** QZSTD_Session_T: 112 | * This structure contains all session parameters including a buffer used to store 113 | * lz4s output for current session and other parameters 114 | */ 115 | typedef struct QZSTD_Session_S { 116 | int instHint; /*which instance we last used*/ 117 | unsigned char 118 | *qatIntermediateBuf; /* Buffer to store lz4s output for decoding */ 119 | unsigned char reqPhyContMem; /* 1: QAT requires physically contiguous memory */ 120 | CpaDcSessionSetupData 121 | sessionSetupData; /* Session set up data for this session */ 122 | unsigned int failOffloadCnt; /* Failed offloading requests counter */ 123 | } QZSTD_Session_T; 124 | 125 | /** QZSTD_Instance_T: 126 | * This structure contains instance parameter, every session need to grab one 127 | * instance to submit request 128 | */ 129 | typedef struct QZSTD_Instance_S { 130 | CpaInstanceInfo2 instanceInfo; 131 | CpaDcInstanceCapabilities instanceCap; 132 | CpaStatus jobStatus; 133 | CpaDcSessionSetupData sessionSetupData; 134 | CpaDcSessionHandle cpaSessHandle; 135 | CpaDcRqResults res; 136 | Cpa32U buffMetaSize; 137 | CpaStatus instStartStatus; 138 | unsigned char reqPhyContMem; /* 1: QAT requires physically contiguous memory */ 139 | 140 | /* Tracks memory where the intermediate buffers reside. 
*/ 141 | CpaBufferList **intermediateBuffers; 142 | Cpa16U intermediateCnt; 143 | CpaBufferList *srcBuffer; 144 | CpaBufferList *destBuffer; 145 | 146 | unsigned int lock; 147 | unsigned char memSetup; 148 | unsigned char cpaSessSetup; 149 | unsigned char dcInstSetup; 150 | unsigned int numRetries; 151 | 152 | unsigned int seqNumIn; 153 | unsigned int seqNumOut; 154 | int cbStatus; 155 | } QZSTD_Instance_T; 156 | 157 | /** QZSTD_ProcessData_T: 158 | * Process data for controlling instance resource 159 | */ 160 | typedef struct QZSTD_ProcessData_S { 161 | int qzstdInitStatus; 162 | CpaInstanceHandle *dcInstHandle; 163 | QZSTD_Instance_T *qzstdInst; 164 | Cpa16U numInstances; 165 | pthread_mutex_t mutex; 166 | } QZSTD_ProcessData_T; 167 | 168 | typedef struct QZSTD_InstanceList_S { 169 | QZSTD_Instance_T instance; 170 | CpaInstanceHandle dcInstHandle; 171 | struct QZSTD_InstanceList_S *next; 172 | } QZSTD_InstanceList_T; 173 | 174 | typedef struct QZSTD_Hardware_S { 175 | QZSTD_InstanceList_T devices[MAX_DEVICES]; 176 | unsigned int devNum; 177 | unsigned int maxDevId; 178 | } QZSTD_Hardware_T; 179 | 180 | QZSTD_ProcessData_T gProcess = { 181 | .qzstdInitStatus = QZSTD_FAIL, 182 | .mutex = PTHREAD_MUTEX_INITIALIZER 183 | }; 184 | 185 | extern CpaStatus icp_adf_get_numDevices(Cpa32U *); 186 | 187 | int debugLevel = DEBUGLEVEL; 188 | 189 | #define QZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) 190 | 191 | /** 192 | * Print log message 193 | * @param l 194 | * Log level 195 | * 0: release mode, no debug log will be printed 196 | * 1: only print error info 197 | * 2: print events at every position 198 | * 3+: print all events and sequence data 199 | * @param ... 
200 | * The format string 201 | */ 202 | #define QZSTD_LOG(l, ...){ \ 203 | if (l<=debugLevel) { \ 204 | QZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ 205 | } } 206 | 207 | const char *QZSTD_version(void) 208 | { 209 | return QZSTD_VERSION; 210 | } 211 | 212 | /** QZSTD_calloc: 213 | * This function is used to allocate contiguous or discontiguous memory(initialized to zero) 214 | * according to parameter and return pointer to allocated memory, or NULL when the allocation fails, the NUMA node of the current CPU cannot be determined, or nb * size overflows 215 | */ 216 | static void *QZSTD_calloc(size_t nb, size_t size, unsigned char reqPhyContMem) 217 | { 218 | if (!reqPhyContMem) { 219 | return calloc(nb, size); 220 | } else { 221 | int cpu_id = sched_getcpu(); 222 | int real_numa = numa_node_of_cpu(cpu_id); 223 | if (real_numa == -1) { 224 | QZSTD_LOG(1, "couldn't find NUMA node of CPU %d\n", cpu_id); 225 | return NULL; 226 | } 227 | /* calloc() rejects an overflowing nb * size itself; apply the same guard before multiplying by hand */ if (size != 0 && nb > ((size_t)(-1)) / size) { QZSTD_LOG(1, "allocation size overflow\n"); return NULL; } 228 | void *ptr = qaeMemAllocNUMA(nb * size, real_numa, 64); /* NOTE(review): qaeMemAllocNUMA is not known to return zeroed memory - confirm; clear it explicitly so both branches honour the "initialized to zero" contract above */ if (NULL != ptr) { memset(ptr, 0, nb * size); } return ptr; 229 | } 230 | } 231 | 232 | /** QZSTD_free: 233 | * This function needs to be called in pairs with QZSTD_calloc 234 | * to free memory by QZSTD_calloc. 
235 | */ 236 | static void QZSTD_free(void *ptr, unsigned char reqPhyContMem) 237 | { 238 | if (!reqPhyContMem) { 239 | free(ptr); 240 | } else { 241 | qaeMemFreeNUMA(&ptr); 242 | } 243 | } 244 | 245 | /** QZSTD_virtToPhys: 246 | * Convert virtual address to physical 247 | */ 248 | static __inline CpaPhysicalAddr QZSTD_virtToPhys(void *virtAddr) 249 | { 250 | return (CpaPhysicalAddr)qaeVirtToPhysNUMA(virtAddr); 251 | } 252 | 253 | static QZSTD_InstanceList_T *QZSTD_getInstance(unsigned int devId, 254 | QZSTD_Hardware_T *qatHw) 255 | { 256 | QZSTD_InstanceList_T *instances; 257 | QZSTD_InstanceList_T *firstInstance; 258 | int i; 259 | 260 | if (devId >= MAX_DEVICES || NULL == qatHw) { 261 | return NULL; 262 | } 263 | 264 | instances = &qatHw->devices[devId]; 265 | firstInstance = instances->next; 266 | 267 | /* no instance */ 268 | if (NULL == firstInstance) { 269 | goto exit; 270 | } 271 | 272 | instances->next = firstInstance->next; 273 | 274 | /* last instance */ 275 | if (NULL == instances->next && qatHw->devNum > 0) { 276 | qatHw->devNum--; 277 | if (qatHw->maxDevId > 0 && devId == qatHw->maxDevId) { 278 | for (i = qatHw->maxDevId - 1; i >= 0; i--) { 279 | if (qatHw->devices[i].next) { 280 | qatHw->maxDevId = i; 281 | break; 282 | } 283 | } 284 | } 285 | } 286 | 287 | exit: 288 | return firstInstance; 289 | } 290 | 291 | static void QZSTD_clearDevices(QZSTD_Hardware_T *qatHw) 292 | { 293 | unsigned int i; 294 | if (NULL == qatHw || 0 == qatHw->devNum) { 295 | return; 296 | } 297 | 298 | for (i = 0; i <= qatHw->maxDevId; i++) { 299 | QZSTD_InstanceList_T *inst = QZSTD_getInstance(i, qatHw); 300 | while (inst) { 301 | free(inst); 302 | inst = NULL; 303 | inst = QZSTD_getInstance(i, qatHw); 304 | } 305 | } 306 | } 307 | 308 | /** QZSTD_stopQat: 309 | * Stop DC instance and QAT device 310 | */ 311 | static void QZSTD_stopQat(void) 312 | { 313 | int i; 314 | CpaStatus status = CPA_STATUS_SUCCESS; 315 | 316 | QZSTD_LOG(2, "Call stopQat\n"); 317 | if (NULL != 
gProcess.dcInstHandle && 318 | NULL != gProcess.qzstdInst) { 319 | for (i = 0; i < gProcess.numInstances; i++) { 320 | if (0 != gProcess.qzstdInst[i].dcInstSetup) { 321 | status = cpaDcStopInstance(gProcess.dcInstHandle[i]); 322 | if (CPA_STATUS_SUCCESS != status) { 323 | QZSTD_LOG(1, "Stop instance failed, status=%d\n", status); 324 | } 325 | } 326 | } 327 | 328 | free(gProcess.dcInstHandle); 329 | gProcess.dcInstHandle = NULL; 330 | free(gProcess.qzstdInst); 331 | gProcess.qzstdInst = NULL; 332 | } 333 | 334 | (void)icp_sal_userStop(); 335 | 336 | gProcess.numInstances = (Cpa16U)0; 337 | gProcess.qzstdInitStatus = QZSTD_FAIL; 338 | } 339 | 340 | static void QZSTD_removeSession(int i) 341 | { 342 | unsigned char reqPhyContMem = gProcess.qzstdInst[i].reqPhyContMem; 343 | int rc; 344 | 345 | if (0 == gProcess.qzstdInst[i].cpaSessSetup) { 346 | return; 347 | } 348 | 349 | /* Remove session */ 350 | if ((NULL != gProcess.dcInstHandle[i]) && 351 | (NULL != gProcess.qzstdInst[i].cpaSessHandle)) { 352 | /* polling here if there still are some responses haven't beed polled 353 | * if didn't poll there response, cpaDcRemoveSession will raise error message 354 | */ 355 | do { 356 | rc = icp_sal_DcPollInstance(gProcess.dcInstHandle[i], 0); 357 | } while (CPA_STATUS_SUCCESS == rc); 358 | cpaDcRemoveSession(gProcess.dcInstHandle[i], 359 | gProcess.qzstdInst[i].cpaSessHandle); 360 | QZSTD_free(gProcess.qzstdInst[i].cpaSessHandle, reqPhyContMem); 361 | gProcess.qzstdInst[i].cpaSessHandle = NULL; 362 | gProcess.qzstdInst[i].cpaSessSetup = 0; 363 | } 364 | } 365 | 366 | /** QZSTD_cleanUpInstMem: 367 | * Release the memory bound to corresponding instance 368 | */ 369 | static void QZSTD_cleanUpInstMem(int i) 370 | { 371 | int j; 372 | QZSTD_Instance_T *qzstdInst = &(gProcess.qzstdInst[i]); 373 | unsigned char reqPhyContMem = qzstdInst->reqPhyContMem; 374 | 375 | if (NULL != qzstdInst->intermediateBuffers) { 376 | for (j = 0; j < qzstdInst->intermediateCnt; j++) { 377 | if (NULL != 
qzstdInst->intermediateBuffers[j]) { 378 | if (NULL != qzstdInst->intermediateBuffers[j]->pBuffers) { 379 | if (NULL != qzstdInst->intermediateBuffers[j]->pBuffers->pData) { 380 | QZSTD_free(qzstdInst->intermediateBuffers[j]->pBuffers->pData, reqPhyContMem); 381 | qzstdInst->intermediateBuffers[j]->pBuffers->pData = NULL; 382 | } 383 | QZSTD_free(qzstdInst->intermediateBuffers[j]->pBuffers, reqPhyContMem); 384 | qzstdInst->intermediateBuffers[j]->pBuffers = NULL; 385 | } 386 | if (NULL != qzstdInst->intermediateBuffers[j]->pPrivateMetaData) { 387 | QZSTD_free(qzstdInst->intermediateBuffers[j]->pPrivateMetaData, reqPhyContMem); 388 | qzstdInst->intermediateBuffers[j]->pPrivateMetaData = NULL; 389 | } 390 | QZSTD_free(qzstdInst->intermediateBuffers[j], 0); 391 | qzstdInst->intermediateBuffers[j] = NULL; 392 | } 393 | } 394 | QZSTD_free(qzstdInst->intermediateBuffers, 0); 395 | qzstdInst->intermediateBuffers = NULL; 396 | } 397 | 398 | /*src buffer*/ 399 | if (NULL != qzstdInst->srcBuffer) { 400 | if (reqPhyContMem && qzstdInst->srcBuffer->pBuffers) { 401 | if (NULL != qzstdInst->srcBuffer->pBuffers->pData) { 402 | QZSTD_free(qzstdInst->srcBuffer->pBuffers->pData, reqPhyContMem); 403 | qzstdInst->srcBuffer->pBuffers->pData = NULL; 404 | } 405 | } 406 | if (NULL != qzstdInst->srcBuffer->pBuffers) { 407 | QZSTD_free(qzstdInst->srcBuffer->pBuffers, reqPhyContMem); 408 | qzstdInst->srcBuffer->pBuffers = NULL; 409 | } 410 | if (NULL != qzstdInst->srcBuffer->pPrivateMetaData) { 411 | QZSTD_free(qzstdInst->srcBuffer->pPrivateMetaData, reqPhyContMem); 412 | qzstdInst->srcBuffer->pPrivateMetaData = NULL; 413 | } 414 | QZSTD_free(qzstdInst->srcBuffer, 0); 415 | qzstdInst->srcBuffer = NULL; 416 | } 417 | 418 | /*dest buffer*/ 419 | if (NULL != qzstdInst->destBuffer) { 420 | if (NULL != qzstdInst->destBuffer->pBuffers) { 421 | QZSTD_free(qzstdInst->destBuffer->pBuffers, reqPhyContMem); 422 | qzstdInst->destBuffer->pBuffers = NULL; 423 | } 424 | if (NULL != 
qzstdInst->destBuffer->pPrivateMetaData) { 425 | QZSTD_free(qzstdInst->destBuffer->pPrivateMetaData, reqPhyContMem); 426 | qzstdInst->destBuffer->pPrivateMetaData = NULL; 427 | } 428 | QZSTD_free(qzstdInst->destBuffer, 0); 429 | qzstdInst->destBuffer = NULL; 430 | } 431 | } 432 | 433 | void QZSTD_stopQatDevice(void) 434 | { 435 | pthread_mutex_lock(&gProcess.mutex); 436 | if (QZSTD_OK == gProcess.qzstdInitStatus) { 437 | int i = 0; 438 | 439 | for (i = 0; i < gProcess.numInstances; i++) { 440 | if (0 != gProcess.qzstdInst[i].cpaSessSetup) { 441 | QZSTD_removeSession(i); 442 | } 443 | if (0 != gProcess.qzstdInst[i].memSetup) { 444 | QZSTD_cleanUpInstMem(i); 445 | } 446 | } 447 | QZSTD_stopQat(); 448 | } 449 | if (QZSTD_STARTED == gProcess.qzstdInitStatus) { 450 | (void)icp_sal_userStop(); 451 | gProcess.qzstdInitStatus = QZSTD_FAIL; 452 | } 453 | pthread_mutex_unlock(&gProcess.mutex); 454 | } 455 | 456 | static int QZSTD_setInstance(unsigned int devId, 457 | QZSTD_InstanceList_T *newInstance, 458 | QZSTD_Hardware_T *qatHw) 459 | { 460 | QZSTD_InstanceList_T *instances; 461 | 462 | if (devId >= MAX_DEVICES || NULL == newInstance || NULL == qatHw || 463 | NULL != newInstance->next) { 464 | return QZSTD_FAIL; 465 | } 466 | 467 | instances = &qatHw->devices[devId]; 468 | 469 | /* first instance */ 470 | if (NULL == instances->next) { 471 | qatHw->devNum++; 472 | } 473 | 474 | while (instances->next) { 475 | instances = instances->next; 476 | } 477 | instances->next = newInstance; 478 | 479 | if (devId > qatHw->maxDevId) { 480 | qatHw->maxDevId = devId; 481 | } 482 | 483 | return QZSTD_OK; 484 | } 485 | 486 | const char *QZSTD_getSectionName(void) 487 | { 488 | static char sectionName[SECTION_NAME_SIZE]; 489 | const char *envName; 490 | const char *defaultName = "SHIM"; 491 | size_t envLen; 492 | 493 | /* Use secure_getenv if available, otherwise fallback to getenv */ 494 | #ifdef __GLIBC__ 495 | /* secure_getenv is available in glibc 2.17+ */ 496 | #if 
defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 17) 497 | envName = secure_getenv("QAT_SECTION_NAME"); 498 | #else 499 | envName = getenv("QAT_SECTION_NAME"); 500 | #endif 501 | #else 502 | /* For non-glibc systems, fallback to getenv */ 503 | envName = getenv("QAT_SECTION_NAME"); 504 | #endif 505 | 506 | /* Use default if env var is not set, empty, or too long */ 507 | if (!envName || (envLen = strlen(envName)) == 0 || 508 | envLen >= SECTION_NAME_SIZE) { 509 | QZSTD_LOG(2, "Section Name invalid. Using default section name '%s'\n", 510 | defaultName); 511 | envName = defaultName; 512 | } 513 | 514 | /* Safe copy with guaranteed null termination */ 515 | snprintf(sectionName, SECTION_NAME_SIZE, "%s", envName); 516 | return sectionName; 517 | } 518 | 519 | static int QZSTD_salUserStart(void) 520 | { 521 | if (CPA_TRUE != icp_sal_userIsQatAvailable()) { 522 | QZSTD_LOG(1, 523 | "There is no QAT device available, please check QAT device status\n"); 524 | return QZSTD_FAIL; 525 | } 526 | 527 | if (CPA_STATUS_SUCCESS != icp_sal_userStart(QZSTD_getSectionName())) { 528 | QZSTD_LOG(1, "icp_sal_userStart failed\n"); 529 | return QZSTD_FAIL; 530 | } 531 | 532 | return QZSTD_OK; 533 | } 534 | 535 | static int QZSTD_getAndShuffleInstance(void) 536 | { 537 | int i; 538 | unsigned int devId = 0; 539 | QZSTD_Hardware_T *qatHw = NULL; 540 | unsigned int instanceFound = 0; 541 | unsigned int instanceMatched = 0; 542 | QZSTD_InstanceList_T *newInst; 543 | if (CPA_STATUS_SUCCESS != cpaDcGetNumInstances(&gProcess.numInstances)) { 544 | QZSTD_LOG(1, "cpaDcGetNumInstances failed\n"); 545 | goto exit; 546 | } 547 | 548 | gProcess.dcInstHandle = (CpaInstanceHandle *)calloc( 549 | gProcess.numInstances, sizeof(CpaInstanceHandle)); 550 | gProcess.qzstdInst = (QZSTD_Instance_T *)calloc(gProcess.numInstances, 551 | sizeof(QZSTD_Instance_T)); 552 | if (NULL == gProcess.dcInstHandle || NULL == gProcess.qzstdInst) { 553 | QZSTD_LOG(1, "calloc for qzstdInst failed\n"); 554 | goto exit; 555 | } 556 
| 557 | if (CPA_STATUS_SUCCESS != cpaDcGetInstances( 558 | gProcess.numInstances, gProcess.dcInstHandle)) { 559 | QZSTD_LOG(1, "cpaDcGetInstances failed\n"); 560 | goto exit; 561 | } 562 | 563 | qatHw = (QZSTD_Hardware_T *)calloc(1, sizeof(QZSTD_Hardware_T)); 564 | if (NULL == qatHw) { 565 | QZSTD_LOG(1, "calloc for qatHw failed\n"); 566 | goto exit; 567 | } 568 | for (i = 0; i < gProcess.numInstances; i++) { 569 | newInst = (QZSTD_InstanceList_T *)calloc(1, sizeof(QZSTD_InstanceList_T)); 570 | if (NULL == newInst) { 571 | QZSTD_LOG(1, "calloc failed\n"); 572 | goto exit; 573 | } 574 | 575 | if (CPA_STATUS_SUCCESS != cpaDcInstanceGetInfo2( 576 | gProcess.dcInstHandle[i], &newInst->instance.instanceInfo)) { 577 | QZSTD_LOG(1, "cpaDcInstanceGetInfo2 failed\n"); 578 | free(newInst); 579 | goto exit; 580 | } 581 | 582 | if (CPA_STATUS_SUCCESS != cpaDcQueryCapabilities( 583 | gProcess.dcInstHandle[i], &newInst->instance.instanceCap)) { 584 | QZSTD_LOG(1, "cpaDcQueryCapabilities failed\n"); 585 | free(newInst); 586 | goto exit; 587 | } 588 | 589 | newInst->instance.lock = 0; 590 | newInst->instance.memSetup = 0; 591 | newInst->instance.cpaSessSetup = 0; 592 | newInst->instance.dcInstSetup = 0; 593 | newInst->instance.numRetries = 0; 594 | newInst->dcInstHandle = gProcess.dcInstHandle[i]; 595 | 596 | devId = newInst->instance.instanceInfo.physInstId.packageId; 597 | if (QZSTD_OK != QZSTD_setInstance(devId, newInst, qatHw)) { 598 | QZSTD_LOG(1, "QZSTD_setInstance on device %u failed\n", devId); 599 | free(newInst); 600 | goto exit; 601 | } 602 | } 603 | 604 | /* shuffle instance */ 605 | for (i = 0; instanceFound < gProcess.numInstances; i++) { 606 | devId = i % (qatHw->maxDevId + 1); 607 | newInst = QZSTD_getInstance(devId, qatHw); 608 | if (NULL == newInst) { 609 | continue; 610 | } 611 | instanceFound++; 612 | 613 | /* check lz4s support */ 614 | if (!newInst->instance.instanceCap.checksumXXHash32 || 615 | !newInst->instance.instanceCap.statelessLZ4SCompression) { 616 | 
free(newInst); 617 | continue; 618 | } 619 | 620 | if (newInst->instance.instanceInfo.requiresPhysicallyContiguousMemory) { 621 | newInst->instance.reqPhyContMem = 1; 622 | } else { 623 | newInst->instance.reqPhyContMem = 0; 624 | } 625 | 626 | memcpy(&gProcess.qzstdInst[instanceMatched], &newInst->instance, 627 | sizeof(QZSTD_Instance_T)); 628 | gProcess.dcInstHandle[instanceMatched] = newInst->dcInstHandle; 629 | free(newInst); 630 | instanceMatched++; 631 | } 632 | 633 | if (0 == instanceMatched) { 634 | QZSTD_LOG(1, "No instance with matching capabilities\n"); 635 | goto exit; 636 | } 637 | 638 | QZSTD_clearDevices(qatHw); 639 | free(qatHw); 640 | qatHw = NULL; 641 | 642 | return QZSTD_OK; 643 | 644 | exit: 645 | if (qatHw) { 646 | QZSTD_clearDevices(qatHw); 647 | free(qatHw); 648 | qatHw = NULL; 649 | } 650 | if (NULL != gProcess.dcInstHandle) { 651 | free(gProcess.dcInstHandle); 652 | gProcess.dcInstHandle = NULL; 653 | } 654 | if (NULL != gProcess.qzstdInst) { 655 | free(gProcess.qzstdInst); 656 | gProcess.qzstdInst = NULL; 657 | } 658 | 659 | if (instanceFound && 0 == instanceMatched) { 660 | return QZSTD_UNSUPPORTED; 661 | } else { 662 | return QZSTD_FAIL; 663 | } 664 | } 665 | 666 | static void QZSTD_dcCallback(void *cbDataTag, CpaStatus stat) 667 | { 668 | if (NULL != cbDataTag) { 669 | QZSTD_Instance_T *qzstdInst = (QZSTD_Instance_T *)cbDataTag; 670 | qzstdInst->seqNumOut++; 671 | if (qzstdInst->seqNumIn != qzstdInst->seqNumOut) { 672 | return; 673 | } 674 | 675 | if (CPA_DC_OK == stat) { 676 | qzstdInst->cbStatus = QZSTD_OK; 677 | } else { 678 | qzstdInst->cbStatus = QZSTD_FAIL; 679 | } 680 | } 681 | } 682 | 683 | /** QZSTD_allocInstMem: 684 | * Allocate memory for corresponding instance 685 | */ 686 | static int QZSTD_allocInstMem(int i) 687 | { 688 | int j; 689 | CpaStatus status; 690 | CpaStatus rc; 691 | unsigned int srcSz; 692 | unsigned int interSz; 693 | unsigned char reqPhyContMem = gProcess.qzstdInst[i].reqPhyContMem; 694 | 695 | rc = 
QZSTD_OK; 696 | srcSz = COMPRESS_SRC_BUFF_SZ; 697 | interSz = INTER_SZ(srcSz); 698 | 699 | status = 700 | cpaDcBufferListGetMetaSize(gProcess.dcInstHandle[i], 1, 701 | &(gProcess.qzstdInst[i].buffMetaSize)); 702 | if (UNLIKELY(CPA_STATUS_SUCCESS != status)) { 703 | QZSTD_LOG(1, "cpaDcBufferListGetMetaSize failed\n"); 704 | goto cleanup; 705 | } 706 | 707 | status = cpaDcGetNumIntermediateBuffers( 708 | gProcess.dcInstHandle[i], &(gProcess.qzstdInst[i].intermediateCnt)); 709 | if (UNLIKELY(CPA_STATUS_SUCCESS != status)) { 710 | QZSTD_LOG(1, "cpaDcGetNumIntermediateBuffers failed\n"); 711 | goto cleanup; 712 | } 713 | gProcess.qzstdInst[i].intermediateBuffers = 714 | (CpaBufferList **)QZSTD_calloc((size_t)gProcess.qzstdInst[i].intermediateCnt, 715 | sizeof(CpaBufferList *), 0); 716 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].intermediateBuffers)) { 717 | QZSTD_LOG(1, "Failed to allocate memory\n"); 718 | goto cleanup; 719 | } 720 | 721 | for (j = 0; j < gProcess.qzstdInst[i].intermediateCnt; j++) { 722 | gProcess.qzstdInst[i].intermediateBuffers[j] = 723 | (CpaBufferList *)QZSTD_calloc(1, sizeof(CpaBufferList), 0); 724 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].intermediateBuffers[j])) { 725 | QZSTD_LOG(1, "Failed to allocate memory\n"); 726 | goto cleanup; 727 | } 728 | if (0 != gProcess.qzstdInst[i].buffMetaSize) { 729 | gProcess.qzstdInst[i].intermediateBuffers[j]->pPrivateMetaData = 730 | QZSTD_calloc(1, (size_t)(gProcess.qzstdInst[i].buffMetaSize), reqPhyContMem); 731 | if (UNLIKELY(NULL == 732 | gProcess.qzstdInst[i].intermediateBuffers[j]->pPrivateMetaData)) { 733 | QZSTD_LOG(1, "Failed to allocate memory\n"); 734 | goto cleanup; 735 | } 736 | } 737 | 738 | gProcess.qzstdInst[i].intermediateBuffers[j]->pBuffers = 739 | (CpaFlatBuffer *)QZSTD_calloc(1, sizeof(CpaFlatBuffer), reqPhyContMem); 740 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].intermediateBuffers[j]->pBuffers)) { 741 | QZSTD_LOG(1, "Failed to allocate memory\n"); 742 | goto cleanup; 743 | } 744 | 
745 | gProcess.qzstdInst[i].intermediateBuffers[j]->pBuffers->pData = 746 | (Cpa8U *)QZSTD_calloc(1, interSz, reqPhyContMem); 747 | if (UNLIKELY(NULL == 748 | gProcess.qzstdInst[i].intermediateBuffers[j]->pBuffers->pData)) { 749 | QZSTD_LOG(1, "Failed to allocate memory\n"); 750 | goto cleanup; 751 | } 752 | 753 | gProcess.qzstdInst[i].intermediateBuffers[j]->pBuffers->dataLenInBytes = 754 | interSz; 755 | } 756 | gProcess.qzstdInst[i].srcBuffer = 757 | (CpaBufferList *)QZSTD_calloc(1, sizeof(CpaBufferList), 0); 758 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].srcBuffer)) { 759 | QZSTD_LOG(1, "Failed to allocate memory\n"); 760 | goto cleanup; 761 | } 762 | gProcess.qzstdInst[i].srcBuffer->numBuffers = 1; 763 | 764 | if (0 != gProcess.qzstdInst[i].buffMetaSize) { 765 | gProcess.qzstdInst[i].srcBuffer->pPrivateMetaData = 766 | QZSTD_calloc(1, (size_t)gProcess.qzstdInst[i].buffMetaSize, reqPhyContMem); 767 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].srcBuffer->pPrivateMetaData)) { 768 | QZSTD_LOG(1, "Failed to allocate memory\n"); 769 | goto cleanup; 770 | } 771 | } 772 | 773 | gProcess.qzstdInst[i].srcBuffer->pBuffers = 774 | (CpaFlatBuffer *)QZSTD_calloc(1, sizeof(CpaFlatBuffer), reqPhyContMem); 775 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].srcBuffer->pBuffers)) { 776 | QZSTD_LOG(1, "Failed to allocate memory\n"); 777 | goto cleanup; 778 | } 779 | if (reqPhyContMem) { 780 | gProcess.qzstdInst[i].srcBuffer->pBuffers->pData = 781 | (Cpa8U *)QZSTD_calloc(1, srcSz, reqPhyContMem); 782 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].srcBuffer->pBuffers->pData)) { 783 | QZSTD_LOG(1, "Failed to allocate memory\n"); 784 | goto cleanup; 785 | } 786 | } else { 787 | gProcess.qzstdInst[i].srcBuffer->pBuffers->pData = NULL; 788 | } 789 | 790 | gProcess.qzstdInst[i].destBuffer = 791 | (CpaBufferList *)QZSTD_calloc(1, sizeof(CpaBufferList), 0); 792 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].destBuffer)) { 793 | QZSTD_LOG(1, "Failed to allocate memory\n"); 794 | goto cleanup; 
795 | } 796 | gProcess.qzstdInst[i].destBuffer->numBuffers = 1; 797 | 798 | if (0 != gProcess.qzstdInst[i].buffMetaSize) { 799 | gProcess.qzstdInst[i].destBuffer->pPrivateMetaData = 800 | QZSTD_calloc(1, (size_t)gProcess.qzstdInst[i].buffMetaSize, reqPhyContMem); 801 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].destBuffer->pPrivateMetaData)) { 802 | QZSTD_LOG(1, "Failed to allocate memory\n"); 803 | goto cleanup; 804 | } 805 | } 806 | 807 | gProcess.qzstdInst[i].destBuffer->pBuffers = 808 | (CpaFlatBuffer *)QZSTD_calloc(1, sizeof(CpaFlatBuffer), reqPhyContMem); 809 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].destBuffer->pBuffers)) { 810 | QZSTD_LOG(1, "Failed to allocate memory\n"); 811 | goto cleanup; 812 | } 813 | 814 | gProcess.qzstdInst[i].memSetup = 1; 815 | 816 | done_inst: 817 | return rc; 818 | 819 | cleanup: 820 | QZSTD_cleanUpInstMem(i); 821 | rc = QZSTD_FAIL; 822 | goto done_inst; 823 | } 824 | 825 | static int QZSTD_startDcInstance(int i) 826 | { 827 | int rc = QZSTD_OK; 828 | 829 | if (CPA_STATUS_SUCCESS != cpaDcSetAddressTranslation( 830 | gProcess.dcInstHandle[i], (CpaVirtualToPhysical)QZSTD_virtToPhys)) { 831 | QZSTD_LOG(1, "cpaDcSetAddressTranslation failed\n"); 832 | rc = QZSTD_FAIL; 833 | goto done; 834 | } 835 | 836 | gProcess.qzstdInst[i].instStartStatus = cpaDcStartInstance( 837 | gProcess.dcInstHandle[i], gProcess.qzstdInst[i].intermediateCnt, 838 | gProcess.qzstdInst[i].intermediateBuffers); 839 | if (CPA_STATUS_SUCCESS != gProcess.qzstdInst[i].instStartStatus) { 840 | QZSTD_LOG(1, "cpaDcStartInstance failed\n"); 841 | rc = QZSTD_FAIL; 842 | goto done; 843 | } 844 | 845 | gProcess.qzstdInst[i].seqNumIn = 0; 846 | gProcess.qzstdInst[i].seqNumOut = 0; 847 | gProcess.qzstdInst[i].dcInstSetup = 1; 848 | 849 | done: 850 | return rc; 851 | } 852 | 853 | static int QZSTD_cpaInitSess(QZSTD_Session_T *sess, int i) 854 | { 855 | Cpa32U sessionSize = 0; 856 | Cpa32U ctxSize = 0; 857 | unsigned char reqPhyContMem = gProcess.qzstdInst[i].reqPhyContMem; 
858 | 859 | /*setup and start DC session*/ 860 | if (UNLIKELY(CPA_STATUS_SUCCESS != cpaDcGetSessionSize(gProcess.dcInstHandle[i], 861 | &sess->sessionSetupData, &sessionSize, &ctxSize))) { 862 | QZSTD_LOG(1, "cpaDcGetSessionSize failed\n"); 863 | return QZSTD_FAIL; 864 | } 865 | 866 | gProcess.qzstdInst[i].cpaSessHandle = QZSTD_calloc(1, (size_t)(sessionSize), 867 | reqPhyContMem); 868 | if (UNLIKELY(NULL == gProcess.qzstdInst[i].cpaSessHandle)) { 869 | QZSTD_LOG(1, "Failed to allocate memory\n"); 870 | return QZSTD_FAIL; 871 | } 872 | 873 | if (UNLIKELY(CPA_STATUS_SUCCESS != cpaDcInitSession( 874 | gProcess.dcInstHandle[i], gProcess.qzstdInst[i].cpaSessHandle, 875 | &sess->sessionSetupData, NULL, QZSTD_dcCallback))) { 876 | QZSTD_LOG(1, "cpaDcInitSession failed\n"); 877 | QZSTD_free(gProcess.qzstdInst[i].cpaSessHandle, reqPhyContMem); 878 | gProcess.qzstdInst[i].cpaSessHandle = NULL; 879 | return QZSTD_FAIL; 880 | } 881 | 882 | gProcess.qzstdInst[i].sessionSetupData = sess->sessionSetupData; 883 | gProcess.qzstdInst[i].cpaSessSetup = 1; 884 | 885 | return QZSTD_OK; 886 | } 887 | 888 | static int QZSTD_cpaUpdateSess(QZSTD_Session_T *sess, int i) 889 | { 890 | unsigned char reqPhyContMem = gProcess.qzstdInst[i].reqPhyContMem; 891 | gProcess.qzstdInst[i].cpaSessSetup = 0; 892 | 893 | /* Remove session */ 894 | if (CPA_STATUS_SUCCESS != cpaDcRemoveSession( 895 | gProcess.dcInstHandle[i], gProcess.qzstdInst[i].cpaSessHandle)) { 896 | QZSTD_LOG(1, "cpaDcRemoveSession failed\n"); 897 | return QZSTD_FAIL; 898 | } 899 | 900 | QZSTD_free(gProcess.qzstdInst[i].cpaSessHandle, reqPhyContMem); 901 | gProcess.qzstdInst[i].cpaSessHandle = NULL; 902 | 903 | return QZSTD_cpaInitSess(sess, i); 904 | } 905 | 906 | static int QZSTD_grabInstance(int hint) 907 | { 908 | int i, j, rc, f; 909 | 910 | if (hint >= gProcess.numInstances || hint < 0) { 911 | hint = 0; 912 | } 913 | 914 | /*otherwise loop through all of them*/ 915 | f = 0; 916 | for (j = 0; j < MAX_GRAB_RETRY; j++) { 917 | for 
(i = 0; i < gProcess.numInstances; i++) { 918 | if (f == 0) { 919 | i = hint; 920 | f = 1; 921 | }; 922 | rc = __sync_lock_test_and_set(&(gProcess.qzstdInst[i].lock), 1); 923 | if (LIKELY(0 == rc)) { 924 | return i; 925 | } 926 | } 927 | } 928 | return -1; 929 | } 930 | 931 | static void QZSTD_releaseInstance(int i) 932 | { 933 | __sync_lock_release(&(gProcess.qzstdInst[i].lock)); 934 | } 935 | 936 | static void QZSTD_setupSess(QZSTD_Session_T *zstdSess) 937 | { 938 | zstdSess->instHint = -1; 939 | zstdSess->sessionSetupData.compType = CPA_DC_LZ4S; 940 | zstdSess->sessionSetupData.autoSelectBestHuffmanTree = CPA_DC_ASB_ENABLED; 941 | zstdSess->sessionSetupData.sessDirection = CPA_DC_DIR_COMPRESS; 942 | zstdSess->sessionSetupData.sessState = CPA_DC_STATELESS; 943 | zstdSess->sessionSetupData.checksum = CPA_DC_XXHASH32; 944 | zstdSess->sessionSetupData.huffType = CPA_DC_HT_STATIC; 945 | zstdSess->sessionSetupData.minMatch = CPA_DC_MIN_3_BYTE_MATCH; 946 | zstdSess->failOffloadCnt = 0; 947 | } 948 | 949 | int QZSTD_startQatDevice(void) 950 | { 951 | int status; 952 | pthread_mutex_lock(&gProcess.mutex); 953 | 954 | if (QZSTD_FAIL == gProcess.qzstdInitStatus) { 955 | gProcess.qzstdInitStatus = QZSTD_OK == QZSTD_salUserStart() ? QZSTD_STARTED : 956 | QZSTD_FAIL; 957 | } 958 | 959 | if (QZSTD_STARTED == gProcess.qzstdInitStatus) { 960 | gProcess.qzstdInitStatus = QZSTD_OK == QZSTD_getAndShuffleInstance() ? 
961 | QZSTD_OK : QZSTD_STARTED; 962 | } 963 | QZSTD_LOG(2, "InitStatus: %d\n", gProcess.qzstdInitStatus); 964 | status = gProcess.qzstdInitStatus; 965 | pthread_mutex_unlock(&gProcess.mutex); 966 | return status; 967 | } 968 | 969 | static unsigned isLittleEndian(void) 970 | { 971 | const union { 972 | unsigned int u; 973 | unsigned char c[4]; 974 | } one = {1}; /* don't use static : performance detrimental */ 975 | return one.c[0]; 976 | } 977 | 978 | static unsigned short read16(const void *memPtr) 979 | { 980 | unsigned short val; 981 | memcpy(&val, memPtr, sizeof(val)); 982 | return val; 983 | } 984 | 985 | static unsigned short readLE16(const void *memPtr) 986 | { 987 | if (isLittleEndian()) { 988 | return read16(memPtr); 989 | } else { 990 | const unsigned char *p = (const unsigned char *)memPtr; 991 | return (unsigned short)((unsigned short)p[0] + (p[1] << 8)); 992 | } 993 | } 994 | 995 | void *QZSTD_createSeqProdState(void) 996 | { 997 | QZSTD_Session_T *zstdSess = (QZSTD_Session_T *)calloc(1, 998 | sizeof(QZSTD_Session_T)); 999 | QZSTD_setupSess(zstdSess); 1000 | return (void *)zstdSess; 1001 | } 1002 | 1003 | void QZSTD_freeSeqProdState(void *sequenceProducerState) 1004 | { 1005 | QZSTD_Session_T *zstdSess = (QZSTD_Session_T *)sequenceProducerState; 1006 | if (zstdSess) { 1007 | if (zstdSess->qatIntermediateBuf) { 1008 | QZSTD_free(zstdSess->qatIntermediateBuf, zstdSess->reqPhyContMem); 1009 | zstdSess->qatIntermediateBuf = NULL; 1010 | } 1011 | free(zstdSess); 1012 | zstdSess = NULL; 1013 | } 1014 | } 1015 | 1016 | static size_t QZSTD_decLz4s(ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, 1017 | unsigned char *lz4sBuff, unsigned int lz4sBufSize) 1018 | { 1019 | unsigned char *ip = lz4sBuff; 1020 | unsigned char *endip = lz4sBuff + lz4sBufSize; 1021 | unsigned int histLiteralLen = 0; 1022 | 1023 | size_t seqsIdx = 0; 1024 | 1025 | while (LIKELY(ip < endip && lz4sBufSize > 0)) { 1026 | size_t length = 0; 1027 | size_t offset = 0; 1028 | size_t literalLen 
= 0, matchlen = 0; 1029 | /* get literal length */ 1030 | unsigned const token = *ip++; 1031 | if ((length = (token >> ML_BITS)) == RUN_MASK) { 1032 | unsigned s; 1033 | do { 1034 | s = *ip++; 1035 | length += s; 1036 | } while (s == 255); 1037 | } 1038 | literalLen = length; 1039 | ip += length; 1040 | if (ip == endip) { /* Meet the end of the LZ4 sequence */ 1041 | literalLen += histLiteralLen; 1042 | outSeqs[seqsIdx].litLength = literalLen; 1043 | outSeqs[seqsIdx].offset = offset; 1044 | outSeqs[seqsIdx].matchLength = matchlen; 1045 | QZSTD_LOG(3, "Last sequence, literalLen: %zu, offset: %zu, matchlen: %zu\n", 1046 | literalLen, offset, matchlen); 1047 | break; 1048 | } 1049 | 1050 | /* get matchPos */ 1051 | offset = readLE16(ip); 1052 | ip += 2; 1053 | 1054 | /* get match length */ 1055 | length = token & ML_MASK; 1056 | if (length == ML_MASK) { 1057 | unsigned s; 1058 | do { 1059 | s = *ip++; 1060 | length += s; 1061 | } while (s == 255); 1062 | } 1063 | if (length != 0) { 1064 | length += LZ4MINMATCH; 1065 | matchlen = (unsigned short)length; 1066 | literalLen += histLiteralLen; 1067 | 1068 | /* update ZSTD_Sequence */ 1069 | outSeqs[seqsIdx].offset = offset; 1070 | outSeqs[seqsIdx].litLength = literalLen; 1071 | outSeqs[seqsIdx].matchLength = matchlen; 1072 | QZSTD_LOG(3, "sequence, literalLen: %zu, offset: %zu, matchlen: %zu\n", 1073 | literalLen, offset, matchlen); 1074 | histLiteralLen = 0; 1075 | ++seqsIdx; 1076 | if (seqsIdx >= (outSeqsCapacity - 1)) { 1077 | QZSTD_LOG(1, "Sequence exceed capacity\n"); 1078 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1079 | } 1080 | } else { 1081 | if (literalLen > 0) { 1082 | /* When match length is 0, the literalLen needs to be 1083 | temporarily stored and processed together with the next data 1084 | block.*/ 1085 | histLiteralLen += literalLen; 1086 | } 1087 | } 1088 | } 1089 | if (ip != endip) { 1090 | QZSTD_LOG(1, "Unexpected decode error\n"); 1091 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1092 | } 1093 | return 
++seqsIdx; 1094 | } 1095 | 1096 | static inline void QZSTD_castConstPointer(unsigned char **dest, 1097 | const void **src) 1098 | { 1099 | memcpy(dest, src, sizeof(char *)); 1100 | } 1101 | 1102 | static inline int QZSTD_isTimeOut(struct timespec timeStart, 1103 | struct timespec timeNow) 1104 | { 1105 | unsigned long long timeSpent = TIMESPENT_NS(timeNow, timeStart); 1106 | return timeSpent > MAXTIMEOUT ? 1 : 0; 1107 | } 1108 | 1109 | size_t qatSequenceProducer( 1110 | void *sequenceProducerState, ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, 1111 | const void *src, size_t srcSize, 1112 | const void *dict, size_t dictSize, 1113 | int compressionLevel, 1114 | size_t windowSize) 1115 | { 1116 | int i; 1117 | size_t rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1118 | int qrc = CPA_STATUS_FAIL; 1119 | CpaDcOpData opData; 1120 | int retry_cnt = MAX_SEND_REQUEST_RETRY; 1121 | struct timespec timeStart; 1122 | struct timespec timeNow; 1123 | QZSTD_Session_T *zstdSess = (QZSTD_Session_T *)sequenceProducerState; 1124 | Cpa32U intermediateBufLen = 0; 1125 | 1126 | if (windowSize < (srcSize < 32 * KB ? 
srcSize : 32 * KB) || dictSize > 0 || 1127 | dict) { 1128 | QZSTD_LOG(2, 1129 | "Currently not use windowsSize and not support dictionary, windowsSize: %zu, srcSize: %zu, dictSize: %zu\n", 1130 | windowSize, srcSize, dictSize); 1131 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1132 | } 1133 | 1134 | /* QAT only support L1-L12 */ 1135 | if (UNLIKELY(compressionLevel < COMP_LVL_MINIMUM || 1136 | compressionLevel > COMP_LVL_MAXIMUM)) { 1137 | QZSTD_LOG(1, "Only can offload L1-L12 to QAT, current compression level: %d\n" 1138 | , compressionLevel); 1139 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1140 | } 1141 | 1142 | /* check hardware initialization status */ 1143 | pthread_mutex_lock(&gProcess.mutex); 1144 | if (gProcess.qzstdInitStatus != QZSTD_OK) { 1145 | pthread_mutex_unlock(&gProcess.mutex); 1146 | zstdSess->failOffloadCnt++; 1147 | if (zstdSess->failOffloadCnt >= NUM_BLOCK_OF_RETRY_INTERVAL) { 1148 | zstdSess->failOffloadCnt = 0; 1149 | if (QZSTD_startQatDevice() != QZSTD_OK) { 1150 | QZSTD_LOG(1, "Tried to restart QAT device, but failed\n"); 1151 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1152 | } 1153 | } else { 1154 | QZSTD_LOG(1, "The hardware was not successfully started\n"); 1155 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1156 | } 1157 | } else { 1158 | pthread_mutex_unlock(&gProcess.mutex); 1159 | } 1160 | 1161 | zstdSess->sessionSetupData.compLevel = (CpaDcCompLvl)compressionLevel; 1162 | 1163 | i = QZSTD_grabInstance(zstdSess->instHint); 1164 | if (-1 == i) { 1165 | QZSTD_LOG(1, "Failed to grab instance\n"); 1166 | return ZSTD_SEQUENCE_PRODUCER_ERROR; 1167 | } 1168 | 1169 | zstdSess->instHint = i; 1170 | zstdSess->reqPhyContMem = gProcess.qzstdInst[i].reqPhyContMem; 1171 | 1172 | /* allocate instance's buffer */ 1173 | if (0 == gProcess.qzstdInst[i].memSetup) { 1174 | if (QZSTD_OK != QZSTD_allocInstMem(i)) { 1175 | QZSTD_LOG(1, "Failed to allocate instance related memory\n"); 1176 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1177 | goto exit; 1178 | } 1179 | } 1180 | 1181 | /* 
start Dc Instance */ 1182 | if (0 == gProcess.qzstdInst[i].dcInstSetup) { 1183 | if (QZSTD_OK != QZSTD_startDcInstance(i)) { 1184 | QZSTD_LOG(1, "Failed to start DC instance\n"); 1185 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1186 | goto exit; 1187 | } 1188 | } 1189 | 1190 | /* init cpaSessHandle */ 1191 | if (UNLIKELY(0 == gProcess.qzstdInst[i].cpaSessSetup)) { 1192 | if (UNLIKELY(QZSTD_OK != QZSTD_cpaInitSess(zstdSess, i))) { 1193 | QZSTD_LOG(1, "Failed to init sess\n"); 1194 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1195 | goto exit; 1196 | } 1197 | } 1198 | 1199 | /* update cpaSessHandle if need */ 1200 | if (0 != memcmp(&zstdSess->sessionSetupData, 1201 | &gProcess.qzstdInst[i].sessionSetupData, 1202 | sizeof(CpaDcSessionSetupData))) { 1203 | if (UNLIKELY(QZSTD_OK != QZSTD_cpaUpdateSess(zstdSess, i))) { 1204 | QZSTD_LOG(1, "Failed to update sess\n"); 1205 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1206 | goto exit; 1207 | } 1208 | } 1209 | 1210 | if (UNLIKELY(CPA_STATUS_SUCCESS != cpaDcLZ4SCompressBound( 1211 | gProcess.dcInstHandle[i], 1212 | ZSTD_BLOCKSIZE_MAX, &intermediateBufLen))) { 1213 | QZSTD_LOG(1, "Failed to calculate compress bound\n"); 1214 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1215 | goto exit; 1216 | } 1217 | 1218 | /* Allocate intermediate buffer for storing lz4s format compressed by QAT */ 1219 | if (NULL == zstdSess->qatIntermediateBuf) { 1220 | zstdSess->qatIntermediateBuf = 1221 | (unsigned char *)QZSTD_calloc(1, intermediateBufLen, 1222 | zstdSess->reqPhyContMem); 1223 | if (UNLIKELY(NULL == zstdSess->qatIntermediateBuf)) { 1224 | QZSTD_LOG(1, "Failed to allocate memory"); 1225 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1226 | goto exit; 1227 | } 1228 | } 1229 | 1230 | if (zstdSess->reqPhyContMem) { 1231 | memcpy(gProcess.qzstdInst[i].srcBuffer->pBuffers->pData, src, srcSize); 1232 | } else { 1233 | QZSTD_castConstPointer(&(gProcess.qzstdInst[i].srcBuffer->pBuffers->pData), 1234 | &src); 1235 | } 1236 | gProcess.qzstdInst[i].destBuffer->pBuffers->pData = 1237 | 
(Cpa8U *)zstdSess->qatIntermediateBuf; 1238 | 1239 | gProcess.qzstdInst[i].srcBuffer->pBuffers->dataLenInBytes = srcSize; 1240 | gProcess.qzstdInst[i].destBuffer->pBuffers->dataLenInBytes = 1241 | intermediateBufLen; 1242 | 1243 | memset(&opData, 0, sizeof(CpaDcOpData)); 1244 | opData.inputSkipData.skipMode = CPA_DC_SKIP_DISABLED; 1245 | opData.outputSkipData.skipMode = CPA_DC_SKIP_DISABLED; 1246 | opData.compressAndVerify = CPA_TRUE; 1247 | opData.flushFlag = CPA_DC_FLUSH_FINAL; 1248 | 1249 | gProcess.qzstdInst[i].res.checksum = 0; 1250 | 1251 | do { 1252 | /* Submit request to QAT */ 1253 | qrc = cpaDcCompressData2(gProcess.dcInstHandle[i], 1254 | gProcess.qzstdInst[i].cpaSessHandle, 1255 | gProcess.qzstdInst[i].srcBuffer, 1256 | gProcess.qzstdInst[i].destBuffer, &opData, 1257 | &gProcess.qzstdInst[i].res, (void *)&gProcess.qzstdInst[i]); 1258 | retry_cnt--; 1259 | } while (CPA_STATUS_RETRY == qrc && retry_cnt > 0); 1260 | 1261 | if (UNLIKELY(CPA_STATUS_SUCCESS != qrc)) { 1262 | QZSTD_LOG(1, "Failed to submit request, status: %d\n", qrc); 1263 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1264 | goto error; 1265 | } 1266 | 1267 | gProcess.qzstdInst[i].seqNumIn++; 1268 | 1269 | clock_gettime(CLOCK_MONOTONIC, &timeStart); 1270 | 1271 | do { 1272 | /* Poll responses */ 1273 | qrc = icp_sal_DcPollInstance(gProcess.dcInstHandle[i], 0); 1274 | /* Check timeout when polling doesn't succeed to reduce syscall */ 1275 | if (UNLIKELY(qrc != CPA_STATUS_SUCCESS)) { 1276 | clock_gettime(CLOCK_MONOTONIC, &timeNow); 1277 | if (QZSTD_isTimeOut(timeStart, timeNow)) { 1278 | QZSTD_LOG(1, "Polling time out\n"); 1279 | break; 1280 | } 1281 | } 1282 | } while (CPA_STATUS_RETRY == qrc || (CPA_STATUS_SUCCESS == qrc && 1283 | gProcess.qzstdInst[i].seqNumIn != gProcess.qzstdInst[i].seqNumOut)); 1284 | 1285 | if (UNLIKELY(CPA_STATUS_FAIL == qrc)) { 1286 | gProcess.qzstdInst[i].seqNumOut++; 1287 | QZSTD_LOG(1, "Polling failed, polling status: %d\n", qrc); 1288 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 
1289 | goto error; 1290 | } 1291 | 1292 | if (CPA_STATUS_RETRY == qrc) { 1293 | QZSTD_LOG(1, "Polling failed on RETRY with timeout, polling status: %d\n", qrc); 1294 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1295 | goto error; 1296 | } 1297 | if (UNLIKELY(gProcess.qzstdInst[i].cbStatus == QZSTD_FAIL)) { 1298 | QZSTD_LOG(1, "Error in dc callback, cbStatus: %d\n", 1299 | gProcess.qzstdInst[i].cbStatus); 1300 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1301 | goto error; 1302 | } 1303 | 1304 | if (gProcess.qzstdInst[i].res.consumed < srcSize || 1305 | gProcess.qzstdInst[i].res.produced == 0 || 1306 | gProcess.qzstdInst[i].res.produced > intermediateBufLen || 1307 | CPA_STATUS_SUCCESS != gProcess.qzstdInst[i].res.status) { 1308 | QZSTD_LOG(1, 1309 | "QAT result error, srcSize: %zu, consumed: %d, produced: %d, res.status:%d\n", 1310 | srcSize, gProcess.qzstdInst[i].res.consumed, gProcess.qzstdInst[i].res.produced, 1311 | gProcess.qzstdInst[i].res.status); 1312 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1313 | goto error; 1314 | } 1315 | QZSTD_LOG(2, "srcSize: %zu, consumed: %d, produced: %d\n", 1316 | srcSize, gProcess.qzstdInst[i].res.consumed, 1317 | gProcess.qzstdInst[i].res.produced); 1318 | 1319 | /* If source data is uncompressed, create one sequence */ 1320 | if (CPA_TRUE == gProcess.qzstdInst[i].res.dataUncompressed) { 1321 | outSeqs[0].litLength = srcSize; 1322 | outSeqs[0].offset = 0; 1323 | outSeqs[0].matchLength = 0; 1324 | rc = 1; 1325 | } else { 1326 | rc = QZSTD_decLz4s(outSeqs, outSeqsCapacity, zstdSess->qatIntermediateBuf, 1327 | gProcess.qzstdInst[i].res.produced); 1328 | } 1329 | if (rc >= (outSeqsCapacity - 1) || ZSTD_SEQUENCE_PRODUCER_ERROR == rc) { 1330 | QZSTD_LOG(1, "Decode error\n"); 1331 | rc = ZSTD_SEQUENCE_PRODUCER_ERROR; 1332 | goto error; 1333 | } 1334 | QZSTD_LOG(2, "Produced %zu sequences\n", rc); 1335 | 1336 | error: 1337 | /* reset pData */ 1338 | if (!zstdSess->reqPhyContMem) { 1339 | gProcess.qzstdInst[i].srcBuffer->pBuffers->pData = NULL; 1340 | } 
1341 | gProcess.qzstdInst[i].destBuffer->pBuffers->pData = NULL; 1342 | 1343 | exit: 1344 | /* release QAT instance */ 1345 | QZSTD_releaseInstance(i); 1346 | return rc; 1347 | } 1348 | --------------------------------------------------------------------------------