├── .gitignore ├── COPYING ├── Makefile.am ├── README ├── README.build ├── README.engine ├── README.tests ├── configure.ac ├── debian ├── changelog ├── control ├── copyright ├── libpka1-dev.install ├── libpka1-doc.doc-base ├── libpka1-doc.docs ├── libpka1-engine.docs ├── libpka1-engine.install ├── libpka1-testutils.docs ├── libpka1-testutils.install ├── libpka1.docs ├── libpka1.install ├── rules └── upstream │ └── metadata ├── doc ├── Doxyfile ├── Makefile.am └── pdf │ ├── pka-architecture-doc.pdf │ └── pka-programming-doc.pdf ├── engine ├── Makefile.am ├── e_bluefield.c ├── engine.exports └── helper │ ├── ec_local.h │ ├── pka_helper.c │ └── pka_helper.h ├── include ├── pka_addrs.h ├── pka_atomic.h ├── pka_barrier.h ├── pka_common.h ├── pka_config.h ├── pka_cpu.h ├── pka_debug.h ├── pka_firmware.h ├── pka_ioctl.h ├── pka_lock.S ├── pka_mmio.h ├── pka_types.h └── pka_utils.h ├── lib ├── Makefile.am ├── pka.c ├── pka.h ├── pka_dev.c ├── pka_dev.h ├── pka_internal.h ├── pka_mem.c ├── pka_mem.h ├── pka_queue.c ├── pka_queue.h ├── pka_ring.c ├── pka_ring.h └── pka_vectors.h ├── libpka.spec ├── m4 └── ax_pthread.m4 └── tests ├── Makefile.am ├── performance └── pka_test_performance.c ├── pka_test_utils.c ├── pka_test_utils.h ├── power └── pka_test_power.c └── validation ├── pka_test_validation.c └── pka_test_vectors.h /.gitignore: -------------------------------------------------------------------------------- 1 | autom4te.cache 2 | build-aux 3 | /Makefile 4 | /Makefile.in 5 | /aclocal.m4 6 | /libtool 7 | /autoscan.log 8 | /autoscan-*.log 9 | /configure.scan 10 | /configure 11 | /config.h 12 | /config.h.in 13 | /config.log 14 | /config.status 15 | /stamp-h? 
16 | /m4/libtool.m4 17 | /m4/ltoptions.m4 18 | /m4/ltsugar.m4 19 | /m4/ltversion.m4 20 | /m4/lt~obsolete.m4 21 | /m4/pkg.m4 22 | .deps/ 23 | .libs/ 24 | .dirstamp 25 | *.o 26 | *.l[ao] 27 | *~ 28 | /doc/html/ 29 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2018-2023 NVIDIA Corporation & affiliates. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 19 | AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS 22 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 23 | OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 27 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 28 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 --install 2 | dist_doc_DATA = README README.tests README.engine COPYING 3 | SUBDIRS = $(ENABLED_SUBDIRS) 4 | 5 | include_HEADERS = \ 6 | lib/pka.h \ 7 | lib/pka_dev.h \ 8 | lib/pka_mem.h \ 9 | lib/pka_ring.h \ 10 | lib/pka_queue.h \ 11 | include/pka_addrs.h \ 12 | include/pka_atomic.h \ 13 | include/pka_barrier.h \ 14 | include/pka_common.h \ 15 | include/pka_config.h \ 16 | include/pka_cpu.h \ 17 | include/pka_debug.h \ 18 | include/pka_firmware.h \ 19 | include/pka_ioctl.h \ 20 | include/pka_mmio.h \ 21 | include/pka_types.h \ 22 | include/pka_utils.h 23 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | © 2023 NVIDIA Corporation & affiliates. 2 | 3 | License: BSD-3-Clause 4 | 5 | BlueField PKA API 1.0 6 | Mellanox BlueField Public Key Acceleration (PKA) Package 7 | 8 | Author, Khalil Blaiech 9 | 10 | 11 | =============================================================================== 12 | Overview 13 | =============================================================================== 14 | 15 | This directory hierarchy holds the Mellanox BlueField PKA software. 16 | It contains the documentation, sources, and tests needed to use the 17 | BlueField PKA hardware. It offers a custom API required to code 18 | PKA-based applications. 
19 | 20 | The PKA software package consists of (1) an API specification, which 21 | is the application writer's view (this is also intended to provide 22 | complete interfaces to use with OpenSSL), (2) an API implementation 23 | for BlueField, (3) a validation test suite, an independent set of 24 | test routines that run against the API implementation and verify 25 | that it correctly implements all of the defined APIs at a functional 26 | level, and (4) a dynamic OpenSSL engine component to support RSA 27 | operations and interfaces with the BlueField PKA hardware. 28 | 29 | 30 | =============================================================================== 31 | Important notes 32 | =============================================================================== 33 | 34 | * The BlueField PKA software is intended for BlueField products that 35 | support the crypto-enabled feature (High Bin/Crypto BlueField chip). 36 | 37 | To verify whether the BlueField chip has crypto capabilities, 38 | contact Mellanox or check the CPU flags; 'aes', 'sha1', and 39 | 'sha2' must be present. 40 | 41 | # lscpu 42 | ... 43 | Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid 44 | 45 | * BlueField boot loader MUST enable SMMU support (bf-smmu) in order to 46 | benefit from the full hardware and software capabilities (e.g.: Public 47 | Key operations, and TRNG support); the SMMU support might be set in 48 | UEFI menu through system configuration options. 49 | This feature is supported since UEFI version 0.99-f0e004e (BlueField release 50 | >= 1.0.beta1.10517). Earlier versions require a special UEFI build. 51 | 52 | From UEFI menu: 53 | 54 | Select 'Device Manager', and enter 'System Configuration' 55 | screen to enable SMMU; simply check 'Enable SMMU' option. 56 | 57 | To verify whether the SMMU support is enabled: 58 | 59 | On Yocto Poky systems: 60 | # /opt/mlnx/scripts/bfver 61 | ... 62 | BlueField UEFI version: 0.99-f0e004e 63 | ... 64 | Boot ACPI: bf-smmu 65 | ... 
66 | 67 | * The BlueField PKA software requires the MLXBF PKA kernel module 68 | to be installed; the kernel module is referred to as 'pka-mlxbf'; 69 | it should be provided as part of the BlueField Software Distribution. 70 | 71 | To verify whether the MLXBF PKA driver is installed: 72 | 73 | On Yocto Poky systems: 74 | # lsmod | grep -i pka 75 | pka_mlxbf 53248 0 76 | 77 | On CentOS/Ubuntu systems: 78 | # lsmod | grep -i pka 79 | pka_mlxbf 262144 0 80 | vfio 262144 2 vfio_iommu_type1,pka_mlxbf 81 | 82 | To install the MLXBF PKA driver: 83 | 84 | On Yocto Poky systems: 85 | # modprobe pka-mlxbf 86 | 87 | On CentOS systems, install the driver source RPM; download the 88 | SRPM from /mswg/release/sw_mc_soc/BlueField-1.0.beta1.10513/SRPMS. 89 | 90 | # rpmbuild --rebuild pka-mlxbf-1.0-0.g5bd5b80.src.rpm 91 | # cd ~/rpmbuild/RPMS/aarch64/ 92 | # rpm -ivh pka-mlxbf-1.0-0.g5bd5b80_4.11.0_22.el7a.aarch64.rpm 93 | # modprobe pka-mlxbf 94 | 95 | 96 | =============================================================================== 97 | Basic directory structure 98 | =============================================================================== 99 | 100 | The directory is structured as follows: 101 | 102 | doc/html/ 103 | API reference documentation 104 | 105 | doc/pdf/ 106 | Documentation related to the BlueField PKA hardware and software 107 | specification. It also provides details about the architecture, 108 | the design and the implementation of the API. It covers most of 109 | the API concepts in case customers wish to use it directly. 110 | 111 | engine/ 112 | Source file of the OpenSSL engine and its associated helper 113 | files to integrate with the BlueField PKA library. 114 | 115 | include/ 116 | Various helpers and header files used by the library sources 117 | as well as the BlueField PKA driver module. 118 | 119 | lib/ 120 | Complete sources and header files, including the userspace API 121 | (i.e., pka.h) as well as the library implementation. 
Sources 122 | are built as a shared library which might be used by userspace 123 | applications. 124 | 125 | tests/ 126 | Test suite to provide a comprehensive set of API validation 127 | tests that are intended to be used to verify whether the 128 | implementation meets the design requirements. It also includes 129 | additional tests which might be used for power tests. 130 | 131 | =============================================================================== 132 | Naming convention 133 | =============================================================================== 134 | 135 | Source files and header files are named: pka_*, the suffix identifies 136 | the file. Test files are named: pka_test_*, the suffix identifies the 137 | test. C functions for the library files must be called pka_<file>_*, 138 | where the possible suffix can refer to either objects or actions. For 139 | instance, if a function belongs to 'pka_dev.c', it should be called 140 | 'pka_dev_<object>_<action>()'. 141 | 142 | 143 | =============================================================================== 144 | How to build 145 | =============================================================================== 146 | 147 | Look in 'README.build' for the list of build dependencies and for more 148 | detail on how to build. 149 | 150 | Directory 'tests' contains test applications for BlueField PKA API 151 | calls and features support. 152 | 153 | In general you can build: 154 | 155 | autoreconf -ifv 156 | ./configure 157 | 158 | Use 'make' to build PKA library and PKA API documentation; 'make install' 159 | will copy all required binary files to the install directory. 160 | 161 | Note that verification tests and OpenSSL engine are built separately, 162 | if needed. Use 'make' and 'make install' to build and install the tests 163 | executables and the dynamically-loadable engine module. 
164 | -------------------------------------------------------------------------------- /README.build: -------------------------------------------------------------------------------- 1 | © 2023 NVIDIA Corporation & affiliates. 2 | 3 | License: BSD-3-Clause 4 | 5 | BlueField PKA API 1.0 6 | 7 | Author, Khalil Blaiech 8 | 9 | 10 | =============================================================================== 11 | Overview 12 | =============================================================================== 13 | 14 | The Mellanox BlueField PKA software is a set of libraries and tests 15 | intended to be used with the BlueField PKA hardware. 16 | 17 | The Mellanox BlueField PKA software offers a custom API required to 18 | code PKA-based applications. 19 | 20 | 21 | =============================================================================== 22 | Dependencies 23 | =============================================================================== 24 | 25 | Building the PKA library, documentation and tests requires the following: 26 | 27 | 1. Linux Kernel >= 4.11.0 28 | 29 | Mellanox BlueField products support the following Linux distributions: 30 | 31 | * BlueField Yocto Poky (4.14.53 aarch64) 32 | * CentOS 7 (4.11.0-22.el7a.aarch64) 33 | * Ubuntu 18.04 (4.18.0-1000-mellanox aarch64) 34 | 35 | The kernel configuration should enable VFIO support and its dependencies, 36 | and hardware random number generator support. 37 | 38 | 2. Autotools 39 | 40 | automake 41 | autoconf 42 | libtool 43 | 44 | On CentOS systems: 45 | # sudo yum install automake autoconf libtool 46 | 47 | On Ubuntu systems: 48 | # sudo apt install automake autoconf libtool 49 | 50 | 3. Required packages 51 | 52 | Libraries currently required to link: openssl 53 | 54 | OpenSSL is needed to build the dynamically-loadable BlueField PKA 55 | engine. Note that OpenSSL 1.1 or higher is required. Earlier 56 | versions may or may not work. 57 | 58 | 4. 
Doxygen documentation 59 | 60 | The API documentation is generated via doxygen. The trusted version 61 | of doxygen is 1.8.5. Only HTML documentation is built. 62 | 63 | 64 | =============================================================================== 65 | Important Notes 66 | =============================================================================== 67 | 68 | * The PKA libraries and drivers should be already installed within the 69 | given Linux distribution; 70 | 71 | On poky and CentOS, the PKA library is installed into 72 | '/usr/lib64/libPKA.so' 73 | 74 | On Ubuntu, the PKA library is installed into 75 | '/usr/lib/aarch64-linux-gnu/libPKA.so' 76 | 77 | =============================================================================== 78 | How to build 79 | =============================================================================== 80 | 81 | 1. Build PKA software and documentation: 82 | 83 | # autoreconf -ivf 84 | # ./configure 85 | # make 86 | # make install 87 | 88 | 2. Cross compile of PKA software: 89 | 90 | To cross compile the PKA library and documentation, the Yocto-produced 91 | SDK might be installed: 92 | 93 | $ /mswg/release/sw_mc_soc/BlueField-1.0../poky-glibc-x86_64-core-image-full-sdk-aarch64-toolchain-BlueField-1.0...2.4.1.sh 94 | Poky (Yocto Project Reference Distro) SDK installer version 2.4.1 95 | ================================================================= 96 | Enter target directory for SDK (default: /opt/poky/2.4.1): /labhome/kblaiech/workspace/poky/2.4.1 97 | You are about to install the SDK to "/labhome/kblaiech/workspace/poky/2.4.1". Proceed[Y/n]? 98 | Y 99 | Extracting SDK............................................................. 100 | ........................................................................... 101 | ........................................................................... 
102 | ........................................................done 103 | Setting it up...done 104 | SDK has been successfully set up and is ready to be used. 105 | Each time you wish to use the SDK in a new shell session, you need to source the environment setup script e.g. 106 | $ . /labhome/kblaiech/workspace/poky/2.4.1/environment-setup-aarch64-poky-linux 107 | 108 | To use the SDK cross-compilation tools, you should 'source' the top-level 109 | "environment-setup-aarch64-poky-linux" script to set various environment 110 | variables, including $PATH, $CC, $CROSS_COMPILE, etc. 111 | 112 | $ source /poky/2.4.1/environment-setup-aarch64-poky-linux 113 | 114 | To build the PKA software: 115 | 116 | $ autoreconf -ivf 117 | $ ./configure --host=aarch64-poky-linux --prefix= 118 | $ make 119 | $ make install 120 | 121 | 3. Build PKA tests 122 | 123 | Look in 'README.tests' for further detail on how to build, install 124 | and run the tests suite. 125 | 126 | By default, tests suite support is enabled, run 'configure' script 127 | with option '--disable-testutils' to disable it: 128 | 129 | # ./configure --disable-testutils 130 | 131 | 4. Build PKA engine 132 | 133 | Look in 'README.engine' for further detail on how to build, install, 134 | load and run OpenSSL applications with BlueField PKA engine. 135 | 136 | By default, engine support is enabled, run 'configure' script with 137 | option '--disable-engine' to disable it: 138 | 139 | # ./configure --disable-engine 140 | 141 | =============================================================================== 142 | Building packages 143 | =============================================================================== 144 | 145 | 5 deb packages are generated: libpka1, libpka1-engine, libpka1-testutils, libpka1-dev, libpka1-doc. 
146 | 147 | libpka1 - main package that contains libPKA.so library 148 | libpka1-engine - package that contains OpenSSL engine 149 | libpka1-testutils - contains test utilities (pka_test_validation pka_test_performance) 150 | libpka1-dev - contains header files for libPKA.so library 151 | libpka1-doc - contains documentation 152 | 153 | DEB packages: 154 | 155 | For deb packages, the build is done using standard Debian tools such as dpkg-buildpackage, debuild or using debian/rules. 156 | Example of building deb package. Command should be run from source root: 157 | 158 | $ dpkg-buildpackage -b -uc -us 159 | 160 | RPM packages: 161 | 162 | RPM packages are built using rpmbuild. 163 | Example of building rpm package. Command should be run from source root: 164 | 165 | $ rpmdev-setuptree 166 | $ tar -cvzp --show-transformed --transform 's,^\.,./libpka-2.0,' --file /root/rpmbuild/SOURCES/libpka-2.0.tar.gz --exclude='.git*' . 167 | $ rpmbuild -bb libpka.spec 168 | -------------------------------------------------------------------------------- /README.engine: -------------------------------------------------------------------------------- 1 | © 2023 NVIDIA Corporation & affiliates. 2 | 3 | License: BSD-3-Clause 4 | 5 | BlueField PKA engine 1.0 6 | 7 | Author, Khalil Blaiech 8 | 9 | 10 | =============================================================================== 11 | Overview 12 | =============================================================================== 13 | 14 | The BlueField PKA engine is a dynamically-loadable OpenSSL engine that 15 | interfaces with the BlueField PKA hardware. It provides support for RSA 16 | operations such as signature generation and verification. 17 | 18 | The BlueField PKA engine might be integrated with OpenSSL 1.0 and 1.1. 19 | Earlier versions may or may not work; use at your own risk! 20 | 21 | Bluefield PKA engine can also be used with OpenSSL 3.0 provided OpenSSL 3.0 22 | is built with compatibility for OpenSSL 1.1. 
This can be done by passing the 23 | option "--api=1.1.0" during the configure step for OpenSSL 3.0. 24 | Once OpenSSL 3.0 is built with compatibility for OpenSSL 1.1, follow the steps 25 | below on how to build, install and use the Bluefield PKA engine. 26 | 27 | Currently, OpenSSL 3.0 is in alpha release (alpha12), so it is not a stable 28 | release; some of the advertised features might not work! 29 | 30 | =============================================================================== 31 | Important Notes 32 | =============================================================================== 33 | 34 | * The PKA engine requires the PKA libraries and drivers installed. 35 | Look at 'README' to build and install PKA libraries. Note that 36 | the PKA engine is pre-installed: 37 | 38 | On Yocto Poky systems, the engine is installed into 39 | '/usr/lib64/engines-1.1/libbfengine.so' 40 | 41 | On CentOS systems, the engine is installed into 42 | '/usr/lib64/libbfengine.so.1.1.0' 43 | 44 | On Ubuntu systems, the engine is installed into 45 | '/usr/lib/aarch64-linux-gnu/libbfengine.so.1.1.0' 46 | 47 | On Debian systems, the engine is installed into 48 | '/usr/lib/aarch64-linux-gnu/libbfengine.so.1.1.0' 49 | 50 | Users are invited to install the shared object file into the 51 | appropriate destination directory within the OpenSSL engine folder, 52 | if needed. This depends on OpenSSL installation and settings. 53 | 54 | * The PKA engine requires OpenSSL libraries, such as libcrypto and 55 | libssl. 
Note that Yocto Poky, CentOS, Debian and Ubuntu systems have a pre- 56 | installed openssl package: 57 | 58 | On Yocto Poky systems: 59 | # openssl version 60 | OpenSSL 1.1.1g 21 Apr 2020 61 | 62 | On CentOS systems: 63 | # openssl version 64 | OpenSSL 1.0.2k-fips 26 Jan 2017 65 | 66 | On Ubuntu systems: 67 | # openssl version 68 | OpenSSL 1.1.1f 31 Mar 2020 69 | 70 | On Debian systems: 71 | # openssl version 72 | OpenSSL 1.1.1d 10 Sep 2019 73 | 74 | For native compilation, simply load the necessary libraries using 75 | the appropriate tool set. 76 | 77 | On CentOS systems: 78 | # yum install openssl-devel 79 | 80 | On Ubuntu systems: 81 | # apt install libssl-dev 82 | 83 | Cross compilation requires cross compiling the individual libraries. 84 | In order for a cross compiled executable to run on a target system, 85 | one must build the same version as that which is installed on the 86 | target rootfs. 87 | 88 | * OpenSSL version 1.1.1 is recommended. This is necessary in order to 89 | offload ECDH and ECDSA. 90 | 91 | 92 | =============================================================================== 93 | How to build 94 | =============================================================================== 95 | 96 | Build BlueField PKA engine module: 97 | 98 | # cd 99 | # make 100 | # make install 101 | 102 | =============================================================================== 103 | How to install 104 | =============================================================================== 105 | 106 | The PKA engine module has to be installed within the engines folder 107 | of the installed OpenSSL. This may vary depending on OpenSSL installation 108 | as well as the Linux distribution. 109 | 110 | Note that both Yocto Poky and CentOS have already the engine module 111 | installed within the correct directory. 
112 | 113 | On Yocto Poky, the engine module is installed into 114 | '/usr/lib64/engines-1.1/pka.so' 115 | 116 | On CentOS, the engine module is installed into 117 | '/usr/lib64/openssl/engines/libpka.so' 118 | 119 | On Ubuntu, the engine module is installed into 120 | '/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so' 121 | 122 | On Debian, the engine module is installed into 123 | '/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so' 124 | 125 | If needed, install the engine module to OpenSSL 'engines' directory. 126 | 127 | On Yocto Poky systems running default OpenSSL 1.1.1g: 128 | # cd /lib 129 | # cp libbfengine.so.1.1.0 /usr/lib64/engines-1.1/ 130 | # cd /usr/lib64/engines-1.1/ 131 | # ln -s libbfengine.so.1.1.0 pka.so 132 | 133 | On CentOS systems running default OpenSSL 1.0.2k-fips: 134 | # cd /lib 135 | # cp libbfengine.so.1.1.0 /usr/lib64/openssl/engines/ 136 | # cd /usr/lib64/openssl/engines/ 137 | # ln -s libbfengine.so.1.1.0 libpka.so 138 | 139 | On Ubuntu systems running default OpenSSL 1.1.1f: 140 | # cd /lib 141 | # cp libbfengine.so.1.1.0 /usr/lib/aarch64-linux-gnu/engines-1.1/ 142 | # cd /usr/lib/aarch64-linux-gnu/engines-1.1/ 143 | # ln -s libbfengine.so.1.1.0 pka.so 144 | 145 | On Debian systems running default OpenSSL 1.1.1d: 146 | # cd /lib 147 | # cp libbfengine.so.1.1.0 /usr/lib/aarch64-linux-gnu/engines-1.1/ 148 | # cd /usr/lib/aarch64-linux-gnu/engines-1.1/ 149 | # ln -s libbfengine.so.1.1.0 pka.so 150 | 151 | Note that OpenSSL 1.0 requires the prefix 'lib' to be prepended to 152 | the ID of the PKA engine, i.e. 'libpka.so'. For OpenSSL 1.1, the module 153 | name should be 'pka.so', the same as the ID of the engine. 154 | 155 | Please refer to Section "Important Notes" in order to locate the pre-built 156 | engine module, in case the module didn't get built manually. 
157 | 158 | =============================================================================== 159 | How to load 160 | =============================================================================== 161 | 162 | To load the BlueField PKA engine module into OpenSSL: 163 | 164 | On Yocto Poky systems: 165 | # openssl engine dynamic \ 166 | > -pre SO_PATH:/usr/lib64/engines-1.1/pka.so \ 167 | > -pre ID:pka -pre LOAD 168 | (dynamic) Dynamic engine loading support 169 | [Success]: SO_PATH:/usr/lib64/engines-1.1/pka.so 170 | [Success]: ID:pka 171 | [Success]: LOAD 172 | Loaded: (pka) BlueField PKA engine support 173 | [ available ] 174 | 175 | On CentOS systems: 176 | # openssl engine -t dynamic \ 177 | > -pre SO_PATH:/usr/lib64/openssl/engines/libpka.so \ 178 | > -pre ID:pka -pre LOAD 179 | (dynamic) Dynamic engine loading support 180 | [Success]: SO_PATH:/usr/lib64/openssl/engines/libpka.so 181 | [Success]: ID:pka 182 | [Success]: LOAD 183 | Loaded: (pka) BlueField PKA engine support 184 | [ available ] 185 | 186 | On Ubuntu systems: 187 | # openssl engine dynamic \ 188 | > -pre SO_PATH:/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so \ 189 | > -pre ID:pka -pre LOAD 190 | (dynamic) Dynamic engine loading support 191 | [Success]: SO_PATH:/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so 192 | [Success]: ID:pka 193 | [Success]: LOAD 194 | Loaded: (pka) BlueField PKA engine support 195 | [ available ] 196 | 197 | On Debian systems: 198 | # openssl engine dynamic \ 199 | > -pre SO_PATH:/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so \ 200 | > -pre ID:pka -pre LOAD 201 | (dynamic) Dynamic engine loading support 202 | [Success]: SO_PATH:/usr/lib/aarch64-linux-gnu/engines-1.1/pka.so 203 | [Success]: ID:pka 204 | [Success]: LOAD 205 | Loaded: (pka) BlueField PKA engine support 206 | [ available ] 207 | 208 | Note that the option '-t' is omitted when working with OpenSSL 1.1, 209 | since this cannot be used with option 'engine'. 
210 | 211 | Now, the BlueField PKA engine is ready to use; no need to reload the 212 | engine across reboots. 213 | 214 | To use the BlueField PKA engine, one may pass the option '-engine pka' 215 | to dynamically load the module and enable hardware acceleration. 216 | 217 | Examples: 218 | 219 | 1. RSA 220 | # openssl speed -engine pka rsa512 221 | engine "pka" set. 222 | ... 223 | 224 | 2. DSA 225 | # openssl speed -engine pka dsa512 226 | engine "pka" set. 227 | ... 228 | 229 | 3. ECDH 230 | # openssl speed -engine pka ecdhp256 231 | engine "pka" set. 232 | ... 233 | 234 | 4. ECDSA 235 | # openssl speed -engine pka ecdsap256 236 | engine "pka" set. 237 | ... 238 | 239 | 5. Random number generation (RNG) 240 | # openssl rand -engine pka 512 241 | engine "pka" set. 242 | ... 243 | -------------------------------------------------------------------------------- /README.tests: -------------------------------------------------------------------------------- 1 | © 2023 NVIDIA Corporation & affiliates. 2 | 3 | License: BSD-3-Clause 4 | 5 | BlueField PKA Tests Suite 6 | 7 | Author, Khalil Blaiech 8 | 9 | 10 | =============================================================================== 11 | Overview 12 | =============================================================================== 13 | 14 | The BlueField PKA Tests Suite consists of a set of test applications, an 15 | independent set of test routines that run against the API implementation 16 | and verify that it correctly implements all of the defined APIs at a 17 | functional level. 18 | 19 | The BlueField PKA Tests Suite is intended to be used solely for QA and 20 | verification purposes. Tests wouldn't be exposed to customers nor users. 
21 | 22 | 23 | =============================================================================== 24 | Important Notes 25 | =============================================================================== 26 | 27 | * The PKA tests suite requires the PKA libraries and drivers installed. 28 | Look at 'README' to build and install PKA libraries and drivers. 29 | 30 | =============================================================================== 31 | How to build 32 | =============================================================================== 33 | 34 | To build and install PKA tests: 35 | 36 | # cd 37 | # make 38 | # make install 39 | 40 | Test applications should be installed into '/bin'. 41 | 42 | 43 | =============================================================================== 44 | Validation tests 45 | =============================================================================== 46 | 47 | Example of usage: 48 | 49 | # ./pka_test_validation -c 8 -r 4 -s 1 50 | 51 | 52 | syntax: pka_test_validation [--help|-h] [--mode|-m MODE] 53 | [--sync|-s SYNC] --cpu NUM --ring NUM 54 | 55 | Mandatory parameters: 56 | -c, --cpu NUM CPU count. 57 | -r, --ring NUM Ring count. 58 | 59 | Optional parameters: 60 | -m, --mode MODE Application mode 61 | 0: single process mode (default) 62 | 1: multi process mode 63 | -s, --sync SYNC Synchronization mode for multithread operations 64 | 0: none of operations are lock-free 65 | 1: all operations are lock-free (default) 66 | -h, --help Display help and exit. 67 | 68 | 69 | =============================================================================== 70 | Power usage tests 71 | =============================================================================== 72 | 73 | Example of usage: 74 | 75 | # ./pka_test_power -r 16 -t 10 -k 4 76 | 77 | 78 | syntax: pka_test_power [--help|-h] [--time|-t SECONDS] 79 | [--key|-k SIZE] --ring NUM 80 | 81 | Mandatory parameters: 82 | -r, --ring NUM Ring count. 
83 | 84 | Optional parameters: 85 | -t, --time SECONDS Number of seconds to run. 86 | -k, --key SIZE Key size in Kbits: 87 | 0: RSA 2048 bits (default) 88 | 1: RSA 4096 bits 89 | -h, --help Display help and exit. 90 | 91 | =============================================================================== 92 | Performance tests 93 | =============================================================================== 94 | 95 | Example of usage: 96 | 97 | # ./pka_test_performance -c MOD_EXP -e little -k 1 -m 100 -n 4 -q 10 \ 98 | > -t 1 -o 1 -v 0 -y no 99 | 100 | syntax: pka_test_performance OPTIONS 101 | 102 | Where OPTIONS can be: 103 | 104 | -b primary bit_len to use 105 | -e ( big | little ) endianness of the interface 106 | -h print this message and exit 107 | -k num of different key subsystems to make 108 | -m num of runs of each test per thread 109 | -n num of tests (per key subsystem) to make 110 | -q number of cmds each thread keeps in play 111 | -r report the per thread stats/results 112 | -s secondary bit_len for some cryptosystems 113 | -t number of threads/tiles to use 114 | -o number of PKA rings to use 115 | -v verbosity level - in range 0-3 116 | -y ( yes | no ) check_results if set to yes 117 | -c name of the test kind. One of: 118 | ADD, SUBTRACT, MULTIPLY, DIVIDE, DIV_MOD, MODULO 119 | SHIFT_LEFT, SHIFT_RIGHT, MOD_INVERT 120 | MOD_EXP, RSA_MOD_EXP, RSA_VERIFY, RSA_MOD_EXP_WITH_CRT 121 | ECC_ADD, ECC_DOUBLE, ECC_MULTIPLY 122 | ECDSA_GEN, ECDSA_VERIFY, ECDSA_GEN_VERIFY 123 | DSA_GEN, DSA_VERIFY, DSA_GEN_VERIFY 124 | 125 | The default command line options (except for -b and -s) are: 126 | '-c MOD_EXP -e little -k 1 -m 100 -n 4 -q 10 -t 1 -o 1 -v 0 -y no' 127 | The defaults for '-b' and '-s' depend upon the test name (as 128 | given by '-c') as follows: 129 | a) the default for '-b' is 1024 for all tests except 130 | for the ECC_* tests and ECDSA_* tests when it is 256. 
131 | b) the default for -s is 33 for RSA_VERIFY, 'bit_len - 1' 132 | for DIVIDE, DIV_MOD, MODULO, and DSA_* tests, 'bit_len / 2' 133 | for the ECDSA_* tests and unused for all other tests. 134 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.69]) 2 | m4_define([libpka_VERSION], [2.0]) 3 | AC_INIT([libpka], m4_defn([libpka_VERSION]), [networking-support@nvidia.com], [libpka-]m4_defn([libpka_VERSION]), [https://github.com/Mellanox/pka]) 4 | AC_CONFIG_SRCDIR([engine/e_bluefield.c]) 5 | AC_CONFIG_AUX_DIR([build-aux]) 6 | AC_CONFIG_MACRO_DIR([m4]) 7 | AC_CONFIG_HEADERS([config.h]) 8 | AM_INIT_AUTOMAKE([foreign no-define subdir-objects]) 9 | AC_CANONICAL_HOST 10 | AC_MSG_CHECKING([host architecture]) 11 | AS_IF( 12 | [test x"$host_cpu" = x"aarch64"], 13 | [AC_MSG_RESULT([ok])], 14 | [AC_MSG_ERROR([only aarch64 is supported])] 15 | ) 16 | 17 | AC_PROG_CC 18 | AC_PROG_INSTALL 19 | AM_PROG_AS 20 | PKG_PROG_PKG_CONFIG 21 | 22 | AC_CHECK_HEADERS([fcntl.h inttypes.h stdint.h sys/ioctl.h sys/time.h unistd.h]) 23 | AC_CHECK_HEADER_STDBOOL 24 | AC_C_INLINE 25 | AC_TYPE_INT32_T 26 | AC_TYPE_INT64_T 27 | AC_TYPE_INT8_T 28 | AC_TYPE_PID_T 29 | AC_TYPE_SIZE_T 30 | AC_TYPE_SSIZE_T 31 | AC_TYPE_UINT16_T 32 | AC_TYPE_UINT32_T 33 | AC_TYPE_UINT64_T 34 | AC_TYPE_UINT8_T 35 | AC_FUNC_MALLOC 36 | AC_FUNC_MMAP 37 | AC_CHECK_FUNCS([gettimeofday memset munmap strcasecmp strrchr strtol]) 38 | 39 | AX_PTHREAD 40 | 41 | LT_INIT([shared disable-static pic-only]) 42 | 43 | ENABLED_SUBDIRS=lib 44 | 45 | dnl Checks for tests utils 46 | AC_ARG_ENABLE([testutils], 47 | [AS_HELP_STRING([--disable-testutils], [Disable building PKA tests utilities (Default: yes)])], 48 | [], 49 | [enable_testutils=yes]) 50 | 51 | AS_IF([test x"$enable_testutils" != x"no"], 52 | [ENABLED_SUBDIRS="$ENABLED_SUBDIRS tests"] 53 | ) 54 | 55 | AC_ARG_WITH([libcrypto], 56 | 
[AS_HELP_STRING([--with-libcrypto], [Use specific libcrypto library. Must be at least version 1.1. Library search is done using pkg-config. (Default: libcrypto)])], 57 | [], 58 | [with_libcrypto=libcrypto]) 59 | 60 | PKG_CHECK_MODULES([LIBCRYPTO], 61 | ["$with_libcrypto" >= 1.1.0],[], 62 | [AC_MSG_ERROR([libcrypto library is either not found or its version less than 1.1])]) 63 | 64 | dnl Checks if need to enable openssl pka engine 65 | AC_ARG_ENABLE([engine], 66 | [AS_HELP_STRING([--disable-engine], [Build PKA openssl engine (Default: yes)])], 67 | [], 68 | [enable_engine=yes]) 69 | 70 | AS_IF([test x"$enable_engine" != x"no"], 71 | [ENABLED_SUBDIRS="$ENABLED_SUBDIRS engine" 72 | 73 | cryptoenginesdir=$($PKG_CONFIG --variable=enginesdir --silence-errors $with_libcrypto) 74 | AC_SUBST([cryptoenginesdir]) 75 | ] 76 | ) 77 | 78 | dnl Documentation 79 | AC_ARG_WITH([doxygen], 80 | [AS_HELP_STRING([--with-doxygen], [Generate documentation. This requires doxygen program (Default: doxygen)])], 81 | [], 82 | [with_doxygen=doxygen]) 83 | 84 | AS_IF([test x"$with_doxygen" != x"no"], 85 | [AC_PATH_TOOL([DOXYGEN],["$with_doxygen"]) 86 | AS_IF([test x"$DOXYGEN" = x""], 87 | [AC_MSG_ERROR([documentation requested but doxygen is not found (to disable use --without-doxygen)])] 88 | ) 89 | ENABLED_SUBDIRS="$ENABLED_SUBDIRS doc" 90 | AC_SUBST([DOXYGEN]) 91 | ] 92 | ) 93 | 94 | AC_SUBST([AM_CPPFLAGS],["-I\$(top_srcdir)/lib -I\$(top_srcdir)/include"]) 95 | AC_SUBST([ENABLED_SUBDIRS]) 96 | AC_CONFIG_FILES([Makefile lib/Makefile engine/Makefile tests/Makefile doc/Makefile]) 97 | AC_OUTPUT 98 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | libpka (2.0-2) UNRELEASED; urgency=low 2 | 3 | * Add BF3 MB, HB cards support. 4 | * Add OpenSSL async_jobs support. 5 | * Add asynchronous PKA processing. 
6 | * Add dedicated polling thread for asynchronous PKA processing. 7 | * Bug fixes 8 | * Package split 9 | 10 | -- Shih-Yi Chen Thu, 11 Jan 2024 13:40:24 +0000 11 | 12 | libpka (1.4-1) UNRELEASED; urgency=low 13 | 14 | * Guard against multiple calls to engine destroy function. 15 | * BF3/Mustang updates. 16 | * Bug fixes. 17 | * Update firmware image and hence add support for curve 448 and curve 25519. 18 | * Enable DRBG block in TRNG. 19 | * Add tests for TRNG (True Random Number Generator) validation. 20 | * Support OpenSSL 3.0 when built with backward compatibility for OpenSSL 21 | 1.1.0 22 | * Shared memory created for multiple libpka instances is useless and hence 23 | remove it. 24 | * Instead of static addresses, read the address from ACPI table. 25 | * Fix ring open issue; don't try ring open when rings are busy. 26 | * Create missing openssl dynamic engine symlink for rpm package. 27 | 28 | -- Mahantesh Salimath Fri, 26 Nov 2021 21:13:58 +0000 29 | libpka (1.3-1) UNRELEASED; urgency=low 30 | 31 | * Update documentation. 32 | * Add TRNG support. 33 | * Remove /src hierarchy. 34 | * Allow all users to access pka s/w. 35 | * Fix multithreading issue in test framework. 36 | * Fix issues in queue and ring layer. 37 | * Read frequency from CPU register and optimize perf calculations. 38 | 39 | -- Mahantesh Salimath Fri, 20 Nov 2020 21:13:58 +0000 40 | libpka (1.2-1) UNRELEASED; urgency=low 41 | 42 | * Fix multithreading issue in test framework. 43 | * Switch to software method for measuring performance. 44 | * Read CPU frequency from SMBIOS table. 45 | * Update README file with openssl ecdh, ecdsa and dsa speed test 46 | instructions. 47 | * Encapsulate pka engine against different openssl versions. 48 | 49 | -- Mahantesh Salimath Fri, 09 Oct 2020 21:13:58 +0000 50 | libpka (1.1-1) UNRELEASED; urgency=low 51 | 52 | * Add ECDH and DH API's in pka library. 53 | * Add DH, DSA, ECDH and ECDSA support to pka engine. 
54 | 55 | -- Mahantesh Salimath Tue, 28 Jul 2020 21:13:58 +0000 56 | libpka (1.0-1) UNRELEASED; urgency=low 57 | 58 | * Initial release. 59 | 60 | -- Mahantesh Salimath Fri, 08 May 2020 13:22:58 +0000 61 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: libpka 2 | Section: libdevel 3 | Priority: optional 4 | Maintainer: Shih-Yi Chen 5 | Build-Depends: debhelper-compat (= 11), 6 | libssl-dev, 7 | autoconf, 8 | automake, 9 | libtool, 10 | doxygen, 11 | pkg-config 12 | Standards-Version: 4.1.4 13 | Homepage: https://github.com/Mellanox/pka 14 | 15 | Package: libpka1 16 | Architecture: arm64 17 | Section: libs 18 | Depends: ${misc:Depends}, ${shlibs:Depends} 19 | Description: NVIDIA BlueField Public Key Acceleration (PKA) library 20 | This package provides Public Key Acceleration (PKA) API implementation 21 | for NVIDIA BlueField 22 | 23 | Package: libpka1-engine 24 | Architecture: arm64 25 | Section: libs 26 | Depends: libpka1 (= ${binary:Version}), 27 | ${misc:Depends}, 28 | ${shlibs:Depends} 29 | Description: NVIDIA BlueField Public Key Acceleration (PKA) library - OpenSSL engine module 30 | This package provides OpenSSL dynamic engine module to support 31 | hardware implementation of RSA, DSA, DH, ECDH and ECDSA 32 | operations using BlueField PKA hardware 33 | 34 | Package: libpka1-testutils 35 | Architecture: arm64 36 | Section: misc 37 | Depends: libpka1 (= ${binary:Version}), 38 | ${misc:Depends}, 39 | ${shlibs:Depends} 40 | Description: NVIDIA BlueField Public Key Acceleration (PKA) library - Test utilities 41 | This package provides validation utilities for testing libpka 42 | functionality with NVIDIA BlueField PKA hardware 43 | 44 | Package: libpka1-dev 45 | Architecture: any 46 | Multi-Arch: same 47 | Section: libdevel 48 | Depends: libpka1 (= ${binary:Version}), 49 | ${misc:Depends} 50 | Description: NVIDIA 
BlueField Public Key Acceleration (PKA) library - Headers 51 | This package contains header files for libpka 52 | 53 | Package: libpka1-doc 54 | Architecture: all 55 | Multi-Arch: foreign 56 | Section: doc 57 | Depends: ${misc:Depends} 58 | Description: NVIDIA BlueField Public Key Acceleration (PKA) library - Documentation 59 | This package contains libpka API documentation 60 | and PDF API specification for libpka packages 61 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: libpka 3 | Source: https://github.com/Mellanox/pka 4 | 5 | Files: * 6 | Copyright: 2018-2023 NVIDIA Corporation & affiliates. 7 | License: BSD-3-clause 8 | 9 | Files: engine/e_bluefield.c 10 | Copyright: 2008-2016 The OpenSSL Project Authors. 11 | License: OpenSSL 12 | 13 | Files: engine/helper/ec_local.h 14 | Copyright: 2001-2019 The OpenSSL Project Authors. 15 | 2002, Oracle and/or its affiliates. 16 | License: OpenSSL 17 | 18 | License: BSD-3-clause 19 | Redistribution and use in source and binary forms, 20 | with or without modification, are permitted provided that the 21 | following conditions are met: 22 | . 23 | 1. Redistributions of source code must retain the above copyright notice, 24 | this list of conditions and the following disclaimer. 25 | . 26 | 2. Redistributions in binary form must reproduce the above copyright notice, 27 | this list of conditions and the following disclaimer in the documentation 28 | and/or other materials provided with the distribution. 29 | . 30 | 3. Neither the name of the copyright holder nor the names of its contributors 31 | may be used to endorse or promote products derived from this software 32 | without specific prior written permission. 33 | . 
34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 35 | AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS 38 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 39 | OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 40 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 41 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 42 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 43 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 44 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | 46 | License: OpenSSL 47 | The OpenSSL toolkit stays under a double license, i.e. both the conditions of 48 | the OpenSSL License and the original SSLeay license apply to the toolkit. 49 | See below for the actual license texts. Actually both licenses are BSD-style 50 | Open Source licenses. In case of any license issues related to OpenSSL 51 | please contact openssl-core@openssl.org. 52 | . 53 | OpenSSL License 54 | --------------- 55 | . 56 | ==================================================================== 57 | Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. 58 | . 59 | Redistribution and use in source and binary forms, with or without 60 | modification, are permitted provided that the following conditions 61 | are met: 62 | . 63 | 1. Redistributions of source code must retain the above copyright 64 | notice, this list of conditions and the following disclaimer. 65 | . 66 | 2. Redistributions in binary form must reproduce the above copyright 67 | notice, this list of conditions and the following disclaimer in 68 | the documentation and/or other materials provided with the 69 | distribution. 70 | . 71 | 3. 
All advertising materials mentioning features or use of this 72 | software must display the following acknowledgment: 73 | "This product includes software developed by the OpenSSL Project 74 | for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 75 | . 76 | 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 77 | endorse or promote products derived from this software without 78 | prior written permission. For written permission, please contact 79 | openssl-core@openssl.org. 80 | . 81 | 5. Products derived from this software may not be called "OpenSSL" 82 | nor may "OpenSSL" appear in their names without prior written 83 | permission of the OpenSSL Project. 84 | . 85 | 6. Redistributions of any form whatsoever must retain the following 86 | acknowledgment: 87 | "This product includes software developed by the OpenSSL Project 88 | for use in the OpenSSL Toolkit (http://www.openssl.org/)" 89 | . 90 | THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 91 | EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 92 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 93 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 94 | ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 95 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 96 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 97 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 99 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 100 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 101 | OF THE POSSIBILITY OF SUCH DAMAGE. 102 | ==================================================================== 103 | . 104 | This product includes cryptographic software written by Eric Young 105 | (eay@cryptsoft.com). 
This product includes software written by Tim 106 | Hudson (tjh@cryptsoft.com). 107 | . 108 | Original SSLeay License 109 | ----------------------- 110 | . 111 | Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 112 | All rights reserved. 113 | . 114 | This package is an SSL implementation written 115 | by Eric Young (eay@cryptsoft.com). 116 | The implementation was written so as to conform with Netscapes SSL. 117 | . 118 | This library is free for commercial and non-commercial use as long as 119 | the following conditions are aheared to. The following conditions 120 | apply to all code found in this distribution, be it the RC4, RSA, 121 | lhash, DES, etc., code; not just the SSL code. The SSL documentation 122 | included with this distribution is covered by the same copyright terms 123 | except that the holder is Tim Hudson (tjh@cryptsoft.com). 124 | . 125 | Copyright remains Eric Young's, and as such any Copyright notices in 126 | the code are not to be removed. 127 | If this package is used in a product, Eric Young should be given attribution 128 | as the author of the parts of the library used. 129 | This can be in the form of a textual message at program startup or 130 | in documentation (online or textual) provided with the package. 131 | . 132 | Redistribution and use in source and binary forms, with or without 133 | modification, are permitted provided that the following conditions 134 | are met: 135 | 1. Redistributions of source code must retain the copyright 136 | notice, this list of conditions and the following disclaimer. 137 | 2. Redistributions in binary form must reproduce the above copyright 138 | notice, this list of conditions and the following disclaimer in the 139 | documentation and/or other materials provided with the distribution. 140 | 3. 
All advertising materials mentioning features or use of this software 141 | must display the following acknowledgement: 142 | "This product includes cryptographic software written by 143 | Eric Young (eay@cryptsoft.com)" 144 | The word 'cryptographic' can be left out if the rouines from the library 145 | being used are not cryptographic related :-). 146 | 4. If you include any Windows specific code (or a derivative thereof) from 147 | the apps directory (application code) you must include an acknowledgement: 148 | "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 149 | . 150 | THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 151 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 152 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 153 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 154 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 155 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 156 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 157 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 158 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 159 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 160 | SUCH DAMAGE. 161 | . 162 | The licence and distribution terms for any publically available version or 163 | derivative of this code cannot be changed. i.e. this code cannot simply be 164 | copied and put under another distribution licence 165 | [including the GNU Public Licence.] 
166 | -------------------------------------------------------------------------------- /debian/libpka1-dev.install: -------------------------------------------------------------------------------- 1 | usr/include/*.h 2 | -------------------------------------------------------------------------------- /debian/libpka1-doc.doc-base: -------------------------------------------------------------------------------- 1 | Document: libpka-reference 2 | Title: Public Key Acceleration documentation 3 | Author: Khalil Blaiech 4 | Abstract: Reference documentation of Public Key Acceleration system 5 | and API description of Public Key Acceleration Library 6 | Section: Programming 7 | 8 | Format: HTML 9 | Index: /usr/share/doc/libpka*/html/index.html 10 | Files: /usr/share/doc/libpka*/html/*.html 11 | 12 | Format: PDF 13 | Files: /usr/share/doc/libpka*/pdf/*.pdf 14 | -------------------------------------------------------------------------------- /debian/libpka1-doc.docs: -------------------------------------------------------------------------------- 1 | usr/share/doc/libpka-*/html 2 | usr/share/doc/libpka-*/pdf 3 | -------------------------------------------------------------------------------- /debian/libpka1-engine.docs: -------------------------------------------------------------------------------- 1 | usr/share/doc/*/README.engine 2 | 3 | -------------------------------------------------------------------------------- /debian/libpka1-engine.install: -------------------------------------------------------------------------------- 1 | usr/lib/*/engines*/*.so 2 | -------------------------------------------------------------------------------- /debian/libpka1-testutils.docs: -------------------------------------------------------------------------------- 1 | usr/share/doc/*/README.tests 2 | -------------------------------------------------------------------------------- /debian/libpka1-testutils.install: 
-------------------------------------------------------------------------------- 1 | usr/bin/pka_* 2 | -------------------------------------------------------------------------------- /debian/libpka1.docs: -------------------------------------------------------------------------------- 1 | usr/share/doc/*/README 2 | -------------------------------------------------------------------------------- /debian/libpka1.install: -------------------------------------------------------------------------------- 1 | usr/lib/*/*so* 2 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | #export DH_VERBOSE = 1 4 | export DEB_BUILD_MAINT_OPTIONS = hardening=+all reproducible=+all future=+all 5 | export LDFLAGS=-Wl,-Bsymbolic-functions 6 | 7 | %: 8 | dh $@ 9 | 10 | #Create engine symlink because strongswan openssl.cnf tries to load library in different places depending on distro 11 | execute_after_dh_auto_install: 12 | find $(CURDIR)/debian/ -name "*.la" -delete 13 | ln -s "libbfengine.so" `find $(CURDIR)/debian/ -iname 'libbfengine.so' -printf '%h/pka.so'` 14 | -------------------------------------------------------------------------------- /debian/upstream/metadata: -------------------------------------------------------------------------------- 1 | Bug-Database: https://github.com/Mellanox/pka/issues 2 | Bug-Submit: https://github.com/Mellanox/pka/issues/new 3 | -------------------------------------------------------------------------------- /doc/Doxyfile: -------------------------------------------------------------------------------- 1 | PROJECT_NAME = "Public Key Acceleration Library Reference Guide" 2 | PROJECT_NUMBER = $(PKGVER) 3 | OUTPUT_DIRECTORY = $(DOCDIR) 4 | INPUT = $(SRCDIR)/lib/pka.h 5 | EXTRACT_ALL = YES 6 | QUIET = YES 7 | HTML_OUTPUT = $(HTML_OUTPUT) 8 | GENERATE_LATEX = NO 9 | 
-------------------------------------------------------------------------------- /doc/Makefile.am: -------------------------------------------------------------------------------- 1 | .PHONY: html 2 | .INTERMEDIATE: html 3 | 4 | DX_CONFIG=Doxyfile 5 | DX_HTML_OUTPUT=html 6 | DOCDIR=$(srcdir) 7 | SRCDIR=$(top_srcdir) 8 | OUTPUT_DIRS=pdf html 9 | 10 | $(DOCDIR)/$(PACKAGE).tag: $(DX_CONFIG) 11 | PKGVER=$(PACKAGE_VERSION) DOCDIR=$(DOCDIR) HTML_OUTPUT=$(DX_HTML_OUTPUT) SRCDIR=$(SRCDIR) $(DOXYGEN) $(DX_CONFIG) 12 | $(AM_V_at)echo Timestamp >$@ 13 | 14 | clean-local: 15 | -rm -rf $(DOCDIR)/html $(DOCDIR)/$(PACKAGE).tag 16 | # Copy each directory listed in OUTPUT_DIRS into $(DESTDIR)$(docdir), creating the per-directory destination first when it does not exist yet. 17 | install-data-local: 18 | @for d in $(OUTPUT_DIRS); do \ 19 | if ! test -d "$(DESTDIR)$(docdir)/"$$d; then \ 20 | echo "$(mkinstalldirs) '$(DESTDIR)$(docdir)/'$$d"; \ 21 | $(mkinstalldirs) '$(DESTDIR)$(docdir)/'$$d; \ 22 | fi; \ 23 | echo "cp -pR $(srcdir)/$$d '$(DESTDIR)$(docdir)/'"; \ 24 | cp -pR $(srcdir)/$$d '$(DESTDIR)$(docdir)/' || exit $$?; \ 25 | done 26 | 27 | uninstall-local: 28 | @for d in $(OUTPUT_DIRS); do \ 29 | echo "test ! -d '$(DESTDIR)$(docdir)/'$$d || \ 30 | { find '$(DESTDIR)$(docdir)/'$$d -type d ! -perm -200 -exec chmod u+w '{}' ';' && \ 31 | rm -rf '$(DESTDIR)$(docdir)/'$$d; }"; \ 32 | test ! -d '$(DESTDIR)$(docdir)/'$$d || \ 33 | { find '$(DESTDIR)$(docdir)/'$$d -type d ! 
-perm -200 -exec chmod u+w '{}' ';' && \ 34 | rm -rf '$(DESTDIR)$(docdir)/'$$d; }; \ 35 | done 36 | 37 | html: $(DOCDIR)/$(PACKAGE).tag 38 | all: html 39 | -------------------------------------------------------------------------------- /doc/pdf/pka-architecture-doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mellanox/pka/7fbbcd0ef0e263789d5fd9994cc0b115e107d4d6/doc/pdf/pka-architecture-doc.pdf -------------------------------------------------------------------------------- /doc/pdf/pka-programming-doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mellanox/pka/7fbbcd0ef0e263789d5fd9994cc0b115e107d4d6/doc/pdf/pka-programming-doc.pdf -------------------------------------------------------------------------------- /engine/Makefile.am: -------------------------------------------------------------------------------- 1 | cryptoengines_LTLIBRARIES = libbfengine.la 2 | libbfengine_la_SOURCES = \ 3 | e_bluefield.c \ 4 | helper/pka_helper.c 5 | libbfengine_la_CPPFLAGS = -I$(srcdir)/helper/ $(AM_CPPFLAGS) 6 | libbfengine_la_CFLAGS = -DOPENSSL_API_COMPAT=0x10100000L $(LIBCRYPTO_CFLAGS) 7 | libbfengine_la_LIBADD = $(top_builddir)/lib/libPKA.la $(LIBCRYPTO_LIBS) 8 | libbfengine_la_LDFLAGS = $(AM_LDFLAGS) -module -shared -avoid-version -export-symbols "$(srcdir)/engine.exports" 9 | -------------------------------------------------------------------------------- /engine/engine.exports: -------------------------------------------------------------------------------- 1 | v_check 2 | bind_engine 3 | -------------------------------------------------------------------------------- /engine/helper/ec_local.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Note: Although these structures are internal to OpenSSL, they are defined 3 | * here, because in order to override ecc point operations with 
pka 4 | * API's the fields inside the structure have to be changed directly as 5 | * OpenSSL doesn't provide API's to do so. 6 | * 7 | * Care needs to be taken to preserve the order and members of the structure 8 | * same as in the respective versions of OpenSSL. 9 | * 10 | * Structures are mirrored from crypto/ec/ec_local.h inside openssl github repo. 11 | * Prior to openssl 1.1.1 version, this file is named as ec_lcl.h 12 | * 13 | * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. 14 | * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved 15 | * 16 | * Licensed under the OpenSSL license (the "License"). You may not use 17 | * this file except in compliance with the License. You can obtain a copy 18 | * in the file LICENSE in the source distribution or at 19 | * https://www.openssl.org/source/license.html 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | /* Local mirror of the OpenSSL-internal struct ec_method_st (table of EC method function pointers). Member order must stay identical to the OpenSSL version being built against; the OPENSSL_VERSION_NUMBER guards inside the struct select which members are compiled in. */ 28 | struct ec_method_st { 29 | /* Various method flags */ 30 | int flags; 31 | /* used by EC_METHOD_get_field_type: */ 32 | int field_type; /* a NID */ 33 | /* 34 | * used by EC_GROUP_new, EC_GROUP_free, EC_GROUP_clear_free, 35 | * EC_GROUP_copy: 36 | */ 37 | int (*group_init) (EC_GROUP *); 38 | void (*group_finish) (EC_GROUP *); 39 | void (*group_clear_finish) (EC_GROUP *); 40 | int (*group_copy) (EC_GROUP *, const EC_GROUP *); 41 | /* used by EC_GROUP_set_curve, EC_GROUP_get_curve: */ 42 | int (*group_set_curve) (EC_GROUP *, const BIGNUM *p, const BIGNUM *a, 43 | const BIGNUM *b, BN_CTX *); 44 | int (*group_get_curve) (const EC_GROUP *, BIGNUM *p, BIGNUM *a, BIGNUM *b, 45 | BN_CTX *); 46 | /* used by EC_GROUP_get_degree: */ 47 | int (*group_get_degree) (const EC_GROUP *); 48 | int (*group_order_bits) (const EC_GROUP *); 49 | /* used by EC_GROUP_check: */ 50 | int (*group_check_discriminant) (const EC_GROUP *, BN_CTX *); 51 | /* 52 | * used by EC_POINT_new, EC_POINT_free, EC_POINT_clear_free, 53 | * EC_POINT_copy: 54 | */ 55 | int (*point_init) 
(EC_POINT *); 56 | void (*point_finish) (EC_POINT *); 57 | void (*point_clear_finish) (EC_POINT *); 58 | int (*point_copy) (EC_POINT *, const EC_POINT *); 59 | /*- 60 | * used by EC_POINT_set_to_infinity, 61 | * EC_POINT_set_Jprojective_coordinates_GFp, 62 | * EC_POINT_get_Jprojective_coordinates_GFp, 63 | * EC_POINT_set_affine_coordinates, 64 | * EC_POINT_get_affine_coordinates, 65 | * EC_POINT_set_compressed_coordinates: * (the two Jprojective members below are compiled in only when * OPENSSL_VERSION_NUMBER < 0x30000000L) 66 | */ 67 | int (*point_set_to_infinity) (const EC_GROUP *, EC_POINT *); 68 | #if (OPENSSL_VERSION_NUMBER < 0x30000000L) 69 | int (*point_set_Jprojective_coordinates_GFp) (const EC_GROUP *, 70 | EC_POINT *, const BIGNUM *x, 71 | const BIGNUM *y, 72 | const BIGNUM *z, BN_CTX *); 73 | int (*point_get_Jprojective_coordinates_GFp) (const EC_GROUP *, 74 | const EC_POINT *, BIGNUM *x, 75 | BIGNUM *y, BIGNUM *z, 76 | BN_CTX *); 77 | #endif 78 | int (*point_set_affine_coordinates) (const EC_GROUP *, EC_POINT *, 79 | const BIGNUM *x, const BIGNUM *y, 80 | BN_CTX *); 81 | int (*point_get_affine_coordinates) (const EC_GROUP *, const EC_POINT *, 82 | BIGNUM *x, BIGNUM *y, BN_CTX *); 83 | int (*point_set_compressed_coordinates) (const EC_GROUP *, EC_POINT *, 84 | const BIGNUM *x, int y_bit, 85 | BN_CTX *); 86 | /* used by EC_POINT_point2oct, EC_POINT_oct2point: */ 87 | size_t (*point2oct) (const EC_GROUP *, const EC_POINT *, 88 | point_conversion_form_t form, unsigned char *buf, 89 | size_t len, BN_CTX *); 90 | int (*oct2point) (const EC_GROUP *, EC_POINT *, const unsigned char *buf, 91 | size_t len, BN_CTX *); 92 | /* used by EC_POINT_add, EC_POINT_dbl, ECP_POINT_invert: */ 93 | int (*add) (const EC_GROUP *, EC_POINT *r, const EC_POINT *a, 94 | const EC_POINT *b, BN_CTX *); 95 | int (*dbl) (const EC_GROUP *, EC_POINT *r, const EC_POINT *a, BN_CTX *); 96 | int (*invert) (const EC_GROUP *, EC_POINT *, BN_CTX *); 97 | /* 98 | * used by EC_POINT_is_at_infinity, EC_POINT_is_on_curve, EC_POINT_cmp: 99 | */ 100 | int (*is_at_infinity) (const EC_GROUP *, const 
EC_POINT *); 101 | int (*is_on_curve) (const EC_GROUP *, const EC_POINT *, BN_CTX *); 102 | int (*point_cmp) (const EC_GROUP *, const EC_POINT *a, const EC_POINT *b, 103 | BN_CTX *); 104 | /* used by EC_POINT_make_affine, EC_POINTs_make_affine: */ 105 | int (*make_affine) (const EC_GROUP *, EC_POINT *, BN_CTX *); 106 | int (*points_make_affine) (const EC_GROUP *, size_t num, EC_POINT *[], 107 | BN_CTX *); 108 | /* 109 | * used by EC_POINTs_mul, EC_POINT_mul, EC_POINT_precompute_mult, 110 | * EC_POINT_have_precompute_mult (default implementations are used if the 111 | * 'mul' pointer is 0): 112 | */ 113 | /*- 114 | * mul() calculates the value 115 | * 116 | * r := generator * scalar 117 | * + points[0] * scalars[0] 118 | * + ... 119 | * + points[num-1] * scalars[num-1]. 120 | * 121 | * For a fixed point multiplication (scalar != NULL, num == 0) 122 | * or a variable point multiplication (scalar == NULL, num == 1), 123 | * mul() must use a constant time algorithm: in both cases callers 124 | * should provide an input scalar (either scalar or scalars[0]) 125 | * in the range [0, ec_group_order); for robustness, implementers 126 | * should handle the case when the scalar has not been reduced, but 127 | * may treat it as an unusual input, without any constant-timeness 128 | * guarantee. 
129 | */ 130 | int (*mul) (const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, 131 | size_t num, const EC_POINT *points[], const BIGNUM *scalars[], 132 | BN_CTX *); 133 | int (*precompute_mult) (EC_GROUP *group, BN_CTX *); 134 | int (*have_precompute_mult) (const EC_GROUP *group); 135 | /* internal functions */ 136 | /* 137 | * 'field_mul', 'field_sqr', and 'field_div' can be used by 'add' and 138 | * 'dbl' so that the same implementations of point operations can be used 139 | * with different optimized implementations of expensive field 140 | * operations: 141 | */ 142 | int (*field_mul) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 143 | const BIGNUM *b, BN_CTX *); 144 | int (*field_sqr) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); 145 | int (*field_div) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 146 | const BIGNUM *b, BN_CTX *); 147 | /*- 148 | * 'field_inv' computes the multiplicative inverse of a in the field, 149 | * storing the result in r. 150 | * 151 | * If 'a' is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error. 152 | */ 153 | int (*field_inv) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); 154 | /* e.g. to Montgomery */ 155 | int (*field_encode) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 156 | BN_CTX *); 157 | /* e.g. 
from Montgomery */ 158 | int (*field_decode) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 159 | BN_CTX *); 160 | int (*field_set_to_one) (const EC_GROUP *, BIGNUM *r, BN_CTX *); 161 | /* private key operations */ 162 | size_t (*priv2oct)(const EC_KEY *eckey, unsigned char *buf, size_t len); 163 | int (*oct2priv)(EC_KEY *eckey, const unsigned char *buf, size_t len); 164 | int (*set_private)(EC_KEY *eckey, const BIGNUM *priv_key); 165 | int (*keygen)(EC_KEY *eckey); 166 | int (*keycheck)(const EC_KEY *eckey); 167 | int (*keygenpub)(EC_KEY *eckey); 168 | int (*keycopy)(EC_KEY *dst, const EC_KEY *src); 169 | void (*keyfinish)(EC_KEY *eckey); 170 | /* custom ECDH operation */ 171 | int (*ecdh_compute_key)(unsigned char **pout, size_t *poutlen, 172 | const EC_POINT *pub_key, const EC_KEY *ecdh); 173 | /* Inverse modulo order: labels field_inverse_mod_ord, which is declared after the ECDSA members that are compiled in only when OPENSSL_VERSION_NUMBER >= 0x30000000L */ 174 | #if (OPENSSL_VERSION_NUMBER >= 0x30000000L) 175 | /* custom ECDSA */ 176 | int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx, BIGNUM **kinvp, 177 | BIGNUM **rp); 178 | ECDSA_SIG *(*ecdsa_sign_sig)(const unsigned char *dgst, int dgstlen, 179 | const BIGNUM *kinv, const BIGNUM *r, 180 | EC_KEY *eckey); 181 | int (*ecdsa_verify_sig)(const unsigned char *dgst, int dgstlen, 182 | const ECDSA_SIG *sig, EC_KEY *eckey); 183 | #endif 184 | 185 | #if (OPENSSL_VERSION_NUMBER > 0x10100000L) 186 | int (*field_inverse_mod_ord)(const EC_GROUP *, BIGNUM *r, 187 | const BIGNUM *x, BN_CTX *); 188 | #endif 189 | int (*blind_coordinates)(const EC_GROUP *group, EC_POINT *p, BN_CTX *ctx); 190 | #if (OPENSSL_VERSION_NUMBER > 0x10100000L) 191 | int (*ladder_pre)(const EC_GROUP *group, 192 | EC_POINT *r, EC_POINT *s, 193 | EC_POINT *p, BN_CTX *ctx); 194 | int (*ladder_step)(const EC_GROUP *group, 195 | EC_POINT *r, EC_POINT *s, 196 | EC_POINT *p, BN_CTX *ctx); 197 | int (*ladder_post)(const EC_GROUP *group, 198 | EC_POINT *r, EC_POINT *s, 199 | EC_POINT *p, BN_CTX *ctx); 200 | #endif 201 | }; 202 | 203 | /* 204 | * Types and functions to manipulate pre-computed 
values. 205 | */ 206 | typedef struct nistp224_pre_comp_st NISTP224_PRE_COMP; 207 | typedef struct nistp256_pre_comp_st NISTP256_PRE_COMP; 208 | typedef struct nistp521_pre_comp_st NISTP521_PRE_COMP; 209 | typedef struct nistz256_pre_comp_st NISTZ256_PRE_COMP; 210 | typedef struct ec_pre_comp_st EC_PRE_COMP; 211 | 212 | struct ec_group_st { 213 | EC_METHOD *meth; 214 | EC_POINT *generator; /* optional */ 215 | BIGNUM *order, *cofactor; 216 | int curve_name; /* optional NID for named curve */ 217 | int asn1_flag; /* flag to control the asn1 encoding */ 218 | #if (OPENSSL_VERSION_NUMBER >= 0x30000000L) 219 | int decoded_from_explicit_params; /* set if decoded from explicit 220 | * curve parameters encoding */ 221 | #endif 222 | point_conversion_form_t asn1_form; 223 | unsigned char *seed; /* optional seed for parameters (appears in 224 | * ASN1) */ 225 | size_t seed_len; 226 | /* 227 | * The following members are handled by the method functions, even if 228 | * they appear generic 229 | */ 230 | /* 231 | * Field specification. For curves over GF(p), this is the modulus; for 232 | * curves over GF(2^m), this is the irreducible polynomial defining the 233 | * field. 234 | */ 235 | BIGNUM *field; 236 | /* 237 | * Field specification for curves over GF(2^m). The irreducible f(t) is 238 | * then of the form: t^poly[0] + t^poly[1] + ... + t^poly[k] where m = 239 | * poly[0] > poly[1] > ... > poly[k] = 0. The array is terminated with 240 | * poly[k+1]=-1. All elliptic curve irreducibles have at most 5 non-zero 241 | * terms. 242 | */ 243 | int poly[6]; 244 | /* 245 | * Curve coefficients. (Here the assumption is that BIGNUMs can be used 246 | * or abused for all kinds of fields, not just GF(p).) For characteristic 247 | * > 3, the curve is defined by a Weierstrass equation of the form y^2 = 248 | * x^3 + a*x + b. For characteristic 2, the curve is defined by an 249 | * equation of the form y^2 + x*y = x^3 + a*x^2 + b. 
250 | */ 251 | BIGNUM *a, *b; 252 | /* enable optimized point arithmetics for special case */ 253 | int a_is_minus3; 254 | /* method-specific (e.g., Montgomery structure) */ 255 | void *field_data1; 256 | /* method-specific */ 257 | void *field_data2; 258 | /* method-specific */ 259 | int (*field_mod_func) (BIGNUM *, const BIGNUM *, const BIGNUM *, 260 | BN_CTX *); 261 | /* data for ECDSA inverse */ 262 | BN_MONT_CTX *mont_data; 263 | 264 | /* 265 | * Precomputed values for speed. The PCT_xxx names match the 266 | * pre_comp.xxx union names; see the SETPRECOMP and HAVEPRECOMP 267 | * macros, below. 268 | */ 269 | enum { 270 | PCT_none, 271 | PCT_nistp224, PCT_nistp256, PCT_nistp521, PCT_nistz256, 272 | PCT_ec 273 | } pre_comp_type; 274 | union { 275 | NISTP224_PRE_COMP *nistp224; 276 | NISTP256_PRE_COMP *nistp256; 277 | NISTP521_PRE_COMP *nistp521; 278 | NISTZ256_PRE_COMP *nistz256; 279 | EC_PRE_COMP *ec; 280 | } pre_comp; 281 | #if (OPENSSL_VERSION_NUMBER >= 0x30000000L) 282 | OSSL_LIB_CTX *libctx; 283 | char *propq; 284 | #endif 285 | }; 286 | 287 | struct ec_key_st { 288 | const EC_KEY_METHOD *meth; 289 | ENGINE *engine; 290 | int version; 291 | EC_GROUP *group; 292 | EC_POINT *pub_key; 293 | BIGNUM *priv_key; 294 | unsigned int enc_flag; 295 | point_conversion_form_t conv_form; 296 | int references; 297 | int flags; 298 | #ifndef FIPS_MODULE 299 | CRYPTO_EX_DATA ex_data; 300 | #endif 301 | CRYPTO_RWLOCK *lock; 302 | #if (OPENSSL_VERSION_NUMBER >= 0x30000000L) 303 | OSSL_LIB_CTX *libctx; 304 | char *propq; 305 | 306 | /* Provider data */ 307 | size_t dirty_cnt; /* If any key material changes, increment this */ 308 | #endif 309 | }; 310 | 311 | struct ec_point_st { 312 | EC_METHOD *meth; 313 | /* NID for the curve if known */ 314 | int curve_name; 315 | /* 316 | * All members except 'meth' are handled by the method functions, even if 317 | * they appear generic 318 | */ 319 | BIGNUM *X; 320 | BIGNUM *Y; 321 | BIGNUM *Z; /* Jacobian projective coordinates: * (X, 
Y, 322 | * Z) represents (X/Z^2, Y/Z^3) if Z != 0 */ 323 | int Z_is_one; /* enable optimized point arithmetics for 324 | * special case */ 325 | }; 326 | -------------------------------------------------------------------------------- /engine/helper/pka_helper.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef PKA_HELPER_H 5 | #define PKA_HELPER_H 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "pka.h" 16 | #include "pka_utils.h" 17 | #include "pka_vectors.h" 18 | 19 | // 64-bit processor 20 | #ifdef BN_ULONG 21 | #define PKA_ULONG BN_ULONG 22 | #else 23 | #define PKA_ULONG uint64_t 24 | #endif 25 | 26 | #ifdef BN_BYTES 27 | #define PKA_BYTES BN_BYTES 28 | #else 29 | #define PKA_BYTES 8 30 | #endif 31 | 32 | #define PKA_BITS (PKA_BYTES * 8) 33 | 34 | 35 | #define PKA_ENGINE_QUEUE_CNT 8 36 | #define PKA_ENGINE_RING_CNT 4 37 | #define PKA_ENGINE_QUEUE_CNT_BF3_MB 16 38 | #define PKA_ENGINE_RING_CNT_BF3_MB 4 39 | #define PKA_ENGINE_QUEUE_CNT_BF3_HB 24 40 | #define PKA_ENGINE_RING_CNT_BF3_HB 4 41 | 42 | 43 | #define PKA_ENGINE_INSTANCE_NAME "SSL engine" 44 | 45 | #define PKA_MAX_OBJS 32 // 32 objs 46 | #define PKA_CMD_DESC_MAX_DATA_SIZE (1 << 14) // 16K bytes. 47 | #define PKA_RSLT_DESC_MAX_DATA_SIZE (1 << 12) // 4K bytes. 
48 | 49 | #define PKA_25519_PUBKEY_SIZE 32 50 | #define PKA_25519_PRIKEY_SIZE 32 51 | #define PKA_448_PUBKEY_SIZE 56 52 | #define PKA_448_PRIKEY_SIZE 56 53 | 54 | #define PKA_NO_FLAG 0 55 | #define PKA_NO_PRIV_KEY 1 56 | 57 | #define PKA_CURVE25519_BITS 253 58 | #define PKA_CURVE25519_SECURITY_BITS 128 59 | 60 | #define PKA_CURVE448_BITS 446 61 | #define PKA_CURVE448_SECURITY_BITS 224 62 | 63 | #define sizeof_static_array(x) \ 64 | ((sizeof((x))) / sizeof((x)[0])) 65 | 66 | #define engine_pka_keypair_invalid(kpair, ossl_nid, check_private) \ 67 | (((kpair) == NULL) || ((kpair)->nid != ossl_nid) || \ 68 | (check_private && (!(kpair)->has_private))) 69 | 70 | // This encapsulates big number information. This structure enables 71 | // compatibility to OpenSSL 72 | typedef struct { 73 | PKA_ULONG *d; // Pointer to an array of 'PKA_BITS' bit chunks. 74 | int top; // Index of last used d +1. 75 | int dmax; // Size of the d array. 76 | int neg; // one if the number is negative. 77 | int flags; 78 | } pka_bignum_t; 79 | 80 | // This encapsulates the engine information. As of now, the PKA library 81 | // does not support mult-processes, a single engine is created. This engine 82 | // allows multiple handlers to share the PKA instance. 83 | typedef struct { 84 | pka_instance_t instance; 85 | bool valid; 86 | } pka_engine_info_t; 87 | 88 | struct engine_pka_nid_data_st 89 | { 90 | const char *name; 91 | size_t privk_bytes; 92 | size_t pubk_bytes; 93 | int (*derive_pubkey)(unsigned char *buf, pka_operand_t *private_key); 94 | }; 95 | 96 | // This function implement all the needed PKA initialization, in order to 97 | // enable hardware acceleration. This function is not thread-safe. 98 | int pka_init(void); 99 | 100 | // This function releases all the PKA resources previously initialized. This 101 | // function is not thread-safe. 
102 | int pka_finish(void); 103 | 104 | #if (OPENSSL_VERSION_NUMBER >= 0x10100000L) 105 | struct pka_keypair { 106 | pka_operand_t private_key; 107 | pka_operand_t public_key; 108 | int nid; 109 | bool has_private; 110 | }; 111 | 112 | typedef struct pka_keypair ENGINE_PKA_KEYPAIR; 113 | 114 | // This function allocates the memory resources required to store public and 115 | // private key pairs of size @size. 116 | // Note: @flag is currently not useful but is reserved for future when 117 | // only public key or private key resources need to be allocated. 118 | ENGINE_PKA_KEYPAIR *engine_pka_keypair_new(int nid, int flag, int size); 119 | 120 | // This function releases all the memory resources allocated for 121 | // public and private key pair. 122 | int engine_pka_keypair_free(ENGINE_PKA_KEYPAIR *kpair); 123 | #endif 124 | 125 | // This function implements the modular exponentiation using BlueField 126 | // PKA hardware. 127 | int pka_bn_mod_exp(pka_bignum_t *bn_value, 128 | pka_bignum_t *bn_exponent, 129 | pka_bignum_t *bn_modulus, 130 | pka_bignum_t *bn_result); 131 | 132 | // This function implements the modular exponentiation with CRT using 133 | // BlueField PKA hardware. 134 | int pka_rsa_mod_exp_crt(pka_bignum_t *bn_value, 135 | pka_bignum_t *bn_p, 136 | pka_bignum_t *bn_q, 137 | pka_bignum_t *bn_d_p, 138 | pka_bignum_t *bn_d_q, 139 | pka_bignum_t *bn_qinv, 140 | pka_bignum_t *bn_result); 141 | 142 | // This function implements the elliptic curve point addition using 143 | // Bluefield PKA hardware. 144 | int pka_bn_ecc_pt_add(pka_bignum_t *bn_p, 145 | pka_bignum_t *bn_a, 146 | pka_bignum_t *bn_b, 147 | pka_bignum_t *bn_x1, 148 | pka_bignum_t *bn_y1, 149 | pka_bignum_t *bn_x2, 150 | pka_bignum_t *bn_y2, 151 | pka_bignum_t *bn_result_x, 152 | pka_bignum_t *bn_result_y); 153 | 154 | // This function implements the elliptic curve point multiplication using 155 | // Bluefield PKA hardware. 
156 | int pka_bn_ecc_pt_mult(pka_bignum_t *bn_p, 157 | pka_bignum_t *bn_a, 158 | pka_bignum_t *bn_b, 159 | pka_bignum_t *bn_x, 160 | pka_bignum_t *bn_y, 161 | pka_bignum_t *bn_multiplier, 162 | pka_bignum_t *bn_result_x, 163 | pka_bignum_t *bn_result_y); 164 | 165 | // This function implements the modular inverse using BlueField 166 | // PKA hardware. 167 | int pka_bn_mod_inv(pka_bignum_t *bn_value, 168 | pka_bignum_t *bn_modulus, 169 | pka_bignum_t *bn_result); 170 | 171 | // This function implements the random number generation using BlueField 172 | // PKA hardware. 173 | int pka_get_random_bytes(uint8_t *buf, 174 | int len); 175 | 176 | int pka_mont_25519_mult(unsigned char *buf, 177 | pka_operand_t *point_x, 178 | pka_operand_t *multiplier); 179 | 180 | int pka_mont_25519_derive_pubkey(unsigned char *buf, 181 | pka_operand_t *priv_key); 182 | 183 | int pka_mont_448_mult(unsigned char *buf, 184 | pka_operand_t *point_x, 185 | pka_operand_t *multiplier); 186 | 187 | int pka_mont_448_derive_pubkey(unsigned char *buf, 188 | pka_operand_t *priv_key); 189 | 190 | #ifdef __cplusplus 191 | } 192 | #endif 193 | 194 | #endif // PKA_HELPER_H 195 | -------------------------------------------------------------------------------- /include/pka_addrs.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_ADDRS_H__ 5 | #define __PKA_ADDRS_H__ 6 | 7 | // Define memory size in bytes 8 | #define MEM_SIZE_4KB 0x1000 9 | #define MEM_SIZE_8KB 0x2000 10 | #define MEM_SIZE_16KB 0x4000 11 | #define MEM_SIZE_32KB 0x8000 12 | #define MEM_SIZE_64KB 0x10000 13 | 14 | // 15 | // COMMON SPACE 16 | // 17 | #define CRYPTO_COMMON_BASE 0x0 18 | 19 | // Common IO CSR addresses/offsets: These are all addressed as 8-byte registers. 
20 | #define DEV_INFO_ADDR (0x00 | CRYPTO_COMMON_BASE) 21 | #define DEV_CTL_ADDR (0x08 | CRYPTO_COMMON_BASE) 22 | #define MMIO_INFO_ADDR (0x10 | CRYPTO_COMMON_BASE) 23 | #define SCRATCHPAD_ADDR (0x20 | CRYPTO_COMMON_BASE) 24 | #define SEMAPHORE0_ADDR (0x28 | CRYPTO_COMMON_BASE) 25 | #define SEMAPHORE1_ADDR (0x30 | CRYPTO_COMMON_BASE) 26 | #define CLOCK_COUNT_ADDR (0x38 | CRYPTO_COMMON_BASE) 27 | #define INT_SETUP_ADDR (0x40 | CRYPTO_COMMON_BASE) 28 | #define CRED_CTL_ADDR (0x50 | CRYPTO_COMMON_BASE) 29 | #define SAM_CTL_ADDR (0x58 | CRYPTO_COMMON_BASE) 30 | 31 | // 32 | // CRYPTO SPACE 33 | // 34 | 35 | // All addresses/offsets herein are BYTE addresses. 36 | 37 | // EIP154 CSRS: 38 | 39 | // Global Control Space CSR addresses/offsets. These are accessed from the 40 | // ARM as 8 byte reads/writes however only the bottom 32 bits are implemented. 41 | #define PKA_CLOCK_SWITCH_ADDR 0x11C68 42 | #define PKA_CLK_FORCE_ADDR 0x11C80 43 | #define MODE_SELECTION_ADDR 0x11C88 44 | #define PKA_PROT_STATUS_ADDR 0x11C90 45 | #define PKA_OPTIONS_ADDR 0x11DF0 46 | #define PKA_VERSION_ADDR 0x11DF8 47 | 48 | // Advanced Interrupt Controller CSR addresses/offsets. These are accessed 49 | // from the ARM as 8 byte reads/writes however only the bottom 32 bits are 50 | // implemented. 51 | #define AIC_POL_CTRL_ADDR 0x11E00 52 | #define AIC_TYPE_CTRL_ADDR 0x11E08 53 | #define AIC_ENABLE_CTRL_ADDR 0x11E10 54 | #define AIC_RAW_STAT_ADDR 0x11E18 55 | #define AIC_ENABLE_SET_ADDR 0x11E18 56 | #define AIC_ENABLED_STAT_ADDR 0x11E20 57 | #define AIC_ACK_ADDR 0x11E20 58 | #define AIC_ENABLE_CLR_ADDR 0x11E28 59 | #define AIC_OPTIONS_ADDR 0x11E30 60 | #define AIC_VERSION_ADDR 0x11E38 61 | 62 | // The True Random Number Generator CSR addresses/offsets. These are accessed 63 | // from the ARM as 8 byte reads/writes however only the bottom 32 bits are 64 | // implemented. 
65 | #define TRNG_OUTPUT_0_ADDR 0x12000 66 | #define TRNG_OUTPUT_1_ADDR 0x12008 67 | #define TRNG_OUTPUT_2_ADDR 0x12010 68 | #define TRNG_OUTPUT_3_ADDR 0x12018 69 | #define TRNG_STATUS_ADDR 0x12020 70 | #define TRNG_INTACK_ADDR 0x12020 71 | #define TRNG_CONTROL_ADDR 0x12028 72 | #define TRNG_CONFIG_ADDR 0x12030 73 | #define TRNG_ALARMCNT_ADDR 0x12038 74 | #define TRNG_FROENABLE_ADDR 0x12040 75 | #define TRNG_FRODETUNE_ADDR 0x12048 76 | #define TRNG_ALARMMASK_ADDR 0x12050 77 | #define TRNG_ALARMSTOP_ADDR 0x12058 78 | #define TRNG_TEST_ADDR 0x120E0 79 | #define TRNG_BLOCKCNT_ADDR 0x120E8 80 | #define TRNG_OPTIONS_ADDR 0x120F0 81 | #define TRNG_TEST_ADDR 0x120E0 82 | #define TRNG_RAW_L_ADDR 0x12060 83 | #define TRNG_RAW_H_ADDR 0x12068 84 | #define TRNG_RUN_CNT_ADDR 0x12080 85 | #define TRNG_MONOBITCNT_ADDR 0x120B8 86 | #define TRNG_POKER_3_0_ADDR 0x120C0 87 | #define TRNG_POKER_7_4 0x120C8 88 | #define TRNG_POKER_B_8 0x120D0 89 | #define TRNG_POKER_F_C 0x120D8 90 | 91 | #define TRNG_PS_AI_0_ADDR 0x12080 92 | #define TRNG_PS_AI_1_ADDR 0x12088 93 | #define TRNG_PS_AI_2_ADDR 0x12090 94 | #define TRNG_PS_AI_3_ADDR 0x12098 95 | #define TRNG_PS_AI_4_ADDR 0x120A0 96 | #define TRNG_PS_AI_5_ADDR 0x120A8 97 | #define TRNG_PS_AI_6_ADDR 0x120B0 98 | #define TRNG_PS_AI_7_ADDR 0x120B8 99 | #define TRNG_PS_AI_8_ADDR 0x120C0 100 | #define TRNG_PS_AI_9_ADDR 0x120C8 101 | #define TRNG_PS_AI_10_ADDR 0x120D0 102 | #define TRNG_PS_AI_11_ADDR 0x120D8 103 | 104 | // Control register address/offset. This is accessed from the ARM using 8 105 | // byte reads/writes however only the bottom 32 bits are implemented. 106 | #define PKA_MASTER_SEQ_CTRL_ADDR 0x27F90 107 | 108 | // Ring CSRs: These are all accessed from the ARM using 8 byte reads/writes 109 | // however only the bottom 32 bits are implemented. 
110 | 111 | // Ring 0 CSRS: 112 | #define COMMAND_COUNT_0_ADDR 0x80080 113 | #define RESULT_COUNT_0_ADDR 0x80088 114 | #define IRQ_THRESH_0_ADDR 0x80090 115 | 116 | // Ring 1 CSRS: 117 | #define COMMAND_COUNT_1_ADDR 0x90080 118 | #define RESULT_COUNT_1_ADDR 0x90088 119 | #define IRQ_THRESH_1_ADDR 0x90090 120 | 121 | // Ring 2 CSRS: 122 | #define COMMAND_COUNT_2_ADDR 0xA0080 123 | #define RESULT_COUNT_2_ADDR 0xA0088 124 | #define IRQ_THRESH_2_ADDR 0xA0090 125 | 126 | // Ring 3 CSRS: 127 | #define COMMAND_COUNT_3_ADDR 0xB0080 128 | #define RESULT_COUNT_3_ADDR 0xB0088 129 | #define IRQ_THRESH_3_ADDR 0xB0090 130 | 131 | // EIP154 RAM regions: Note that the FARM_PROG_RAM_X address range overlaps 132 | // with the FARM_DATA_RAM_X and FARM_DATA_RAM_X_EXT address ranges. This 133 | // conflict is resolved by using the FARM_PROG_RAM_X only when the 134 | // Sequencer is in SW reset, and the DATA_RAMs are picked only when the 135 | // engine is in operation. 136 | // 137 | // Note: 138 | // The FARM_DATA_RAM_X_EXT RAMs may also be 139 | // called the LNME FIFO RAMs in some of the documentation. 140 | // 141 | // PKA_BUFFER_RAM : 1024 x 64 - 8K bytes 142 | // PKA_SECURE_RAM : 1536 x 64 - 12K bytes 143 | // PKA_MASTER_PROG_RAM : 8192 x 32 - 32K bytes 144 | // FARM_DATA_RAM_X : 1024 x 64 - 8K bytes 145 | // FARM_DATA_RAM_X_EXT : 256 x 32 - 1K bytes 146 | // FARM_PROG_RAM_X : 2048 x 32 - 8K bytes 147 | // 148 | // Note: 149 | // *TBD* Since the hardware team multiplied the addresses by 2, the size of 150 | // each memory/register group has doubled.
151 | // Memory size should be adjusted accordingly: 152 | // PKA Buffer RAM size : 8KB --> 16KB 153 | // PKA Secure RAM size : 8KB --> 16KB 154 | // PKA Master Program RAM size : 32KB --> 64KB 155 | // PKA Farm Data RAM size : 4KB --> 8KB 156 | // PKA Farm Data RAM extension size : 4KB --> 8KB 157 | // PKA Farm Program RAM size : 8KB --> 16KB 158 | // 159 | #define PKA_BUFFER_RAM_BASE 0x00000 160 | #define PKA_BUFFER_RAM_SIZE MEM_SIZE_16KB // 0x00000...0x03FFF 161 | 162 | #define PKA_SECURE_RAM_BASE 0x20000 163 | #define PKA_SECURE_RAM_SIZE MEM_SIZE_16KB // 0x20000...0x23FFF 164 | 165 | #define PKA_MASTER_PROG_RAM_BASE 0x30000 166 | #define PKA_MASTER_PROG_RAM_SIZE MEM_SIZE_64KB // 0x30000...0x3FFFF 167 | 168 | #define FARM_DATA_RAM_0_BASE 0x40000 169 | #define FARM_DATA_RAM_0_SIZE MEM_SIZE_8KB // 0x40000...0x41FFF 170 | #define FARM_DATA_RAM_0_EXT_BASE 0x42000 171 | #define FARM_DATA_RAM_0_EXT_SIZE MEM_SIZE_8KB // 0x42000...0x43FFF 172 | #define FARM_PROG_RAM_0_BASE 0x40000 173 | #define FARM_PROG_RAM_0_SIZE MEM_SIZE_16KB // 0x40000...0x43FFF 174 | #define FARM_DATA_RAM_1_BASE 0x44000 175 | #define FARM_DATA_RAM_1_SIZE MEM_SIZE_8KB // 0x44000...0x45FFF 176 | #define FARM_DATA_RAM_1_EXT_BASE 0x46000 177 | #define FARM_DATA_RAM_1_EXT_SIZE MEM_SIZE_8KB // 0x46000...0x47FFF 178 | #define FARM_PROG_RAM_1_BASE 0x44000 179 | #define FARM_PROG_RAM_1_SIZE MEM_SIZE_16KB // 0x44000...0x47FFF 180 | #define FARM_DATA_RAM_2_BASE 0x48000 181 | #define FARM_DATA_RAM_2_SIZE MEM_SIZE_8KB // 0x48000...0x49FFF 182 | #define FARM_DATA_RAM_2_EXT_BASE 0x4A000 183 | #define FARM_DATA_RAM_2_EXT_SIZE MEM_SIZE_8KB // 0x4A000...0x4BFFF 184 | #define FARM_PROG_RAM_2_BASE 0x48000 185 | #define FARM_PROG_RAM_2_SIZE MEM_SIZE_16KB // 0x48000...0x4BFFF 186 | #define FARM_DATA_RAM_3_BASE 0x4C000 187 | #define FARM_DATA_RAM_3_SIZE MEM_SIZE_8KB // 0x4C000...0x4DFFF 188 | #define FARM_DATA_RAM_3_EXT_BASE 0x4E000 189 | #define FARM_DATA_RAM_3_EXT_SIZE MEM_SIZE_8KB // 0x4E000...0x4FFFF 190 | #define 
FARM_PROG_RAM_3_BASE 0x4C000 191 | #define FARM_PROG_RAM_3_SIZE MEM_SIZE_16KB // 0x4C000...0x4FFFF 192 | #define FARM_DATA_RAM_4_BASE 0x50000 193 | #define FARM_DATA_RAM_4_SIZE MEM_SIZE_8KB // 0x50000...0x51FFF 194 | #define FARM_DATA_RAM_4_EXT_BASE 0x52000 195 | #define FARM_DATA_RAM_4_EXT_SIZE MEM_SIZE_8KB // 0x52000...0x53FFF 196 | #define FARM_PROG_RAM_4_BASE 0x50000 197 | #define FARM_PROG_RAM_4_SIZE MEM_SIZE_16KB // 0x50000...0x53FFF 198 | #define FARM_DATA_RAM_5_BASE 0x54000 199 | #define FARM_DATA_RAM_5_SIZE MEM_SIZE_8KB // 0x54000...0x55FFF 200 | #define FARM_DATA_RAM_5_EXT_BASE 0x56000 201 | #define FARM_DATA_RAM_5_EXT_SIZE MEM_SIZE_8KB // 0x56000...0x57FFF 202 | #define FARM_PROG_RAM_5_BASE 0x54000 203 | #define FARM_PROG_RAM_5_SIZE MEM_SIZE_16KB // 0x54000...0x57FFF 204 | 205 | // PKA Buffer RAM offsets. These are NOT real CSR's but instead are 206 | // specific offset/addresses within the EIP154 PKA_BUFFER_RAM. 207 | 208 | // Ring 0: 209 | #define RING_CMMD_BASE_0_ADDR 0x00000 210 | #define RING_RSLT_BASE_0_ADDR 0x00010 211 | #define RING_SIZE_TYPE_0_ADDR 0x00020 212 | #define RING_RW_PTRS_0_ADDR 0x00028 213 | #define RING_RW_STAT_0_ADDR 0x00030 214 | 215 | // Ring 1 216 | #define RING_CMMD_BASE_1_ADDR 0x00040 217 | #define RING_RSLT_BASE_1_ADDR 0x00050 218 | #define RING_SIZE_TYPE_1_ADDR 0x00060 219 | #define RING_RW_PTRS_1_ADDR 0x00068 220 | #define RING_RW_STAT_1_ADDR 0x00070 221 | 222 | // Ring 2 223 | #define RING_CMMD_BASE_2_ADDR 0x00080 224 | #define RING_RSLT_BASE_2_ADDR 0x00090 225 | #define RING_SIZE_TYPE_2_ADDR 0x000A0 226 | #define RING_RW_PTRS_2_ADDR 0x000A8 227 | #define RING_RW_STAT_2_ADDR 0x000B0 228 | 229 | // Ring 3 230 | #define RING_CMMD_BASE_3_ADDR 0x000C0 231 | #define RING_RSLT_BASE_3_ADDR 0x000D0 232 | #define RING_SIZE_TYPE_3_ADDR 0x000E0 233 | #define RING_RW_PTRS_3_ADDR 0x000E8 234 | #define RING_RW_STAT_3_ADDR 0x000F0 235 | 236 | // Ring Options 237 | #define PKA_RING_OPTIONS_ADDR 0x07FF8 238 | 239 | // Alternate Window RAM 
size 240 | #define PKA_WINDOW_RAM_REGION_SIZE MEM_SIZE_16KB 241 | 242 | // Currently, we do not use these MiCA specific CSRs. 243 | // The PKI (not EIP154) CSR address/offsets: These are all addressed as 244 | // 8-byte registers. 245 | #define PKA_INT_MASK_ADDR 0x00 246 | #define PKA_INT_MASK_SET_ADDR 0x08 247 | #define PKA_INT_MASK_RESET_ADDR 0x10 248 | #define PKA_ZEROIZE_ADDR 0x40 249 | #define TST_FRO_ADDR 0x50 250 | #define FRO_COUNT_ADDR 0x58 251 | #define PKA_PARITY_CTL_ADDR 0x60 252 | #define PKA_PARITY_STAT_ADDR 0x68 253 | 254 | #endif // __PKA_ADDRS_H__ 255 | -------------------------------------------------------------------------------- /include/pka_atomic.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_ATOMIC_H__ 5 | #define __PKA_ATOMIC_H__ 6 | 7 | #include 8 | #include 9 | 10 | // ARMv8 Assembler code to implement the locking and atomic_bit operations: 11 | 12 | 13 | // Atomic integers using relaxed memory ordering 14 | // 15 | // Atomic integer types (pka_atomic32_t and pka_atomic64_t) can be used to 16 | // implement e.g. shared counters. If not otherwise documented, operations in 17 | // this API are implemented using "RELAXED memory ordering" (see memory 18 | // order descriptions in the C11 specification). Relaxed operations do not 19 | // provide synchronization or ordering for other memory accesses (initiated 20 | // before or after the operation), only atomicity of the operation itself is 21 | // guaranteed. 22 | 23 | // Atomic 32-bit unsigned integer 24 | typedef struct 25 | { 26 | uint32_t v; ///< Actual storage for the atomic variable 27 | } pka_atomic32_t __pka_aligned(sizeof(uint32_t)); // Enforce alignement! 
28 | 29 | // Atomic 64-bit unsigned integer 30 | typedef struct 31 | { 32 | uint64_t v; ///< Actual storage for the atomic variable 33 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 34 | // Some architectures do not support lock-free operations on 64-bit 35 | // data types. We use a spin lock to ensure atomicity. 36 | char lock; ///< Spin lock (if needed) used to ensure atomic access 37 | #endif 38 | } pka_atomic64_t __pka_aligned(sizeof(uint64_t)); // Enforce alignement! 39 | 40 | // 32-bit operations in RELAXED memory ordering 41 | 42 | static inline void pka_atomic32_init(pka_atomic32_t *atom, uint32_t val) 43 | { 44 | __atomic_store_n(&atom->v, val, __ATOMIC_RELAXED); 45 | } 46 | 47 | // Load value of atomic uint32 variable. Return Value of the variable 48 | static inline uint32_t pka_atomic32_load(pka_atomic32_t *atom) 49 | { 50 | return __atomic_load_n(&atom->v, __ATOMIC_RELAXED); 51 | } 52 | 53 | // Atomic fetch and add of 32-bit atomic variable. Return Value of the atomic 54 | // variable before the addition. 55 | static inline uint32_t _pka_atomic32_fetch_add_relaxed(pka_atomic32_t *atom, 56 | uint32_t val) 57 | { 58 | return __atomic_fetch_add(&atom->v, val, __ATOMIC_RELAXED); 59 | } 60 | 61 | // Fetch and increment atomic uint32 variable. Return Value of the variable 62 | // before the increment 63 | static inline uint32_t pka_atomic32_fetch_inc_relaxed(pka_atomic32_t *atom) 64 | { 65 | return __atomic_fetch_add(&atom->v, 1, __ATOMIC_RELAXED); 66 | } 67 | 68 | // Increment atomic uint32 variable 69 | static inline void pka_atomic32_inc(pka_atomic32_t *atom) 70 | { 71 | (void)__atomic_fetch_add(&atom->v, 1, __ATOMIC_RELAXED); 72 | } 73 | 74 | 75 | // Atomic fetch and subtract of 32-bit atomic variable. 
Return Value of the 76 | // atomic variable before the subtraction 77 | static inline uint32_t _pka_atomic32_fetch_sub_relaxed(pka_atomic32_t *atom, 78 | uint32_t val) 79 | { 80 | return __atomic_fetch_sub(&atom->v, val, __ATOMIC_RELAXED); 81 | } 82 | 83 | static inline uint32_t pka_atomic32_sub(pka_atomic32_t *atom, 84 | uint32_t val) 85 | { 86 | return _pka_atomic32_fetch_sub_relaxed(atom, val); 87 | } 88 | 89 | // Decrement atomic uint32 variable 90 | static inline void pka_atomic32_dec(pka_atomic32_t *atom) 91 | { 92 | (void)__atomic_fetch_sub(&atom->v, 1, __ATOMIC_RELAXED); 93 | } 94 | 95 | // 64-bit operations in RELAXED memory ordering 96 | 97 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 98 | #define ATOMIC_CAS_OP(ret_ptr, old_val, new_val) \ 99 | ({ \ 100 | if (atom->v == (old_val)) { \ 101 | atom->v = (new_val); \ 102 | *(ret_ptr) = 1; \ 103 | } else { \ 104 | *(ret_ptr) = 0; \ 105 | } \ 106 | }) 107 | 108 | // Helper macro for lock-based atomic operations on 64-bit integers. Return 109 | // The old value of the variable. 110 | #define ATOMIC_OP(atom, expr) \ 111 | ({ \ 112 | uint64_t _old_val; \ 113 | /* Loop while lock is already taken, stop when lock becomes clear */ \ 114 | while (__atomic_test_and_set(&(atom)->lock, __ATOMIC_ACQUIRE)) \ 115 | (void)0; \ 116 | _old_val = (atom)->v; \ 117 | (expr); /* Perform whatever update is desired */ \ 118 | __atomic_clear(&(atom)->lock, __ATOMIC_RELEASE); \ 119 | _old_val; /* Return old value */ \ 120 | }) 121 | #endif 122 | 123 | static inline void pka_atomic64_init(pka_atomic64_t *atom, uint64_t val) 124 | { 125 | atom->v = val; 126 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 127 | __atomic_clear(&atom->lock, __ATOMIC_RELAXED); 128 | #endif 129 | } 130 | 131 | // Atomic fetch and add of 64-bit atomic variable. 
Return Value of the atomic 132 | // variable before the addition 133 | static inline uint64_t _pka_atomic64_fetch_add_relaxed(pka_atomic64_t *atom, 134 | uint64_t val) 135 | { 136 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 137 | return ATOMIC_OP(atom, atom->v += val); 138 | #else 139 | return __atomic_fetch_add(&atom->v, val, __ATOMIC_RELAXED); 140 | #endif 141 | } 142 | 143 | // Increment atomic uint64 variable 144 | static inline void pka_atomic64_inc(pka_atomic64_t *atom) 145 | { 146 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 147 | (void)ATOMIC_OP(atom, atom->v++); 148 | #else 149 | (void)__atomic_fetch_add(&atom->v, 1, __ATOMIC_RELAXED); 150 | #endif 151 | } 152 | 153 | // Atomic fetch and subtract of 64-bit atomic variable. Return Value of the 154 | // atomic variable before the addition 155 | static inline uint64_t _pka_atomic64_fetch_sub_relaxed(pka_atomic64_t *atom, 156 | uint64_t val) 157 | { 158 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 159 | return ATOMIC_OP(atom, atom->v -= val); 160 | #else 161 | return __atomic_fetch_sub(&atom->v, val, __ATOMIC_RELAXED); 162 | #endif 163 | } 164 | 165 | // Decrement atomic uint64 variable 166 | static inline void pka_atomic64_dec(pka_atomic64_t *atom) 167 | { 168 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 169 | (void)ATOMIC_OP(atom, atom->v--); 170 | #else 171 | (void)__atomic_fetch_sub(&atom->v, 1, __ATOMIC_RELAXED); 172 | #endif 173 | } 174 | 175 | 176 | // Operations with non-relaxed memory ordering 177 | // 178 | // An operation with RELEASE memory ordering (pka_atomic_xxx_rel_xxx()) 179 | // ensures that other threads loading the same atomic variable with ACQUIRE 180 | // memory ordering see all stores (from the calling thread) that happened 181 | // before this releasing store. 
182 | // 183 | // An operation with ACQUIRE memory ordering (pka_atomic_xxx_acq_xxx()) 184 | // ensures that the calling thread sees all stores (done by the releasing 185 | // thread) that happened before a RELEASE memory ordered store to the same 186 | // atomic variable. 187 | // 188 | // An operation with ACQUIRE-and-RELEASE memory ordering 189 | // (pka_atomic_xxx_acq_rel_xxx()) combines the effects of ACQUIRE and RELEASE 190 | // memory orders. A single operation acts as both an acquiring load and 191 | // a releasing store. 192 | 193 | // 32-bit operations in non-RELAXED memory ordering 194 | 195 | // Compare and swap atomic uint32 variable using ACQUIRE-and-RELEASE memory 196 | // ordering 197 | static inline int pka_atomic32_cas_acq_rel(pka_atomic32_t *atom, 198 | uint32_t *old_val, 199 | uint32_t new_val) 200 | { 201 | return __atomic_compare_exchange_n(&atom->v, old_val, new_val, 202 | 0 /* strong */, 203 | __ATOMIC_ACQ_REL, 204 | __ATOMIC_RELAXED); 205 | } 206 | 207 | // Compare and swap atomic uint64 variable using ACQUIRE-and-RELEASE memory 208 | // ordering 209 | static inline int pka_atomic64_cas_acq_rel(pka_atomic64_t *atom, 210 | uint64_t *old_val, 211 | uint64_t new_val) 212 | { 213 | #if __GCC_ATOMIC_LLONG_LOCK_FREE < 2 214 | int ret; 215 | *old_val = ATOMIC_OP(atom, ATOMIC_CAS_OP(&ret, *old_val, new_val)); 216 | return ret; 217 | #else 218 | return __atomic_compare_exchange_n(&atom->v, old_val, new_val, 219 | 0 /* strong */, 220 | __ATOMIC_ACQ_REL, 221 | __ATOMIC_RELAXED); 222 | #endif 223 | } 224 | 225 | 226 | typedef enum 227 | { 228 | LOCK_ACQUIRED = 1, 229 | LOCK_RELEASED = -1, 230 | LOCK_NOT_ACQUIRED = -1, 231 | LOCK_BIT_SET = 0, 232 | } pka_lock_t; 233 | 234 | // Functions below are implemented in the assembler file (pka_lock.S) 235 | 236 | // The following function will try to acquire the lock by atomically setting the 237 | // bottom byte of the "lock" to its thread number "num + 1" (allowing for the 238 | // possibility that thread 
number's start at 0). But this will only succeed if 239 | // this bottom byte is zero. If the lock is already held by another thread 240 | // (bottom byte is non-zero) then based upon the "bit" argument it will 241 | // either (a) return failure or (b) set its dedicated "request" bit in this 242 | // same "lock" so that the current lock owner will know about this request - 243 | // in particular the lock owner will not be able to release this lock while 244 | // any of these request bits are set. 245 | // 246 | // Note that the dedicated thread request bit for "num" N is located at 247 | // "lock" bit N + 8. This implies a maximum of 56 PK threads per execution 248 | // context. 249 | // 250 | // This function will return 1 if the lock was acquired (in which case the 251 | // thread bit is never set - even if set_bit was TRUE). This function will 252 | // return 0 if the lock was NOT acquired but the thread bit was set (which 253 | // implies "set_bit" is TRUE). Finally it will return -1 if the lock was NOT 254 | // acquired AND the thread bit was not set because "set_bit" was FALSE. 255 | int pka_try_acquire_lock(uint64_t *lock_v, uint32_t num, bool set_bit); 256 | 257 | // The following function will try to release the lock by atomically setting 258 | // the bottom byte of the lock_word to 0. However this will fail if any of the 259 | // dedicated "request" bits in the upper 7 bytes are set, in which case the 260 | // current lock owner thread MAY still have work to do. 261 | // 262 | // Return -1 if the lock was released. Otherwise return the thread_num 263 | // corresponding to ONE of the set request bits and clr this bit. 
264 | int pka_try_release_lock(uint64_t *lock_v, uint32_t num); 265 | 266 | #endif // __PKA_ATOMIC_H__ 267 | -------------------------------------------------------------------------------- /include/pka_barrier.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_BARRIER_H__ 5 | #define __PKA_BARRIER_H__ 6 | 7 | #include "pka_atomic.h" 8 | 9 | // ARMv8 Assembler code to implement memory barrier: 10 | 11 | #define dmb(opt) ({ asm volatile("dmb " #opt : : : "memory"); }) 12 | #define dsb(opt) ({ asm volatile("dsb " #opt : : : "memory"); }) 13 | 14 | // General memory barrier. Guarantees that the LOAD and STORE operations 15 | // generated before the barrier occur before the LOAD and STORE operations 16 | // generated after. This function is architecture dependent. 17 | static inline void pka_mb(void) 18 | { 19 | dmb(ish); 20 | } 21 | 22 | // Write memory barrier. Guarantees that the STORE operations generated before 23 | // the barrier occur before the STORE operations generated after. This function 24 | // is architecture dependent. 25 | static inline void pka_wmb(void) 26 | { 27 | dmb(ishst); 28 | } 29 | 30 | // Read memory barrier. Guarantees that the LOAD operations generated before 31 | // the barrier occur before the LOAD operations generated after. This function 32 | // is architecture dependent. 33 | static inline void pka_rmb(void) 34 | { 35 | dmb(ishld); 36 | } 37 | 38 | static inline void pka_mb_full(void) 39 | { 40 | dsb(sy); 41 | } 42 | 43 | // CPU pause -i.e. wait for few CPU cycles. 
This function is implemented in the 44 | // assembler file (pka_lock.S) 45 | void pka_wait(); 46 | 47 | // PKA thread synchronization barrier 48 | typedef struct 49 | { 50 | uint32_t count; ///< Thread count 51 | pka_atomic32_t bar; ///< Barrier counter 52 | } pka_barrier_t; 53 | 54 | static inline void pka_barrier_init(pka_barrier_t *barrier, uint32_t count) 55 | { 56 | barrier->count = (uint32_t)count; 57 | pka_atomic32_init(&barrier->bar, 0); 58 | } 59 | 60 | // Efficient barrier_sync - 61 | // 62 | // Barriers are initialized with a count of the number of callers 63 | // that must sync on the barrier before any may proceed. 64 | // 65 | // To avoid race conditions and to permit the barrier to be fully 66 | // reusable, the barrier value cycles between 0..2*count-1. When 67 | // synchronizing the wasless variable simply tracks which half of 68 | // the cycle the barrier was in upon entry. Exit is when the 69 | // barrier crosses to the other half of the cycle. 70 | static inline void pka_barrier_wait(pka_barrier_t *barrier) 71 | { 72 | uint32_t count; 73 | int wasless; 74 | 75 | pka_mb_full(); 76 | 77 | count = pka_atomic32_fetch_inc_relaxed(&barrier->bar); 78 | wasless = count < barrier->count; 79 | 80 | if (count == (2 * barrier->count - 1)) 81 | { 82 | // Wrap around *atomically* 83 | pka_atomic32_sub(&barrier->bar, 2 * barrier->count); 84 | } 85 | else 86 | { 87 | while ((pka_atomic32_load(&barrier->bar) < barrier->count) 88 | == wasless) 89 | pka_wait(); 90 | } 91 | 92 | pka_mb_full(); 93 | } 94 | 95 | #endif // __PKA_BARRIER_H__ 96 | -------------------------------------------------------------------------------- /include/pka_common.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 
2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_COMMON_H__ 5 | #define __PKA_COMMON_H__ 6 | 7 | /// Common byte definitions 8 | 9 | #define MEGABYTE (1024 * 1024) 10 | 11 | #define BYTES_PER_WORD 4 12 | #define BYTES_PER_DOUBLE_WORD 8 13 | 14 | #ifndef __KERNEL__ 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | /// Generic, commonly-used macro and inline function definitions for PKA lib. 23 | 24 | #ifndef typeof 25 | #define typeof __typeof__ 26 | #endif 27 | 28 | #ifndef asm 29 | #define asm __asm__ 30 | #endif 31 | 32 | #define PKA_CACHE_LINE_SIZE 64 ///< Cache line size. 33 | //Force alignment to cache line. 34 | #define __pka_aligned(a) __attribute__((__aligned__(a))) 35 | #define __pka_cache_aligned __pka_aligned(PKA_CACHE_LINE_SIZE) 36 | 37 | // Macro to mark functions for inlining. 38 | #define __pka_noinline __attribute__((noinline)) 39 | #define __pka_inline inline __attribute__((always_inline)) 40 | // Macro to mark functions and fields scheduled for removal. 41 | #define __pka_deprecated __attribute__((__deprecated__)) 42 | // Force a structure to be packed. 43 | #define __pka_packed __attribute__((__packed__)) 44 | 45 | // Macro to define a function that does not return 46 | #define __pka_noreturn __attribute__((__noreturn__)) 47 | 48 | /// Macros for bit manipulation 49 | 50 | #define BIT_MASK(bits) ((1U << (bits)) - 1) 51 | 52 | #define BIT_IS_SET(var, pos) (((var) >> (pos)) & 1) 53 | 54 | /// Macros for pointer arithmetic 55 | 56 | // Add a byte-value offset from a pointer 57 | #define PKA_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) 58 | 59 | /// Macros/static functions for doing alignment 60 | 61 | // Macro to align a value to a given power-of-two. The resultant value 62 | // will be of the same type as the first parameter, and will be no 63 | // bigger than the first parameter. Second parameter must be a 64 | // power-of-two value. 
#define PKA_ALIGN_FLOOR(val, align) \
    ((typeof(val))((val) & (~((typeof(val))((align) - 1)))))

// Macro to align a pointer to a given power-of-two. The resultant
// pointer will be a pointer of the same type as the first parameter, and
// point to an address no higher than the first parameter. Second parameter
// must be a power-of-two value.
// The pointer argument is parenthesised before the cast so that expression
// arguments such as "p + 1" are evaluated with pointer arithmetic first and
// only then converted to an integer address.
#define PKA_PTR_ALIGN_FLOOR(ptr, align) \
    ((typeof(ptr))PKA_ALIGN_FLOOR((uintptr_t)(ptr), align))

// Macro to align a pointer to a given power-of-two. The resultant
// pointer will be a pointer of the same type as the first parameter, and
// point to an address no lower than the first parameter. Second parameter
// must be a power-of-two value.
#define PKA_PTR_ALIGN_CEIL(ptr, align) \
    PKA_PTR_ALIGN_FLOOR((typeof(ptr))PKA_PTR_ADD(ptr, (align) - 1), align)

// Macro to align a value to a given power-of-two. The resultant value
// will be of the same type as the first parameter, and will be no lower
// than the first parameter. Second parameter must be a power-of-two
// value.
#define PKA_ALIGN_CEIL(val, align) \
    PKA_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)

// Macro to align a pointer to a given power-of-two. The resultant
// pointer will be a pointer of the same type as the first parameter, and
// point to an address no lower than the first parameter. Second parameter
// must be a power-of-two value.
// This function is the same as PKA_PTR_ALIGN_CEIL
#define PKA_PTR_ALIGN(ptr, align) PKA_PTR_ALIGN_CEIL(ptr, align)

// Macro to align a value to a given power-of-two. The resultant
// value will be of the same type as the first parameter, and
// will be no lower than the first parameter. Second parameter
// must be a power-of-two value.
// This function is the same as PKA_ALIGN_CEIL
#define PKA_ALIGN(val, align) PKA_ALIGN_CEIL(val, align)

// Checks if a pointer is aligned to a given power-of-two value. It returns
// true (1) where the pointer is correctly aligned, false (0) otherwise.
// "align" must be a power of two; the check is a direct test of the low
// address bits selected by (align - 1), which yields the same answer as
// rounding the pointer up and comparing it with itself.
static inline int pka_is_aligned(void *ptr, unsigned align)
{
    // (align - 1) is evaluated as unsigned before widening, matching the
    // mask width the alignment macros use.
    return ((uintptr_t)ptr & (uintptr_t)(align - 1)) == 0;
}


/// Macro for calculating the number of elements in the array.
#define PKA_DIM(a) (sizeof (a) / sizeof ((a)[0]))

/// Macros for calculating min and max
/// Both macros evaluate each argument more than once, so arguments must not
/// have side effects.

// Macro to return the maximum of two numbers.
#define MAX(a, b) (((a) <= (b)) ? (b) : (a))
// Macro to return the minimum of two numbers.
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))

/// Macros for branch prediction

// Check if a branch is likely to be taken. This compiler builtin allows the
// developer to indicate if a branch is likely to be taken.
// The condition is normalised with "!!" so that any non-zero value (including
// pointers) is compared against the expected value 1.
#define likely(x) __builtin_expect(!!(x), 1)

// Check if a branch is unlikely to be taken. This compiler builtin allows the
// developer to indicate if a branch is unlikely to be taken.
129 | #define unlikely(x) __builtin_expect((x),0) 130 | 131 | /// __builtin_prefetch (const void *addr, rw, locality) 132 | /// 133 | /// rw 0..1 (0: read, 1: write) 134 | /// locality 0..3 (0: dont leave to cache, 3: leave on all cache levels) 135 | 136 | // Cache prefetch address 137 | #define prefetch(x) __builtin_prefetch((x), 0, 3) 138 | 139 | // Cache prefetch address for storing 140 | #define prefetch_store(x) __builtin_prefetch((x), 1, 3) 141 | 142 | /// Macros to work with powers of 2 143 | 144 | // Returns true if n is a power of 2 145 | static inline int pka_is_power_of_2(uint32_t n) 146 | { 147 | return n && !(n & (n - 1)); 148 | } 149 | 150 | // Aligns input parameter to the next power of 2 151 | static inline uint32_t pka_align32pow2(uint32_t x) 152 | { 153 | x--; 154 | x |= x >> 1; 155 | x |= x >> 2; 156 | x |= x >> 4; 157 | x |= x >> 8; 158 | x |= x >> 16; 159 | 160 | return x + 1; 161 | } 162 | 163 | // Aligns 64b input parameter to the next power of 2 164 | static inline uint64_t pka_align64pow2(uint64_t v) 165 | { 166 | v--; 167 | v |= v >> 1; 168 | v |= v >> 2; 169 | v |= v >> 4; 170 | v |= v >> 8; 171 | v |= v >> 16; 172 | v |= v >> 32; 173 | 174 | return v + 1; 175 | } 176 | 177 | #endif // __KERNEL__ 178 | 179 | #endif // __PKA_COMMON_H__ 180 | -------------------------------------------------------------------------------- /include/pka_config.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_CONFIG_H__ 5 | #define __PKA_CONFIG_H__ 6 | 7 | #include "pka_addrs.h" 8 | 9 | // The maximum number of PKA shims referred to as IO blocks. 10 | #define PKA_MAX_NUM_IO_BLOCKS 24 11 | // The maximum number of Rings supported by IO block (shim). 
#define PKA_MAX_NUM_IO_BLOCK_RINGS  4

#define PKA_MAX_NUM_RINGS \
    (PKA_MAX_NUM_IO_BLOCK_RINGS * PKA_MAX_NUM_IO_BLOCKS)

// Bitmask to represent rings, grouped into 8 bit (uint8_t) blocks.
#define PKA_RING_NUM_BITMASK \
    ((PKA_MAX_NUM_RINGS / 8) + 1)

// Resources are regions which include info control/status words,
// count registers and host window ram.
#define PKA_MAX_NUM_RING_RESOURCES          3

// PKA Ring resources.
// Define Ring resources parameters including base address, size (in bytes)
// and ring spacing.
#define PKA_RING_WORDS_ADDR         PKA_BUFFER_RAM_BASE
#define PKA_RING_CNTRS_ADDR         COMMAND_COUNT_0_ADDR

#define PKA_RING_WORDS_SIZE         0x40        // 64 bytes
#define PKA_RING_CNTRS_SIZE         0x20        // 32 bytes (3 count registers)
#define PKA_RING_MEM_SIZE           0x4000      // 16K bytes

#define PKA_RING_WORDS_SPACING      0x40        // 64 bytes
#define PKA_RING_CNTRS_SPACING      0x10000     // 64K bytes
#define PKA_RING_MEM_0_SPACING      0x4000      // 16K bytes
#define PKA_RING_MEM_1_SPACING      0x10000     // 64K bytes

// PKA Window RAM parameters.
// Define whether to split or not Window RAM during PKA device creation phase.
#define SPLIT_WINDOW_RAM_MODE_ENABLED       1
#define SPLIT_WINDOW_RAM_MODE_DISABLED      0
#define PKA_SPLIT_WINDOW_RAM_MODE           SPLIT_WINDOW_RAM_MODE_DISABLED
// Defines for Window RAM partition. It is valid for 16K memory.
#define PKA_WINDOW_RAM_RING_MEM_SIZE        0x0800 //  2KB
#define PKA_WINDOW_RAM_DATA_MEM_SIZE        0x3800 // 14KB

// Window RAM/Alternate Window RAM offset mask for BF1 and BF2
#define PKA_WINDOW_RAM_OFFSET_MASK1         0x730000

// Window RAM/Alternate Window RAM offset mask for BF3
#define PKA_WINDOW_RAM_OFFSET_MASK2         0x70000

// Macro for mapping PKA Ring address into Window RAM address. It converts the
// ring address, either physical address or virtual address, to valid address
// into the Window RAM. This is done assuming the Window RAM base, size and
// mask. Here, base is the actual physical address of the Window RAM, with the
// help of mask it is reduced to Window RAM offset within that PKA block.
// Further, with the help of addr and size, we arrive at the Window RAM
// offset address for a PKA Ring within the given Window RAM.
// Every argument is parenthesised so that expression arguments (for example
// "a | b" as base) are combined as a unit before the mask is applied.
#define PKA_RING_MEM_ADDR(base, mask, addr, size) \
    (((base) & (mask)) | (((addr) & 0xffff) | \
        ((((addr) & ~((size) - 1)) & 0xf0000) >> 2)))

// PKA Master Sequencer Control/Status Register
// Write '1' to bit [31] puts the Master controller Sequencer in a reset
// state. Resetting the Sequencer (in order to load other firmware)
// should only be done when the EIP-154 is not performing any operations.
#define PKA_MASTER_SEQ_CTRL_RESET_VAL           0x80000000
// Write '1' to bit [30] will reset all Command and Result counters. This
// bit is write-only and self clearing and can only be set if the ‘Reset’
// bit [31] is ‘1’.
#define PKA_MASTER_SEQ_CTRL_CLEAR_COUNTERS_VAL  0x40000000
// Bit [8] in the PKA Master Sequencer Control/Status Register is tied to
// the 'pka_master_irq interrupt' on the EIP-154 interrupt controller.
#define PKA_MASTER_SEQ_CTRL_MASTER_IRQ_BIT      8
// Sequencer status bits are used by the Master controller Sequencer to
// reflect status. Bit [0] is tied to the 'pka_master_irq' interrupt on
// the EIP-154 interrupt controller.
#define PKA_MASTER_SEQ_CTRL_STATUS_BYTE         0x01
// 'pka_master_irq' mask for the Master controller Sequencer Status Register.
#define PKA_MASTER_SEQ_CTRL_MASTER_IRQ_MASK     0x100

// Advanced Interrupt Controller (AIC) configuration
// AIC Polarity Control Register is used to set each individual interrupt
// signal (High Level / Rising Edge) during the initialization phase.
// '0' = Low level or falling edge.
// '1' = High level or rising edge.
#define PKA_AIC_POL_CTRL_REG_VAL                0x000FFFFF
// AIC Type Control Register is used to set each interrupt to level or edge.
// '0' = Level.
// '1' = Edge.
#define PKA_AIC_TYPE_CTRL_REG_VAL               0x000FFFFF
// AIC Enable Control Register is used to enable interrupt inputs.
// '0' = Disabled.
// '1' = Enabled.
#define PKA_AIC_ENABLE_CTRL_REG_VAL             0x000F030F
// AIC Enabled Status Register bits reflect the status of the interrupts
// gated with the enable bits of the AIC_ENABLE_CTRL Register.
// '0' = Inactive.
// '1' = Pending.
#define PKA_AIC_ENABLE_STAT_REG_VAL             0x000F030F

// 'pka_master_irq' mask for the AIC Enabled Status Register.
#define PKA_AIC_ENABLED_STAT_MASTER_IRQ_MASK    0x100

// PKA_RING_OPTIONS field to specify the priority in which rings are handled:
//  '00' = full rotating priority,
//  '01' = fixed priority (ring 0 lowest),
//  '10' = ring 0 has the highest priority and the remaining rings have
//         rotating priority,
//  '11' = reserved, do not use.
// PKA_RING_OPTIONS_PRIORITY is the value actually selected for this build.
#define PKA_FULL_ROTATING_PRIORITY              0x0
#define PKA_FIXED_PRIORITY                      0x1
#define PKA_RING_0_HAS_THE_HIGHEST_PRIORITY     0x2
#define PKA_RESERVED                            0x3
#define PKA_RING_OPTIONS_PRIORITY               PKA_FULL_ROTATING_PRIORITY

// 'Signature' byte used because the ring options are transferred through RAM
// which does not have a defined reset value. The EIP-154 master controller
// keeps reading the PKA_RING_OPTIONS word at start-up until the ‘Signature’
// byte contains 0x46 and the ‘Reserved’ field contains zero.
#define PKA_RING_OPTIONS_SIGNATURE_BYTE         0x46

// Order of the result reporting: Two schemas are available:
//  InOrder    - This means that the results will be reported in the same order
//               as the commands were provided.
//  OutOfOrder - This means that the results are reported as soon as they are
//               available
// Note: PKA_RING_TYPE_IN_ORDER is the value selected for the ring's in-order
// setting, and it is currently defined as the OUT_OF_ORDER bit value (0).
#define PKA_RING_TYPE_IN_ORDER_BIT      1
#define PKA_RING_TYPE_OUT_OF_ORDER_BIT  0
#define PKA_RING_TYPE_IN_ORDER          PKA_RING_TYPE_OUT_OF_ORDER_BIT

// Byte order of the data written/read to/from Rings.
//  Little Endian (LE) - The least significant bytes have the lowest address.
//  Big Endian    (BE) - The most significant bytes come first.
#define PKA_RING_BYTE_ORDER_LE          0
#define PKA_RING_BYTE_ORDER_BE          1
#define PKA_RING_BYTE_ORDER             PKA_RING_BYTE_ORDER_LE

// 'trng_clk_on' mask for PKA Clock Switch Forcing Register. Turn on the
// TRNG clock. When the TRNG is controlled via the Host slave interface,
// this engine needs to be turned on by setting bit 11.
#define PKA_CLK_FORCE_TRNG_ON           0x800

// Number of TRNG Output registers
#define PKA_TRNG_OUTPUT_CNT             4

// TRNG Configuration
#define PKA_TRNG_CONFIG_REG_VAL         0x00020008
// TRNG Alarm Counter Register Value
#define PKA_TRNG_ALARMCNT_REG_VAL       0x000200FF
// TRNG FRO Enable Register Value
#define PKA_TRNG_FROENABLE_REG_VAL      0x00FFFFFF
// TRNG Control Register Value; Set bit 10 to start the EIP-76 a.k.a TRNG
// engine, gathering entropy from the FROs.
#define PKA_TRNG_CONTROL_REG_VAL        0x00000400

// TRNG Control bit
#define PKA_TRNG_CONTROL_TEST_MODE      0x100

// TRNG Control Register Value; Set bit 10 and 12 to start the EIP-76 a.k.a TRNG
// engine with DRBG enabled, gathering entropy from the FROs.
#define PKA_TRNG_CONTROL_DRBG_REG_VAL   0x00001400

// DRBG enabled TRNG 'request_data' value. REQ_DATA_VAL (in accordance with
// DATA_BLOCK_MASK) requests 256 blocks of 128-bit random output.
// 4095 blocks is the max number that can be requested for the TRNG(with DRBG)
// configuration on Bluefield platforms.
#define PKA_TRNG_CONTROL_REQ_DATA_VAL   0x10010000

// Mask for 'Data Block' in TRNG Control Register.
#define PKA_TRNG_DRBG_DATA_BLOCK_MASK   0xfff00000

// Set bit 12 of TRNG Control Register to enable DRBG functionality.
#define PKA_TRNG_CONTROL_DRBG_ENABLE_VAL 0x00001000

// Set bit 8 a.k.a. 'test_sp_800_90 DRBG' bit in the TRNG Test Register.
// NOTE(review): the value defined for this is 0x00000080, which is bit 7 when
// counting from 0 - confirm whether the comment or the value is intended.
180 | #define PKA_TRNG_TEST_DRBG_VAL 0x00000080 181 | 182 | // Number of Personalization String/Additional Input Registers 183 | #define PKA_TRNG_PS_AI_REG_COUNT 12 184 | 185 | // DRBG Reseed enable 186 | #define PKA_TRNG_CONTROL_DRBG_RESEED 0x00008000 187 | 188 | // TRNG Status bits 189 | #define PKA_TRNG_STATUS_READY 0x1 190 | #define PKA_TRNG_STATUS_SHUTDOWN_OFLO 0x2 191 | #define PKA_TRNG_STATUS_TEST_READY 0x100 192 | #define PKA_TRNG_STATUS_MONOBIT_FAIL 0x80 193 | #define PKA_TRNG_STATUS_RUN_FAIL 0x10 194 | #define PKA_TRNG_STATUS_POKER_FAIL 0x40 195 | 196 | // TRNG Alarm Counter bits 197 | #define PKA_TRNG_ALARMCNT_STALL_RUN_POKER 0x8000 198 | 199 | // TRNG Test bits 200 | #define PKA_TRNG_TEST_KNOWN_NOISE 0x20 201 | #define PKA_TRNG_TEST_NOISE 0x2000 202 | 203 | #endif // __PKA_CONFIG_H__ 204 | -------------------------------------------------------------------------------- /include/pka_cpu.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_CPU_H__ 5 | #define __PKA_CPU_H__ 6 | 7 | #ifdef __KERNEL__ 8 | #include 9 | #include 10 | #else 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #endif 22 | 23 | #include "pka_common.h" 24 | 25 | #define MAX_CPU_NUMBER 16 // BlueField specific 26 | 27 | #define MEGA 1000000 28 | #define GIGA 1000000000 29 | 30 | #define MS_PER_S 1000 31 | #define US_PER_S 1000000 32 | #define NS_PER_S 1000000000 33 | 34 | // Initial guess at our CPU speed. We set this to be larger than any 35 | // possible real speed, so that any calculated delays will be too long, 36 | // rather than too short. 
37 | // 38 | //*Warning: use dummy value for frequency 39 | #define CPU_HZ_MAX (2 * GIGA) // Cortex A72 : 2 GHz max -> 2.5 GHz max 40 | //#define CPU_HZ_MAX (1255 * MEGA) // CPU Freq for High/Bin Chip 41 | 42 | // YIELD hints the CPU to switch to another thread if possible 43 | // and executes as a NOP otherwise. 44 | #define pka_cpu_yield() ({ asm volatile("yield" : : : "memory"); }) 45 | // ISB flushes the pipeline, then restarts. This is guaranteed to 46 | // stall the CPU a number of cycles. 47 | #define pka_cpu_relax() ({ asm volatile("isb" : : : "memory"); }) 48 | 49 | #ifdef __KERNEL__ 50 | // Processor speed in hertz; used in routines which might be called very 51 | // early in boot. 52 | static inline uint64_t pka_early_cpu_speed(void) 53 | { 54 | return CPU_HZ_MAX; 55 | } 56 | #else 57 | 58 | #ifdef __BIG_ENDIAN__ 59 | #define WORD(x) (u16)((x)[0] + ((x)[1] << 8)) 60 | #define DWORD(x) (u32)((x)[0] + ((x)[1] << 8) + ((x)[2] << 16) + ((x)[3] << 24)) 61 | #define QWORD(x) (U64(DWORD(x), DWORD(x + 4))) 62 | #else 63 | #define WORD(x) (uint16_t)(*(const uint16_t *)(x)) 64 | #define DWORD(x) (uint32_t)(*(const uint32_t *)(x)) 65 | #define QWORD(x) (*(const uint64_t *)(x)) 66 | #endif 67 | #define MAX_CLOCK_CYCLES UINT64_MAX 68 | 69 | /// Global variable holding the cpu frequency. 70 | extern uint64_t cpu_f_hz; 71 | 72 | static __pka_inline uint64_t pka_get_hz_ticks(void) 73 | { 74 | uint64_t freq_64; 75 | 76 | // Read counter 77 | asm volatile("mrs %0, cntfrq_el0" : "=r" (freq_64)); 78 | return freq_64; 79 | } 80 | 81 | /// Returns maximum frequency of specified CPU (in Hz) on success, and 0 82 | /// on failure 83 | static inline uint64_t pka_cpu_hz_max_id(int id) 84 | { 85 | if (id < 0 || id >= MAX_CPU_NUMBER) 86 | return 0; 87 | 88 | /// Below check is to avoid multiple read. 89 | if (cpu_f_hz) 90 | return cpu_f_hz; 91 | 92 | cpu_f_hz = pka_get_hz_ticks(); 93 | 94 | /// if frequency reading is zero, return max value. 
95 | if (!cpu_f_hz) 96 | cpu_f_hz = CPU_HZ_MAX; 97 | 98 | return cpu_f_hz; 99 | } 100 | 101 | /// Returns maximum frequency of this CPU (in Hz) on success, and 0 on failure. 102 | static inline uint64_t pka_cpu_hz_max(void) 103 | { 104 | return pka_cpu_hz_max_id(0); 105 | } 106 | 107 | /// Read the system counter frequency 108 | static inline uint64_t pka_cpu_rdfrq(void) 109 | { 110 | uint64_t frq; 111 | 112 | asm volatile("mrs %0, cntfrq_el0" : "=r" (frq)); 113 | return frq; 114 | } 115 | 116 | /// Read the time base register. 117 | static inline uint64_t pka_cpu_rdvct(void) 118 | { 119 | uint64_t vct; 120 | 121 | asm volatile("mrs %0, cntvct_el0" : "=r" (vct)); 122 | return vct; 123 | } 124 | 125 | /// Return current CPU cycle count. Cycle count may not be reset at PKA init 126 | /// and thus may wrap back to zero between two calls. Use pka_cpu_cycles_max() 127 | /// to read the maximum count value after which it wraps. Cycle count frequency 128 | /// follows the CPU frequency and thus may change at any time. The count may 129 | /// advance in steps larger than one. Use pka_cpu_cycles_resolution() to read 130 | /// the step size. 131 | /// 132 | /// @note Do not use CPU count for time measurements since the frequency may 133 | /// vary. 134 | /// 135 | /// @note This call is easily portable to any ARM architecture, however, 136 | /// it may be damn slow and imprecise for some tasks. 
137 | static inline uint64_t pka_cpu_cycles(void) 138 | { 139 | #ifdef PKA_AARCH_64 140 | return pka_cpu_rdvct(); 141 | #else 142 | struct timespec time; 143 | uint64_t sec, ns, hz, cycles; 144 | int ret; 145 | 146 | ret = clock_gettime(CLOCK_MONOTONIC_RAW, &time); 147 | 148 | if (ret != 0) 149 | abort(); 150 | 151 | hz = pka_cpu_hz_max(); 152 | sec = (uint64_t)time.tv_sec; 153 | ns = (uint64_t)time.tv_nsec; 154 | 155 | cycles = sec * hz; 156 | cycles += (ns * hz) / GIGA; 157 | 158 | return cycles; 159 | #endif 160 | } 161 | 162 | /// Return maximum CPU cycle count value before it wraps back to zero. 163 | static inline uint64_t pka_cpu_cycles_max(void) 164 | { 165 | return MAX_CLOCK_CYCLES; 166 | } 167 | 168 | /// CPU cycle count may advance in steps larger than one. This function returns 169 | /// resolution of pka_cpu_cycles() in CPU cycles. 170 | static inline uint64_t pka_cpu_cycles_resolution(void) 171 | { 172 | return 1; 173 | } 174 | 175 | /// Format CPU cycles. If PKA_AARCH_64 is defined, this function deduce the 176 | /// the number of cycles from raw counters (generic timers count). 177 | static inline uint64_t pka_cpu_cycles_format(uint64_t cycles_cnt) 178 | { 179 | #ifdef PKA_AARCH_64 180 | uint64_t ns, hz, hz_max, cycles; 181 | 182 | hz = pka_cpu_rdfrq(); 183 | ns = cycles_cnt * NS_PER_S; 184 | ns /= hz; 185 | 186 | hz_max = pka_cpu_hz_max(); 187 | cycles = (ns * hz_max) / GIGA; 188 | 189 | return cycles; 190 | #else 191 | return cycles_cnt; 192 | #endif 193 | } 194 | 195 | /// Calculate difference between cycle counts c1 and c2. Parameter c1 must 196 | /// be the first cycle count sample and c2 the second. The function handles 197 | /// correctly single cycle count wrap between c1 and c2. 
198 | static inline uint64_t pka_cpu_cycles_diff(uint64_t c2, uint64_t c1) 199 | { 200 | uint64_t cycles; 201 | 202 | if (likely(c2 >= c1)) 203 | cycles = c2 - c1; 204 | else 205 | cycles = c2 + (pka_cpu_cycles_max() - c1) + 1; 206 | 207 | return pka_cpu_cycles_format(cycles); 208 | } 209 | 210 | /// Pause CPU execution for a short while. This call is intended for tight 211 | /// loops which poll a shared resource. A short pause within the loop may 212 | /// save energy and improve system performance as CPU polling frequency is 213 | /// reduced. 214 | static inline void pka_cpu_pause(void) 215 | { 216 | __asm__ __volatile__ ("nop"); 217 | __asm__ __volatile__ ("nop"); 218 | __asm__ __volatile__ ("nop"); 219 | __asm__ __volatile__ ("nop"); 220 | } 221 | 222 | #endif // __KERNEL__ 223 | 224 | #endif // __PKA_CPU_H__ 225 | -------------------------------------------------------------------------------- /include/pka_debug.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_DEBUG_H__ 5 | #define __PKA_DEBUG_H__ 6 | 7 | // PKA library bitmask. Use those bits to enable debug messages 8 | #define PKA_DRIVER 0x0001 9 | #define PKA_DEV 0x0002 10 | #define PKA_RING 0x0004 11 | #define PKA_QUEUE 0x0008 12 | #define PKA_MEM 0x0010 13 | #define PKA_USER 0x0020 14 | #define PKA_TESTS 0x0040 15 | // PKA debug mask. This indicates the debug/verbosity level. 16 | #define PKA_DEBUG_LIB_MASK 0x0040 17 | 18 | #ifdef __KERNEL__ 19 | 20 | #define PKA_PRINT(lib, fmt, args...) \ 21 | ({ pr_info(#lib": "fmt, ##args); }) 22 | 23 | #define PKA_ERROR(lib, fmt, args...) \ 24 | ({ pr_err(#lib": %s: error: "fmt, __func__, ##args); }) 25 | 26 | #define PKA_DEBUG(lib, fmt, args...) 
\ 27 | ({ \ 28 | if (lib & PKA_DEBUG_LIB_MASK) \ 29 | pr_debug(#lib": %s: "fmt, __func__, ##args); \ 30 | }) 31 | 32 | #define PKA_PANIC(lib, msg, args...) \ 33 | ({ \ 34 | pr_info(#lib": %s: panic: "msg, __func__, ##args); \ 35 | panic(msg, ##args); \ 36 | }) 37 | 38 | #else 39 | 40 | #define PKA_PRINT(lib, fmt, args...) \ 41 | ({ printf(#lib": "fmt, ##args); }) 42 | 43 | #define PKA_ERROR(lib, fmt, args...) \ 44 | ({ printf(#lib": %s: error: "fmt, __func__, ##args); }) 45 | 46 | #define PKA_DEBUG(lib, fmt, args...) \ 47 | ({ \ 48 | if (lib & PKA_DEBUG_LIB_MASK) \ 49 | printf(#lib": %s: "fmt, __func__, ##args); \ 50 | }) 51 | 52 | #define PKA_ASSERT_STR(cond, msg) \ 53 | ({ if (!(cond)) { \ 54 | printf("%s\n", msg); \ 55 | abort(); } \ 56 | }) 57 | 58 | #define PKA_ASSERT(cond) PKA_ASSERT_STR(cond, " assert failed: " #cond) 59 | 60 | #endif // __KERNEL__ 61 | 62 | #endif // __PKA_DEBUG_H__ 63 | -------------------------------------------------------------------------------- /include/pka_ioctl.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_IOCTL_H__ 5 | #define __PKA_IOCTL_H__ 6 | 7 | #ifdef __KERNEL__ 8 | #include 9 | #include 10 | #else 11 | #include 12 | #include 13 | #endif 14 | 15 | #define PKA_IOC_TYPE 0xB7 16 | 17 | /// PKA_RING_GET_REGION_INFO - _IORW(PKA_IOC_TYPE, 0x0, pka_dev_region_info_t) 18 | /// 19 | /// Retrieve information about a device region. This is intended to describe 20 | /// MMIO, I/O port, as well as bus specific regions (ex. PCI config space). 21 | /// Zero sized regions may be used to describe unimplemented regions. 22 | /// Return: 0 on success, -errno on failure. 23 | typedef struct 24 | { 25 | uint32_t reg_index; ///< Registers region index. 26 | uint64_t reg_size; ///< Registers region size (bytes). 
27 | uint64_t reg_offset; ///< Registers region offset from start of device fd. 28 | 29 | uint32_t mem_index; ///< Memory region index. 30 | uint64_t mem_size; ///< Memory region size (bytes). 31 | uint64_t mem_offset; ///< Memory region offset from start of device fd. 32 | } pka_dev_region_info_t; 33 | #define PKA_RING_GET_REGION_INFO _IOWR(PKA_IOC_TYPE, 0x0, pka_dev_region_info_t) 34 | 35 | /// PKA_GET_RING_INFO - _IORW(PKA_IOC_TYPE, 0x1, pka_dev_ring_info_t) 36 | /// 37 | /// Retrieve information about a ring. This is intended to describe ring 38 | /// information words located in PKA_BUFFER_RAM. Ring information includes 39 | /// base addresses, size and statistics. 40 | /// Return: 0 on success, -errno on failure. 41 | typedef struct // Bluefield specific ring information 42 | { 43 | /// Base address of the command descriptor ring. 44 | uint64_t cmmd_base; 45 | 46 | /// Base address of the result descriptor ring. 47 | uint64_t rslt_base; 48 | 49 | /// Size of a command ring in number of descriptors, minus 1. 50 | /// Minimum value is 0 (for 1 descriptor); maximum value is 51 | /// 65535 (for 64K descriptors). 52 | uint16_t size; 53 | 54 | /// This field specifies the size (in 32-bit words) of the 55 | /// space that PKI command and result descriptor occupies on 56 | /// the Host. 57 | uint16_t host_desc_size : 10; 58 | 59 | /// Indicates whether the result ring delivers results strictly 60 | /// in-order ('1') or that result descriptors are written to the 61 | /// result ring as soon as they become available, so out-of-order 62 | /// ('0'). 63 | uint8_t in_order : 1; 64 | 65 | /// Read pointer of the command descriptor ring. 66 | uint16_t cmmd_rd_ptr; 67 | 68 | /// Write pointer of the result descriptor ring. 69 | uint16_t rslt_wr_ptr; 70 | 71 | /// Read statistics of the command descriptor ring. 72 | uint16_t cmmd_rd_stats; 73 | 74 | /// Write statistics of the result descriptor ring. 
75 | uint16_t rslt_wr_stats; 76 | 77 | } pka_dev_hw_ring_info_t; 78 | #define PKA_GET_RING_INFO _IOWR(PKA_IOC_TYPE, 0x1, pka_dev_hw_ring_info_t) 79 | 80 | /// PKA_CLEAR_RING_COUNTERS - _IO(PKA_IOC_TYPE, 0x2) 81 | /// 82 | /// Clear counters. This is intended to reset all command and result counters. 83 | /// Return: 0 on success, -errno on failure. 84 | #define PKA_CLEAR_RING_COUNTERS _IO(PKA_IOC_TYPE, 0x2) 85 | 86 | /// PKA_GET_RANDOM_BYTES - _IOWR(PKA_IOC_TYPE, 0x3, pka_dev_trng_info_t) 87 | /// 88 | /// Get random bytes from True Random Number Generator(TRNG). 89 | /// Return: 0 on success, -errno on failure. 90 | typedef struct // True Random Number Generator information 91 | { 92 | /// Number of random bytes in the buffer; Length of the buffer. 93 | uint32_t count; 94 | 95 | /// Data buffer to hold the random bytes. 96 | uint8_t *data; 97 | 98 | } pka_dev_trng_info_t; 99 | #define PKA_GET_RANDOM_BYTES _IOWR(PKA_IOC_TYPE, 0x3, pka_dev_trng_info_t) 100 | 101 | #endif // __PKA_IOCTL_H__ 102 | -------------------------------------------------------------------------------- /include/pka_lock.S: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | // ARMv8 Assembler code to implement the locking and atomic_bit ops: 5 | 6 | // The following function will try to acquire the lock by atomically setting 7 | // the bottom byte of the "lock" to its thread number "num + 1" (allowing for 8 | // the possibility that thread number's start at 0). But this will only 9 | // succeed if this bottom byte is zero. 
// If the lock is already held by another thread (bottom byte is non-zero),
// then, depending on the "set_bit" argument, this function will either
// (a) return failure or (b) set the caller's dedicated "request" bit in this
// same "lock" word so that the current lock owner will know about this
// request; in particular the lock owner will not be able to release the lock
// while any of these request bits are set.
//
// Note that the dedicated thread request bit for "num" N is located at
// "lock" bit N + 8.  This implies a maximum of 56 PK threads per execution
// context.
//
// Returns:
//    1  the lock was acquired (the request bit is never set in this case,
//       even if set_bit was TRUE);
//    0  the lock was NOT acquired but the request bit was set (set_bit TRUE);
//   -1  the lock was NOT acquired and the request bit was not set
//       (set_bit FALSE).
//
// int pka_try_acquire_lock(uint64_t *lock,
//                          uint32_t  num,
//                          bool      set_bit);
//
// Register usage: X0 = lock, X1 = num, X2 = set_bit, X3-X7 = scratch.

        .text
        .balign 64
        .global pka_try_acquire_lock
pka_try_acquire_lock:
        // "num" is a 32-bit and "set_bit" a bool argument: the procedure call
        // standard leaves the register bits above them unspecified, and the
        // code below ORs values derived from X1/X2 into the 64-bit lock word.
        // Normalise both first (idempotent, so the retry path may re-run it).
        mov     W1, W1                  // zero-extend num
        and     X2, X2, #1              // keep only bit 0 of set_bit

        ldaxr   X3, [X0]                // X3 = current lock word
        add     X5, X1, #8
        lsl     X4, X2, X5              // set_bit << (num + 8)
        orr     X4, X4, X3              // X4 = lock word with request bit
        add     X5, X1, #1
        mov     X6, #0xFF
        ands    X6, X3, X6              // Z set <=> owner byte is zero
        orr     X5, X5, X3              // X5 = lock word owned by num + 1
        csel    X3, X5, X4, eq          // free: take it, else: request it
        stlxr   W7, X3, [X0]            // (does not alter the flags)
        cbnz    W7, acquire_lock_fail

        // Our store exclusive instruction succeeded in changing the lock_word.
        // If we wrote our "num + 1" to the bottom byte we now own the lock
        // so return 1.  Otherwise return 0 when set_bit is TRUE and -1 when
        // set_bit is FALSE.
        beq     lock_acquired
        sub     X0, X2, #1
        ret

lock_acquired:
        mov     X0, #1
        ret

        // When the load exclusive/store exclusive fail (i.e. store doesn't
        // happen), wait a little bit of time before trying again.
acquire_lock_fail:
        mov     X6, #20

acquire_lock_delay_loop:
        sub     X6, X6, #1
        cmp     X6, #0
        ble     pka_try_acquire_lock
        b       acquire_lock_delay_loop


// The following function will try to release the lock by atomically setting
// the bottom byte of the lock_word to 0.  However this will fail if any of the
// dedicated "request" bits in the upper 7 bytes are set, in which case the
// current lock owner thread MAY still have work to do.
//
// Return -1 if the lock was released.  Otherwise return the thread_num
// corresponding to ONE of the set request bits (the highest one) and clear
// this bit.  The thread_num argument (X1) is not read by this code.
//
// int pka_try_release_lock(uint64_t *lock_word, uint32_t thread_num);

        .balign 64
        .global pka_try_release_lock
pka_try_release_lock:
        ldaxr   X2, [X0]                // X2 = current lock word
        mov     X3, #0xFF
        mov     X4, #63
        bic     X3, X2, X3              // X3 = request bits only
        clz     X5, X3
        sub     X5, X4, X5              // X5 = index of highest request bit
        cmp     X3, #0                  // Z set <=> no pending request
        mov     X4, #1
        lsl     X6, X4, X5
        bic     X6, X2, X6              // X6 = lock word minus that request
        csel    X2, X3, X6, eq          // no request: store 0 (release)
        stlxr   W7, X2, [X0]            // (does not alter the flags)
        cbnz    W7, release_lock_fail

        beq     lock_released
        sub     X0, X5, #8              // request bit index -> thread number
        ret

lock_released:
        mov     X0, #-1
        ret

        // When the load exclusive/store exclusive fail (i.e. store doesn't
        // happen), wait a little bit of time before trying again.
release_lock_fail:
        mov     X6, #20

release_lock_delay_loop:
        sub     X6, X6, #1
        cmp     X6, #0
        ble     pka_try_release_lock
        b       release_lock_delay_loop



// void pka_wait(void)
//
// Short busy-wait: counts X0 down from 50 to 0, then returns.

        .global pka_wait
pka_wait:
        mov     X0, #50

pause_loop:
        sub     X0, X0, #1
        cmp     X0, #0
        bgt     pause_loop

        ret
// User-space MMIO accessors.  Accesses go through a volatile 64-bit pointer
// so the compiler neither elides nor merges them.  On a big-endian host the
// value is byte-swapped, so the 64-bit word is always held in memory in
// little-endian byte order.
//
// Big-endian detection: __BIG_ENDIAN__ is not predefined by every compiler
// on every big-endian target, so the GCC/Clang __BYTE_ORDER__ macro is
// checked as well.
#if defined(__BIG_ENDIAN__) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))

/// Read the 64-bit word at addr and convert it to host byte order.
static inline __attribute__((always_inline)) uint64_t
__pka_mmio_read64(void* addr)
{
    return __builtin_bswap64(*((volatile uint64_t*)addr));
}

/// Convert val from host byte order and write it as a 64-bit word to addr.
static inline __attribute__((always_inline)) void
__pka_mmio_write64(void* addr, uint64_t val)
{
    *((volatile uint64_t*)addr) = __builtin_bswap64(val);
}

#else // little-endian host

/// Read the 64-bit word at addr.
static inline __attribute__((always_inline)) uint64_t
__pka_mmio_read64(void* addr)
{
    return *((volatile uint64_t*)addr);
}

/// Write val as a 64-bit word to addr.
static inline __attribute__((always_inline)) void
__pka_mmio_write64(void* addr, uint64_t val)
{
    *((volatile uint64_t*)addr) = val;
}

#endif // big-endian host

/* Default size is 64-bit. */
#define pka_mmio_read(addr)         __pka_mmio_read64(addr)
#define pka_mmio_write(addr, val)   __pka_mmio_write64(addr, val)
/// Generic two-state status: SUCCESS is 0, FAILURE is 1.
typedef enum
{
    SUCCESS,
    FAILURE
} pka_status_t;

/// PKA error codes.
/// This enumeration lists the error codes returned by the main API functions.
/// Apart from PKA_NO_ERROR, every code is a negative value in -1500 .. -1520.
typedef enum
{
    /// Successful return code.
    PKA_NO_ERROR                = 0,

    // Operand validation errors.
    PKA_OPERAND_MISSING         = -1500, ///< operand missing
    PKA_OPERAND_BUF_MISSING     = -1501, ///< operand buf is NULL
    PKA_OPERAND_LEN_ZERO        = -1502, ///< operand len is 0
    PKA_OPERAND_LEN_TOO_SHORT   = -1503, ///< operand len is too short for op
    PKA_OPERAND_LEN_TOO_LONG    = -1504, ///< operand len is too long for op
    PKA_OPERAND_LEN_A_LT_LEN_B  = -1505, ///< operand ordering error
    PKA_OPERAND_VAL_GE_MODULUS  = -1506, ///< value operand is >= modulus
    PKA_OPERAND_Q_GE_OPERAND_P  = -1507, ///< q operand is >= p operand
    PKA_OPERAND_MODULUS_IS_EVEN = -1508, ///< modulus must be odd for this op
    PKA_RESULT_MUST_BE_POSITIVE = -1509, ///< all result big integers >= 0

    // Queueing / resource errors.
    PKA_OPERAND_FIFO_FULL       = -1510, ///< operand request fifo full
    PKA_CMD_RING_FULL           = -1511, ///< cmd request fifo full
    PKA_DRIVER_TOO_BUSY         = -1512, ///< PKA driver backlog too large
    PKA_BAD_OPERAND_CNT         = -1513, ///< wrong operand_cnt for cmd
    PKA_TRY_GET_RESULTS_FAILED  = -1514, ///< try found result fifo empty
    PKA_TRY_GET_RANDOM_FAILED   = -1515, ///< random number fifo empty

    // Result retrieval errors.
    PKA_RESULT_BUF_NULL         = -1516, ///< result buf ptr is NULL
    PKA_RESULT_BUF_TOO_SMALL    = -1517, ///< result buf_len too small
    PKA_BAD_RESULT_IDX          = -1518, ///< bad result_idx
    PKA_RESULT_FIFO_EMPTY       = -1519, ///< result fifo empty

    PKA_CURVE_TYPE_INVALID      = -1520  ///< Invalid curve type
} pka_ret_code_t;
/// Result type for internal comparisons.  The enumerators are numbered
/// consecutively starting at 0.
typedef enum
{
    PKA_NO_COMPARE   = 0,
    PKA_LESS_THAN    = 1,
    PKA_EQUAL        = 2,
    PKA_GREATER_THAN = 3
} pka_comparison_t;
/// Device resource structure
typedef struct
{
    /// (iore)mapped version of addr, for driver internal use.
    void     *ioaddr;

    /// Base address of the device's resource.
    uint64_t  base;

    /// Size of IO.
    uint64_t  size;

    /// Type of resource addr points to (see PKA_DEV_RES_TYPE_*).
    uint8_t   type;

    /// Status of the resource (see PKA_DEV_RES_STATUS_*).
    int8_t    status;

    /// Name of the resource.
    char     *name;
} pka_dev_res_t;

/// defines for pka_dev_res->type
#define PKA_DEV_RES_TYPE_MEM            1   // resource type is memory
#define PKA_DEV_RES_TYPE_REG            2   // resource type is register

/// defines for pka_dev_res->status
#define PKA_DEV_RES_STATUS_MAPPED       1   // the resource is (iore)-mapped
#define PKA_DEV_RES_STATUS_UNMAPPED     -1  // the resource is unmapped

/// PKA Ring resources structure: the three resources owned by one ring.
typedef struct
{
    pka_dev_res_t info_words;   ///< ring information words
    pka_dev_res_t counters;     ///< ring counters
    pka_dev_res_t window_ram;   ///< window RAM
} pka_dev_ring_res_t;
104 | 105 | #ifdef __KERNEL__ 106 | struct mutex mutex; ///< mutex lock for sharing ring device 107 | #endif 108 | } pka_dev_ring_t; 109 | 110 | /// defines for pka_dev_ring->status 111 | #define PKA_DEV_RING_STATUS_UNDEFINED -1 112 | #define PKA_DEV_RING_STATUS_INITIALIZED 1 113 | #define PKA_DEV_RING_STATUS_READY 2 114 | #define PKA_DEV_RING_STATUS_BUSY 3 115 | #define PKA_DEV_RING_STATUS_FINALIZED 4 116 | 117 | /// PKA Shim resources structure 118 | typedef struct 119 | { 120 | pka_dev_res_t buffer_ram; // buffer RAM 121 | pka_dev_res_t master_prog_ram; // master controller program RAM 122 | pka_dev_res_t master_seq_ctrl; // master sequencer controller CSR 123 | pka_dev_res_t aic_csr; // interrupt controller CSRs 124 | pka_dev_res_t trng_csr; // TRNG module CSRs 125 | pka_dev_res_t ext_csr; // MiCA specific CSRs (glue logic) 126 | } pka_dev_shim_res_t; 127 | 128 | #define PKA_DEV_SHIM_RES_CNT 6 // Number of PKA device resources 129 | 130 | /// Platform global shim resource information 131 | typedef struct 132 | { 133 | pka_dev_res_t *res_tbl[PKA_DEV_SHIM_RES_CNT]; 134 | uint8_t res_cnt; 135 | } pka_dev_gbl_shim_res_info_t; 136 | 137 | struct pka_dev_mem_res 138 | { 139 | uint64_t eip154_base; ///< base address for eip154 mmio registers 140 | uint64_t eip154_size; ///< eip154 mmio register region size 141 | 142 | uint64_t wndw_ram_off_mask; ///< common offset mask for alt window ram and window ram 143 | uint64_t wndw_ram_base; ///< base address for window ram 144 | uint64_t wndw_ram_size; ///< window ram region size 145 | 146 | uint64_t alt_wndw_ram_0_base; ///< base address for alternate window ram 0 147 | uint64_t alt_wndw_ram_1_base; ///< base address for alternate window ram 1 148 | uint64_t alt_wndw_ram_2_base; ///< base address for alternate window ram 2 149 | uint64_t alt_wndw_ram_3_base; ///< base address for alternate window ram 3 150 | uint64_t alt_wndw_ram_size; ///< alternate window ram regions size 151 | 152 | uint64_t csr_base; ///< base address for 
csr registers 153 | uint64_t csr_size; ///< csr area size 154 | }; 155 | 156 | /// PKA Shim structure 157 | struct pka_dev_shim_s 158 | { 159 | struct pka_dev_mem_res mem_res; 160 | 161 | uint64_t trng_err_cycle; ///< TRNG error cycle 162 | 163 | uint32_t shim_id; ///< shim identifier 164 | 165 | uint32_t rings_num; ///< Number of supported rings (hw 166 | /// specific) 167 | 168 | pka_dev_ring_t **rings; ///< pointer to rings which belong to 169 | /// the shim. 170 | 171 | uint8_t ring_priority; ///< specify the priority in which 172 | /// rings are handled. 173 | 174 | uint8_t ring_type; ///< indicates whether the result 175 | /// ring delivers results strictly 176 | /// in-order. 177 | 178 | pka_dev_shim_res_t resources; ///< shim resources 179 | 180 | uint8_t window_ram_split; ///< Window RAM mode. if non-zero, 181 | /// the splitted window RAM scheme 182 | /// is used. 183 | 184 | uint32_t busy_ring_num; ///< Number of active rings (rings in 185 | /// busy state) 186 | 187 | uint8_t trng_enabled; ///< Whether the TRNG engine is 188 | /// enabled. 
189 | 190 | int8_t status; ///< status of the shim 191 | 192 | #ifdef __KERNEL__ 193 | struct mutex mutex; ///< mutex lock for sharing shim 194 | #endif 195 | }; 196 | 197 | /// defines for pka_dev_shim->status 198 | #define PKA_SHIM_STATUS_UNDEFINED -1 199 | #define PKA_SHIM_STATUS_CREATED 1 200 | #define PKA_SHIM_STATUS_INITIALIZED 2 201 | #define PKA_SHIM_STATUS_RUNNING 3 202 | #define PKA_SHIM_STATUS_STOPPED 4 203 | #define PKA_SHIM_STATUS_FINALIZED 5 204 | 205 | /// defines for pka_dev_shim->window_ram_split 206 | #define PKA_SHIM_WINDOW_RAM_SPLIT_ENABLED 1 // window RAM is splitted into 207 | // 4 * 16KB blocks 208 | 209 | #define PKA_SHIM_WINDOW_RAM_SPLIT_DISABLED 2 // window RAM is not splitted 210 | // and occupies 64KB 211 | 212 | /// defines for pka_dev_shim->trng_enabled 213 | #define PKA_SHIM_TRNG_ENABLED 1 214 | #define PKA_SHIM_TRNG_DISABLED 0 215 | 216 | /// Platform global configuration structure 217 | typedef struct 218 | { 219 | uint32_t dev_shims_cnt; ///< number of registered PKA shims. 220 | uint32_t dev_rings_cnt; ///< number of registered Rings. 221 | 222 | pka_dev_shim_t *dev_shims[PKA_MAX_NUM_IO_BLOCKS]; ///< table of registered 223 | /// PKA shims. 224 | 225 | pka_dev_ring_t *dev_rings[PKA_MAX_NUM_RINGS]; ///< table of registered 226 | /// Rings. 227 | } pka_dev_gbl_config_t; 228 | 229 | extern pka_dev_gbl_config_t pka_gbl_config; 230 | 231 | #ifndef __KERNEL__ 232 | /// Return ring information and initialize ring descriptors. 233 | int pka_dev_get_ring_info(pka_ring_info_t *ring_info); 234 | 235 | /// Return true if there is an available ring, false if not. This function 236 | /// verifies if there is a free ring which can be used. It returns true if 237 | /// true, otherwise it returns false. The input parameter rings_num refers 238 | /// to the number of rings to look for. 
bool pka_dev_has_avail_ring(pka_ring_info_t *ring_info,
                            uint32_t         rings_num);
#endif

#ifdef __KERNEL__

/// Ring getter for the pka_dev_gbl_config_t structure, which holds all system
/// global configuration. This configuration is shared and common to the kernel
/// device driver associated with PKA hardware.
/// NOTE(review): the value returned for an unknown ring_id is not visible
/// from this header - confirm against the implementation.
pka_dev_ring_t *pka_dev_get_ring(uint32_t ring_id);

/// Shim getter for the pka_dev_gbl_config_t structure, which holds all system
/// global configuration. This configuration is shared and common to the kernel
/// device driver associated with PKA hardware.
/// NOTE(review): the value returned for an unknown shim_id is not visible
/// from this header - confirm against the implementation.
pka_dev_shim_t *pka_dev_get_shim(uint32_t shim_id);

/// Register a Ring. This function initializes a Ring and configures its
/// related resources, and returns a pointer to that ring.
pka_dev_ring_t *pka_dev_register_ring(uint32_t ring_id, uint32_t shim_id);

/// Unregister a Ring.
int pka_dev_unregister_ring(pka_dev_ring_t *ring);

/// Register PKA IO block. This function initializes a shim and configures its
/// related resources, and returns a pointer to that shim.
pka_dev_shim_t *pka_dev_register_shim(uint32_t shim_id, uint8_t shim_fw_id,
                                      struct pka_dev_mem_res *mem_res);

/// Unregister PKA IO block.
int pka_dev_unregister_shim(pka_dev_shim_t *shim);

/// Reset a Ring.
int pka_dev_reset_ring(pka_dev_ring_t *ring);

/// Clear ring counters. This function resets the master sequencer controller
/// to clear the command and result counters.
int pka_dev_clear_ring_counters(pka_dev_ring_t *ring);
280 | int pka_dev_trng_read(pka_dev_shim_t *shim, uint32_t *data, uint32_t cnt); 281 | 282 | /// Return true if the TRNG engine is enabled, false if not. 283 | bool pka_dev_has_trng(pka_dev_shim_t *shim); 284 | 285 | #endif // __KERNEL__ 286 | 287 | /// Open the file descriptor associated with ring. It returns an integer value, 288 | /// which is used to refer to the file. If unsuccessful, it returns a negative 289 | /// error. 290 | int pka_dev_open_ring(pka_ring_info_t *ring_info); 291 | 292 | /// Close the file descriptor associated with ring. The function returns 0 if 293 | /// successful, negative value to indicate an error. 294 | int pka_dev_close_ring(pka_ring_info_t *ring_info); 295 | 296 | /// Map ring resources. 297 | int pka_dev_mmap_ring(pka_ring_info_t *ring_info); 298 | 299 | /// Unmap ring resources. 300 | int pka_dev_munmap_ring(pka_ring_info_t *ring_info); 301 | 302 | #endif /// __PKA_DEV_H__ 303 | -------------------------------------------------------------------------------- /lib/pka_internal.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_INTERNAL_H__ 5 | #define __PKA_INTERNAL_H__ 6 | 7 | #ifdef __KERNEL__ 8 | #include 9 | #else 10 | #include 11 | #include 12 | #include 13 | #include 14 | #endif 15 | 16 | #include "pka_queue.h" 17 | #include "pka_ring.h" 18 | 19 | #define PKA_LIB_VERSION "v1" 20 | 21 | #define PKA_DEFAULT_NAME "default" 22 | #define PKA_DEFAULT_SIZE (16 * MEGABYTE) // 16 MB 23 | 24 | #define PKA_MAX_QUEUES_NUM 48 25 | 26 | typedef struct 27 | { 28 | pka_queue_t *cmd_queue; ///< pointer to SW command queue. 29 | pka_queue_t *rslt_queue; ///< pointer to SW result queue. 
30 | } pka_worker_t; 31 | 32 | 33 | // Shared structure - Should be visible to PK process and threads 34 | typedef struct 35 | { 36 | pid_t main_pid; ///< main process identifier 37 | 38 | uint32_t requests_cnt; ///< command request counter. 39 | uint32_t queues_cnt; ///< number of queues supported. 40 | uint32_t cmd_queue_size; ///< size of a command queue. 41 | uint32_t rslt_queue_size; ///< size of a result queue. 42 | 43 | pka_atomic32_t workers_cnt; ///< number of active workers. 44 | pka_worker_t workers[PKA_MAX_QUEUES_NUM]; ///< table of initialized 45 | /// thread workers. 46 | 47 | uint32_t rings_byte_order; ///< byte order whether BE or LE. 48 | uint8_t rings_mask[PKA_RING_NUM_BITMASK]; ///< bitmask of allocated HW rings. 49 | uint32_t rings_cnt; ///< number of allocated Rings. 50 | pka_ring_info_t rings[PKA_MAX_NUM_RINGS]; ///< table of allocated rings 51 | /// to process PK commands. 52 | 53 | /// Lock-free implementations have higher performance and scale better 54 | /// than implementations using locks. User can decide whether to use 55 | /// lock-free implementation or its own locking mechanism by setting flags. 56 | /// these flags tend to optimize performance on platforms that implement 57 | /// a performance critical operation using locks. 58 | pka_atomic64_t lock; ///< protect shared resources. 59 | pka_flags_t flags; ///< flags supplied during creation. 60 | 61 | uint8_t *mem_ptr; ///< pointer to free memory space of 62 | /// SW queues. 63 | 64 | uint8_t mem[0] __pka_cache_aligned; ///< memory space of SW queues starts 65 | /// here. 66 | } pka_global_info_t; 67 | 68 | typedef struct 69 | { 70 | uint32_t id; ///< handle identifier - thread specific. 71 | uint32_t req_num; ///< number of outstanding requests. 72 | pka_global_info_t *gbl_info; ///< pointer to the instance information the 73 | /// handle belongs to. 74 | } pka_local_info_t; 75 | 76 | static pka_global_info_t *pka_gbl_info; ///< PK global information. 
// For future use - currently used for statistics and might be extended
// and edited later.
typedef struct
{
    /// Cycle count when cmd was submitted.
    uint64_t start_cycles;

    /// Overhead cycles count from submitting a cmd until pushing it to the
    /// HW ring.
    uint64_t overhead_cycles;

    /// Cmd processing cycles count.
    uint64_t processing_cycles;

    /// If set to 'PKA_CMD_STATS_VALID' then the stats entry is valid.
    uint32_t valid;
} pka_cmd_stats_t;

#define PKA_CMD_STATS_VALID 0xDEADBEEF

/// Statistics table of 4096 entries plus its cursor.
typedef struct
{
    /// Stats entries.
    pka_cmd_stats_t cmd_stats[4096];

    /// Index in 0 .. 4095; being a 12-bit field it wraps, which is permitted.
    uint16_t        index : 12;
} pka_cmd_stats_db_t;
/// It is also possible to support a mode of operation
/// whereby individual operand allocation is used when a single command
/// allocation fails for lack of memory (i.e. this can deal efficiently with
/// the occasional data memory fragmentation where there are enough contiguous
/// memory pieces to hold the individual operands, but no single piece large
/// enough to hold all of the operands).
///
/// This code assumes that Data Memory is in the bottom 14KB of the "PKA window
/// RAM" and so the addresses for the rings start at offset 0x3800.  Also, note
/// that just because the rings hold 16 descriptors does not mean that 16
/// commands can be outstanding - it is expected that the Data Memory will
/// often run out before any or all of the rings are full themselves.
/// Of course the opposite can also happen (though less likely) - that is, the
/// rings are full when the Data Memory is not!
///
/// Note also that ALL allocations handled by this code start at least on
/// 64-byte boundaries and ALL allocations have sizes that are a multiple of
/// 64 bytes.  The algorithm here always maximally coalesces contiguous free
/// space.  In other words, there is never a case where two free space
/// descriptors point to adjacent memory.  Of course the converse is not true:
/// used space blocks can be adjacent either to other used space blocks or to
/// free space blocks.
///
/// Valid free space descriptors (i.e. those whose size is not zero) are kept on
/// various lists based upon their size.  Non-valid free space descriptors (so
/// called "free" avail space descriptors) are linked on a single free list.
#define ALIGN_SHIFT             6
#define ALIGNMENT               (1 << ALIGN_SHIFT)
#define ALIGN_MASK              (ALIGNMENT - 1)
#define MAX_PADDING             (3 * ALIGNMENT)

#ifdef PKA_WINDOW_RAM_DATA_MEM_SIZE
#define DATA_MEM_SIZE           PKA_WINDOW_RAM_DATA_MEM_SIZE
#else
#define DATA_MEM_SIZE           0x3800  // 14 KB
#endif

#define MIN_ALLOC_SIZE          192
#define MAX_ALLOC_SIZE          2560

#define MAX_ALLOCS              248
#define MAX_CHUNK_IDX           250
#define NUM_OF_AVAIL_SIZES      40
#define MAX_MEM_MAP_IDX         ((DATA_MEM_SIZE >> ALIGN_SHIFT) - 1)

// Kinds encoded in the top 4 bits of a 16-bit MemMap value.
#define ON_FREE_LIST            0
#define AVAIL_MEM               1
#define USED_MEM                2

// Accessors for a 16-bit MemMap value: bits 15..12 hold the kind, the low
// bits hold either the chunk table index (available memory) or the used
// size.  The argument is parenthesised so that an expression such as
// `a | b` can be passed safely.
#define IS_AVAIL_MEM(map_value) (((map_value) >> 12) == AVAIL_MEM)
#define IS_USED_MEM(map_value)  (((map_value) >> 12) == USED_MEM)
#define MEM_DESC_IDX(map_value) ((map_value) & 0x00FF)
#define USED_SIZE(map_value)    ((map_value) & 0x0FFF)

typedef uint8_t pka_mem_idx_t;

/// This structure declares a "view" into memory allowing access to necessary
/// fields at known offsets from a given base.  The size field holds bytes
/// representing a multiple of 64, and can range in size from 64 bytes to
/// 14K bytes (i.e. all of Data Memory can be described by a single free space
/// descriptor and will be when there are no allocations).  A value of zero
/// indicates that this is NOT a currently valid descriptor i.e. it must be
/// on the free list.
typedef struct // 8 bytes long.
{
    uint16_t      offset;          ///< chunk offset in bytes.
    uint16_t      size;            ///< chunk size in bytes, including
                                   ///  overhead.

    pka_mem_idx_t next_chunk_idx;  ///< next chunk index in list.
    pka_mem_idx_t prev_chunk_idx;  ///< previous chunk index in list.

    uint8_t       kind;            ///< whether chunk is free or available.
    uint8_t       list_idx;        ///< chunk index in list.
} pka_mem_chunk_t;

/// This structure declares linked lists used by memory descriptor below.
typedef struct // 4 bytes long
{
    pka_mem_idx_t head;
    pka_mem_idx_t tail;
    uint8_t       size;
    uint8_t       list_idx;
} pka_mem_chunk_list_t;

/// This structure declares a "memory descriptor" which holds lists of the
/// available/free memory chunks, and a mapping of memory into chunks.
typedef struct
{
    // The following table is used to map a location in Data Memory into a
    // chunk OR a used size.  The input to the mapping is the offset from the
    // start of "PKA window RAM" divided by the ALIGNMENT.  The result of this
    // mapping fcn is a 16 bit integer - called the MemMap - which is used to
    // mark the memory as used or available and either give the used size or
    // give the index of the avail chunk table.  Only the start and end
    // locations of the covered used/avail memory have non-zero values in this
    // table.  Note in the (rare) case of the used/avail memory being ALIGNMENT
    // bytes in size, then the start location is the same as the end location,
    // but this still works out OK.
    uint16_t             mem_map_tbl[MAX_MEM_MAP_IDX + 1];

    pka_mem_chunk_list_t avail_lists[NUM_OF_AVAIL_SIZES];
    pka_mem_chunk_t      chunk_tbl[MAX_CHUNK_IDX + 1];

    // Note that the free_list is only singly-linked, even though these same
    // descriptors are doubly-linked when on the avail_lists!
    pka_mem_chunk_list_t free_list;

    uint32_t             alloc_cnt;
    uint32_t             alloc_bytes;
} pka_mem_desc_t;
/// Adds a contiguous memory chunk where vectors can be instantiated, for the
/// ring identified by ring_id.  It returns the offset of the allocated memory.
/// NOTE(review): the value returned when the allocation cannot be satisfied
/// is not visible from this header - confirm against the implementation.
uint16_t pka_mem_alloc(uint32_t ring_id, uint32_t size);

/// Free data memory.  Clear the memory entries from the mapping list, free the
/// memory chunks, and coalesce free contiguous memory chunks.  The chunks must
/// no longer be in use, as they will be freed.  The offset argument is
/// presumably a value previously returned by pka_mem_alloc for the same
/// ring_id - TODO confirm.
void pka_mem_free(uint32_t ring_id, uint16_t offset);

/// Create a new data memory in PKA Window RAM.  This function allocates memory
/// and makes it available.  All elements of the memory are allocated in one
/// contiguous chunk of memory.
void pka_mem_create(uint32_t ring_id);

/// Reset allocated PKA window RAM region.
/// NOTE(review): the meaning of dst_offset, mem_ptr and operands_size is
/// inferred from their names only - confirm against the implementation.
void pka_mem_reset(uint32_t dst_offset, void* mem_ptr, uint32_t operands_size);
17 | /// The implementation of the software-based queues help to leverage the small 18 | /// size of descriptor rings and avoid interrupts, so far (processes have to 19 | /// wait until a given ring can accept new descriptors again). 20 | /// Queues have the following properties : 21 | /// - FIFO, 22 | /// - Capacity is fixed, 23 | /// - Lockless implementation, 24 | /// However, having many circular queues with significant size may costs in 25 | /// terms of memory (more than linked list queue). An empty queue contains 26 | /// at least N pointers. 27 | /// 28 | /// Note that the current API implements an Enq/Deq a fixed number of items 29 | /// from a queue and does not support multi producer/consumer. 30 | /// 31 | /// Also note that the implementation includes a mechanism which exert a back 32 | /// pressure to inform a given client to pause. It defines a threshold, once 33 | /// an enqueue reaches the high threshold, the client is notified. 34 | /// 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include "pka_utils.h" 44 | #include "pka_ring.h" 45 | 46 | /// PK queue result descriptor structure. This structure characterize an 47 | /// item in PK SW queue. One can enqueue/dequeue descriptors similar to 48 | /// those associated with HW rings (64 bytes item). The purpose here is 49 | /// to decrease the overhead added due to SW queue during PK operations 50 | /// processing. This structure holds the minimal information required to 51 | /// retrieve a PK result. It also aims to increase the number of items 52 | /// -i.e. results in the result queue. 53 | typedef struct // 32 bytes 54 | { 55 | uint32_t size; // total size the result descriptor. This 56 | // field is common to both result and cmd 57 | // descriptors and MUST remain in the top 58 | // 32 bits of the two structure since it 59 | // is used for enqueuing and dequeuing objs 60 | // to/from the queue. 
61 | 62 | uint32_t cmd_num; // command request number. 63 | uint32_t result1_len; // length of the first result. 64 | uint32_t result2_len; // length of the second result. 65 | 66 | uint64_t user_data; // opaque user data information address. 67 | uint32_t opcode; // PK operation code 68 | uint8_t result_cnt; // might be 0, 1 or 2 69 | uint8_t status; // the raw result_code. 70 | uint8_t compare_result; // the raw compare_result. 71 | uint8_t queue_num; // queue number 72 | } pka_queue_rslt_desc_t __pka_aligned(8); 73 | 74 | #define QUEUE_RSLT_DESC_SIZE sizeof(pka_queue_rslt_desc_t) 75 | 76 | 77 | /// PK queue command descriptor structure. This structure characterize 78 | /// an item in PK SW command queue. One can enqueue/dequeue the HW rings 79 | /// descriptors (64 bytes item). The purpose here is to have a structure 80 | /// which holds the minimal information required to process PK commands 81 | /// and decrease the overhead added due to SW queue. Note that it tends 82 | /// to increase the number of items -i.e. results in the result queue. 83 | typedef struct // 40 bytes. 84 | { 85 | uint16_t size; // total size the command descriptor. This 86 | // field is common to both result and cmd 87 | // descriptors and MUST remain in the top 88 | // 32 bits of the two structure since it 89 | // is used for enqueuing and dequeuing objs 90 | // to/from the queue. 91 | 92 | uint8_t operand_cnt; // number of operands. 93 | uint8_t shift_cnt; // shift value used by the PK command. 94 | 95 | uint64_t user_data; // opaque user data information address. 96 | uint32_t opcode; // code of the requested PK command. 97 | uint32_t operands_len; // aligned and padded data vectors size. It 98 | // refers to size of both command and result 99 | // operands. 100 | 101 | uint32_t cmd_num; // command request number. 
102 | 103 | } pka_queue_cmd_desc_t __pka_aligned(8); 104 | 105 | #define QUEUE_CMD_DESC_SIZE sizeof(pka_queue_cmd_desc_t) 106 | 107 | #ifdef PKA_LIB_QUEUE_DEBUG 108 | // A structure that stores the queue statistics. 109 | struct pka_queue_debug_stats { 110 | uint64_t enq_success_objs; ///< Objects successfully enqueued. 111 | uint64_t enq_fail_objs; ///< Objects that failed to be enqueued. 112 | uint64_t deq_success_objs; ///< Objects successfully dequeued. 113 | uint64_t deq_fail_objs; ///< Objects that failed to be dequeued. 114 | } __pka_cache_aligned; 115 | #endif 116 | 117 | /* structure to hold a pair of head/tail values */ 118 | typedef struct { 119 | volatile uint32_t head; /**< Prod/consumer head. */ 120 | volatile uint32_t tail; /**< Prod/consumer tail. */ 121 | } pka_queue_headtail_t; 122 | 123 | typedef struct 124 | { 125 | uint32_t flags; ///< Flags supplied at creation. 126 | uint32_t size; ///< Size of the queue. 127 | uint32_t mask; ///< Mask (size-1) of queue. 128 | uint32_t capacity; ///< Usable size of queue. 129 | 130 | uint8_t pad0 __pka_cache_aligned; ///< empty cache line. 131 | 132 | // Queue producer status. 133 | pka_queue_headtail_t prod __pka_cache_aligned; 134 | uint8_t pad1 __pka_cache_aligned; ///< empty cache line. 135 | 136 | // Queue consumer status. 137 | pka_queue_headtail_t cons __pka_cache_aligned; 138 | uint8_t pad2 __pka_cache_aligned; ///< empty cache line. 139 | 140 | #ifdef PKA_LIB_QUEUE_DEBUG 141 | struct pka_queue_debug_stats stats; 142 | #endif 143 | 144 | // lock is required for multi-thread environment 145 | void *mutex; 146 | uint8_t mem[0] __pka_cache_aligned; ///< Memory space of queue starts here. 147 | /// not volatile so need to be careful 148 | /// about compiler re-ordering. 149 | } pka_queue_t; 150 | 151 | #define PKA_QUEUE_DESC_MAX_SIZE (1 << 12) // 4K bytes. 152 | 153 | #define PKA_QUEUE_TYPE_CMD 0x1 ///< the default type is command queue. 
154 | #define PKA_QUEUE_TYPE_RSLT 0x2 ///< Result queue type. 155 | 156 | #define PKA_QUEUE_MASK_SIZE (unsigned)(0x007fffff) ///< Queue mask size (8MB) 157 | // Debug-only statistics helper: adds 'n' to the 'stats.NAME_objs' counter of queue 'q' (NAME being the 'name' argument); expands to a no-op when PKA_LIB_QUEUE_DEBUG is not defined. 158 | #ifdef PKA_LIB_QUEUE_DEBUG 159 | #define __QUEUE_STAT_ADD(q, name, n) do { (q)->stats.name##_objs += (n); } while (0) 160 | #else 161 | #define __QUEUE_STAT_ADD(q, name, n) do {} while(0) 162 | #endif 163 | 164 | 165 | /// Calculate the memory size needed for a queue. This function returns the 166 | /// number of bytes needed for a queue, given the number of elements in it. 167 | /// This value is the sum of the size of the structure pka_queue_t and the 168 | /// size of the memory needed by the items. The value is aligned to a cache 169 | /// line size. 170 | ssize_t pka_queue_get_memsize(uint32_t size); 171 | 172 | /// Create a new queue in memory then initialize a queue structure in memory 173 | /// pointed to by "mem". The size of the memory area must be large enough to 174 | /// store the queue header and data. 175 | /// It is advised to use "pka_queue_get_memsize()" to get the appropriate size. 176 | /// The queue size must be a power of two. Water marking is disabled by default. 177 | /// The real usable queue size is 'size-1' instead of 'size' to differentiate a 178 | /// full queue from an empty queue. 179 | /// 180 | /// Indeed, current implementation supposes that the memory given by the caller 181 | /// is shareable among PKA applications. 182 | pka_queue_t *pka_queue_create(ssize_t size, uint32_t flags, void *mem); 183 | 184 | /// Free the given queue. 185 | void pka_queue_free(pka_queue_t **queue); 186 | 187 | /// Change the high water mark. If 'size' is 0, water marking is disabled. 188 | /// Otherwise, it is set to the 'size' value. The 'size' value must be 189 | /// greater than 0 and less than the queue size. This function can be called 190 | /// at any time (not necessarily at initialization). 
191 | int pka_queue_set_water_mark(pka_queue_t *queue, uint32_t size); 192 | 193 | /// Enqueue a command on the queue (copy command from user context -> queue). 194 | int pka_queue_cmd_enqueue(pka_queue_t *queue, 195 | pka_queue_cmd_desc_t *cmd_desc, 196 | pka_operands_t *operands); 197 | 198 | /// Enqueue a result on the queue (copy result from ring -> queue). 199 | int pka_queue_rslt_enqueue(pka_queue_t *queue, 200 | pka_ring_info_t *ring, 201 | pka_ring_hw_rslt_desc_t *ring_desc, 202 | pka_queue_rslt_desc_t *rslt_desc); 203 | 204 | /// Dequeue a command from a queue (copy cmd from queue -> ring). 205 | int pka_queue_cmd_dequeue(pka_queue_t *queue, 206 | pka_ring_hw_cmd_desc_t *ring_desc, 207 | pka_ring_alloc_t *alloc); 208 | 209 | /// Dequeue a result from a queue (copy result from queue -> user context). 210 | int pka_queue_rslt_dequeue_by_user_data(pka_queue_t *queue, 211 | pka_queue_rslt_desc_t *rslt_desc, 212 | pka_results_t *results, 213 | void *user_data); 214 | 215 | /// Set queue command descriptor. 216 | int pka_queue_set_cmd_desc(pka_queue_cmd_desc_t *cmd_desc, 217 | uint32_t cmd_num, 218 | void *user_data, 219 | pka_opcode_t opcode, 220 | pka_operands_t *operands); 221 | 222 | /// Set queue result descriptor. 223 | int pka_queue_set_rslt_desc(pka_queue_rslt_desc_t *rslt_desc, 224 | pka_ring_hw_rslt_desc_t *ring_desc, 225 | uint32_t cmd_num, 226 | uint64_t user_data, 227 | uint8_t queue_num); 228 | 229 | /// Load a command descriptor from a queue. 230 | int pka_queue_load_cmd_desc(pka_queue_cmd_desc_t *cmd_desc, pka_queue_t *queue); 231 | 232 | /// Return the number of entries in a queue (in bytes). 233 | static inline uint32_t pka_queue_count(pka_queue_t *queue) 234 | { 235 | uint32_t prod_tail = queue->prod.tail; 236 | uint32_t cons_tail = queue->cons.tail; 237 | uint32_t count = (prod_tail - cons_tail) & queue->mask; 238 | return (count > queue->capacity) ? 
queue->capacity : count; 239 | } 240 | 241 | /// Return the number of free entries in a queue (in bytes). 242 | static inline uint32_t pka_queue_free_count(pka_queue_t *queue) 243 | { 244 | return queue->capacity - pka_queue_count(queue); 245 | } 246 | 247 | /// Test if a queue is full. Returns 1 if a queue is full, 0 if not. 248 | static inline int pka_queue_is_full(pka_queue_t *queue) 249 | { 250 | return pka_queue_free_count(queue) == 0; 251 | } 252 | 253 | /// Test if a queue is empty. Returns 1 if a queue is empty, 0 if not. 254 | static inline int pka_queue_is_empty(pka_queue_t *queue) 255 | { 256 | return pka_queue_count(queue) == 0; 257 | } 258 | 259 | /// Dump the status of the queue on the console. 260 | void pka_queue_dump(pka_queue_t *queue); 261 | 262 | #endif /// __PKA_QUEUE_H__ 263 | -------------------------------------------------------------------------------- /lib/pka_ring.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_RING_H__ 5 | #define __PKA_RING_H__ 6 | 7 | /// 8 | /// @file 9 | /// 10 | /// This file forms an interface to the BlueField Public Key Accelerator based 11 | /// on EIP-154. 12 | /// 13 | /// Rings are used as a communication mechanism between ARM cores (controller) 14 | /// and the farm engines controlled by EIP-154 master firmware. 15 | /// 16 | /// Note that the API defines data structures and functions to manage rings 17 | /// within window RAM, and to enqueue/dequeue descriptors. Rings are considered 18 | /// as a memory of descriptors (command/result descriptors) using finite size 19 | /// circular queue and a couple of control status registers (count registers). 
20 | /// 21 | 22 | 23 | #ifdef __KERNEL__ 24 | #include 25 | #else 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include // for O_* constants 32 | #include 33 | #include "pka_vectors.h" 34 | #endif 35 | 36 | #ifdef PKA_LIB_RING_DEBUG 37 | // A structure that stores the ring statistics. The struct tag is required because pka_ring_info_t embeds it as 'struct pka_ring_debug_stats'. 38 | typedef struct pka_ring_debug_stats 39 | { 40 | uint64_t enq_success_cmd; ///< Cmd descriptors successfully enqueued. 41 | uint64_t enq_fail_cmd; ///< Cmd descriptors that failed to be enqueued. 42 | uint64_t deq_success_rslt; ///< Rslt descriptors successfully dequeued. 43 | uint64_t deq_fail_rslt; ///< Rslt descriptors that failed to be dequeued. 44 | } pka_ring_debug_stats __pka_cache_aligned; 45 | #endif 46 | // Debug-only statistics helper: adds 'n' to the 'stats' counter called 'name' of ring 'r'; expands to a no-op when PKA_LIB_RING_DEBUG is not defined. 47 | #ifdef PKA_LIB_RING_DEBUG 48 | #define __RING_STAT_ADD(r, name, n) do { (r)->stats.name += (n); } while (0) 49 | #else 50 | #define __RING_STAT_ADD(r, name, n) do {} while(0) 51 | #endif 52 | 53 | /// Bluefield PKA command descriptor. 54 | typedef struct // 64 bytes long. 64 bytes aligned 55 | { 56 | uint64_t pointer_a; 57 | uint64_t pointer_b; 58 | uint64_t pointer_c; 59 | uint64_t pointer_d; 60 | uint64_t tag; 61 | uint64_t pointer_e; 62 | 63 | #ifdef __AARCH64EB__ 64 | uint64_t linked : 1; 65 | uint64_t driver_status : 2; 66 | uint64_t odd_powers : 5; ///< shiftCnt for shift ops 67 | uint64_t kdk : 2; ///< Key Decryption Key number 68 | uint64_t encrypted_mask : 6; 69 | uint64_t rsvd_3 : 8; 70 | uint64_t command : 8; 71 | uint64_t rsvd_2 : 5; 72 | uint64_t length_b : 9; 73 | uint64_t output_attr : 1; 74 | uint64_t input_attr : 1; 75 | uint64_t rsvd_1 : 5; 76 | uint64_t length_a : 9; 77 | uint64_t rsvd_0 : 2; 78 | #else 79 | uint64_t rsvd_0 : 2; 80 | uint64_t length_a : 9; 81 | uint64_t rsvd_1 : 5; 82 | uint64_t input_attr : 1; 83 | uint64_t output_attr : 1; 84 | uint64_t length_b : 9; 85 | uint64_t rsvd_2 : 5; 86 | uint64_t command : 8; 87 | uint64_t rsvd_3 : 8; 88 | uint64_t encrypted_mask : 6; 89 | uint64_t kdk : 2; ///< Key Decryption Key number 90 | uint64_t 
odd_powers : 5; ///< shiftCnt for shift ops 91 | uint64_t driver_status : 2; 92 | uint64_t linked : 1; 93 | #endif 94 | 95 | uint64_t rsvd_4; 96 | } pka_ring_hw_cmd_desc_t; 97 | 98 | #define CMD_DESC_SIZE sizeof(pka_ring_hw_cmd_desc_t) // Must be 64 99 | 100 | /// Bluefield PKA result descriptor. 101 | typedef struct // 64 bytes long. 64 bytes aligned 102 | { 103 | uint64_t pointer_a; 104 | uint64_t pointer_b; 105 | uint64_t pointer_c; 106 | uint64_t pointer_d; 107 | uint64_t tag; 108 | 109 | #ifdef __AARCH64EB__ 110 | uint64_t rsvd_5 : 13; 111 | uint64_t cmp_result : 3; 112 | uint64_t modulo_is_0 : 1; 113 | uint64_t rsvd_4 : 2; 114 | uint64_t modulo_msw_offset : 11; 115 | uint64_t rsvd_3 : 2; 116 | uint64_t rsvd_2 : 11; 117 | uint64_t main_result_msb_offset : 5; 118 | uint64_t result_is_0 : 1; 119 | uint64_t rsvd_1 : 2; 120 | uint64_t main_result_msw_offset : 11; 121 | uint64_t rsvd_0 : 2; 122 | 123 | uint64_t linked : 1; 124 | uint64_t driver_status : 2; ///< Always written to 0 125 | uint64_t odd_powers : 5; ///< shiftCnt for shift ops 126 | uint64_t kdk : 2; ///< Key Decryption Key number 127 | uint64_t encrypted_mask : 6; 128 | uint64_t result_code : 8; 129 | uint64_t command : 8; 130 | uint64_t rsvd_8 : 5; 131 | uint64_t length_b : 9; 132 | uint64_t output_attr : 1; 133 | uint64_t input_attr : 1; 134 | uint64_t rsvd_7 : 5; 135 | uint64_t length_a : 9; 136 | uint64_t rsvd_6 : 2; 137 | #else 138 | uint64_t rsvd_0 : 2; 139 | uint64_t main_result_msw_offset : 11; 140 | uint64_t rsvd_1 : 2; 141 | uint64_t result_is_0 : 1; 142 | uint64_t main_result_msb_offset : 5; 143 | uint64_t rsvd_2 : 11; 144 | uint64_t rsvd_3 : 2; 145 | uint64_t modulo_msw_offset : 11; 146 | uint64_t rsvd_4 : 2; 147 | uint64_t modulo_is_0 : 1; 148 | uint64_t cmp_result : 3; 149 | uint64_t rsvd_5 : 13; 150 | 151 | uint64_t rsvd_6 : 2; 152 | uint64_t length_a : 9; 153 | uint64_t rsvd_7 : 5; 154 | uint64_t input_attr : 1; 155 | uint64_t output_attr : 1; 156 | uint64_t length_b : 9; 157 | uint64_t 
rsvd_8 : 5; 158 | uint64_t command : 8; 159 | uint64_t result_code : 8; 160 | uint64_t encrypted_mask : 6; 161 | uint64_t kdk : 2; ///< Key Decryption Key number 162 | uint64_t odd_powers : 5; ///< shiftCnt for shift ops 163 | uint64_t driver_status : 2; ///< Always written to 0 164 | uint64_t linked : 1; 165 | #endif 166 | 167 | uint64_t rsvd_9; 168 | } pka_ring_hw_rslt_desc_t; 169 | 170 | #define RESULT_DESC_SIZE sizeof(pka_ring_hw_rslt_desc_t) // Must be 64 171 | 172 | /// Describes a PKA command/result ring as used by the hardware. A pair of 173 | /// command and result rings in PKA window memory, and the data memory used 174 | /// by the commands. 175 | typedef struct 176 | { 177 | uint32_t num_descs; ///< total number of descriptors in the ring. 178 | 179 | uint32_t cmd_ring_base; ///< base address of the command ring. 180 | uint32_t cmd_idx; ///< index of the command in a ring. 181 | 182 | uint32_t rslt_ring_base; ///< base address of the result ring. 183 | uint32_t rslt_idx; ///< index of the result in a ring. 184 | 185 | uint32_t operands_base; ///< operands memory base address. 186 | uint32_t operands_end; ///< end address of operands memory. 187 | 188 | uint32_t desc_size; ///< size of each element in the ring. 189 | 190 | uint64_t cmd_desc_mask; ///< bitmask of free(0)/in_use(1) cmd descriptors. 191 | uint32_t cmd_desc_cnt; ///< number of command descriptors currently in use. 192 | uint32_t rslt_desc_cnt; ///< number of result descriptors currently ready. 193 | } pka_ring_desc_t; 194 | 195 | /// This structure declares ring parameters which can be used by user interface. 196 | typedef struct 197 | { 198 | int fd; ///< file descriptor. 199 | int group; ///< iommu group. 200 | int container; ///< vfio container 201 | 202 | uint32_t idx; ///< ring index. 203 | uint32_t ring_id; ///< hardware ring identifier. 204 | 205 | uint64_t mem_off; ///< offset specific to window RAM region. 206 | uint64_t mem_addr; ///< window RAM region address. 
207 | uint64_t mem_size; ///< window RAM region size. 208 | 209 | uint64_t reg_off; ///< offset specific to count registers region. 210 | uint64_t reg_addr; ///< count registers region address. 211 | uint64_t reg_size; ///< count registers region size. 212 | 213 | void *mem_ptr; ///< pointer to mapped memory region. 214 | void *reg_ptr; ///< pointer to mapped counters region. 215 | 216 | pka_ring_desc_t ring_desc; ///< ring descriptor. 217 | 218 | #ifdef PKA_LIB_RING_DEBUG 219 | struct pka_ring_debug_stats stats; 220 | #endif 221 | 222 | uint8_t big_endian; ///< big endian byte order when enabled. 223 | } pka_ring_info_t; 224 | /// Operands placement record: destination/end offsets plus a pointer to the related ring. 225 | typedef struct 226 | { 227 | uint32_t dst_offset; ///< operands destination offset. 228 | uint32_t max_dst_offset; ///< operands end offset. 229 | 230 | pka_ring_info_t *ring; 231 | } pka_ring_alloc_t; 232 | 233 | // This structure encapsulates 'user data' information; it also includes 234 | // additional information useful for command processing and statistics. 235 | typedef struct 236 | { 237 | uint64_t valid; ///< if set to 'PKA_UDATA_INFO_VALID' then info is valid 238 | uint64_t user_data; ///< opaque user address. 239 | uint64_t cmd_num; ///< command request number. 240 | uint8_t cmd_desc_idx; ///< index of the cmd descriptor in HW rings 241 | uint8_t ring_num; ///< ring number. 242 | uint8_t queue_num; ///< queue number. 243 | } pka_udata_info_t; 244 | 245 | #define PKA_UDATA_INFO_VALID 0xDEADBEEF 246 | 247 | // This structure consists of a database to store user data information. 248 | // Note that a database should be associated with a hardware ring. 249 | typedef struct 250 | { 251 | pka_udata_info_t entries[256]; // user data information entries. 252 | uint8_t index; // entry index. Wrapping is permitted (a uint8_t spans exactly the 256 entries). 253 | } pka_udata_db_t; 254 | 255 | #ifndef __KERNEL__ 256 | /// Look up 'req_rings_num' number of rings. This function searches for a 257 | /// set of free hardware rings which can be used. 
It returns 0 on success, 258 | /// a negative error code on failure. Note that it also returns the number 259 | /// of rings found (cnt - might be less than the requested number if not enough 260 | /// rings are available), the associated mask, and a table of rings matching 261 | /// that number. 262 | int pka_ring_lookup(pka_ring_info_t rings[], 263 | uint32_t req_rings_num, 264 | uint8_t byte_order, 265 | uint8_t mask[], 266 | uint32_t *cnt); 267 | 268 | /// Free a set of assigned rings, referred to by their number (cnt) and their mask. 269 | /// It returns 0 on success, a negative error code on failure. 270 | int pka_ring_free(pka_ring_info_t rings[], uint8_t mask[], uint32_t *cnt); 271 | 272 | /// Returns the amount of room available to append command descriptors 273 | /// within a given ring. 274 | uint32_t pka_ring_has_available_room(pka_ring_info_t *ring); 275 | 276 | /// Returns the number of available results (when result is ready). Note that 277 | /// the returned value may reflect the number of processed commands. 278 | uint32_t pka_ring_has_ready_rslt(pka_ring_info_t *ring); 279 | 280 | /// Return whether the returned pointer to user data info is valid or not. 281 | bool pka_ring_pop_tag(pka_ring_hw_rslt_desc_t *result_desc, 282 | uint64_t *user_data, 283 | uint64_t *cmd_num, 284 | uint8_t *queue_num, 285 | uint8_t *ring_num); 286 | 287 | /// Set ring command descriptor tag which is used to hold a pointer to user 288 | /// data info associated with a cmd. 289 | void pka_ring_push_tag(pka_ring_hw_cmd_desc_t *cmd, 290 | uint64_t user_data, 291 | uint64_t cmd_num, 292 | uint8_t queue_num, 293 | uint8_t ring_num); 294 | 295 | /// Write the command descriptor according to the PK command. This function 296 | /// should be called before enqueuing the descriptor on a ring. 
297 | int pka_ring_set_cmd_desc(pka_ring_hw_cmd_desc_t *cmd, 298 | pka_ring_alloc_t *alloc, 299 | pka_opcode_t opcode, 300 | uint32_t operand_cnt, 301 | uint32_t shift_cnt, 302 | pka_operand_t operands[]); 303 | 304 | /// Enqueue one command descriptor on a ring. This function verifies if there 305 | /// is space in the queue for the command and append the descriptor. It returns 306 | /// 0 on success, a negative error code on failure. 307 | int pka_ring_enqueue_cmd_desc(pka_ring_info_t *ring, 308 | pka_ring_hw_cmd_desc_t *cmd_desc); 309 | 310 | /// Dequeue one result descriptor from a ring. This function verifies if there 311 | /// is a ready result in the queue for the command and read the descriptor. It 312 | /// returns 0 on success, a negative error code on failure. 313 | int pka_ring_dequeue_rslt_desc(pka_ring_info_t *ring, 314 | pka_ring_hw_rslt_desc_t *result_desc); 315 | 316 | /// Get the output vector(s) associated with a result descriptor from ring 317 | /// memory and copy it to a queue. It returns the queue head address. 318 | uint32_t pka_ring_get_result(pka_ring_info_t *ring, 319 | pka_ring_hw_rslt_desc_t *result_desc, 320 | uint8_t *queue_ptr, 321 | uint32_t queue_size, 322 | uint32_t result1_offset, 323 | uint32_t result2_offset, 324 | uint32_t result1_size, 325 | uint32_t result2_size); 326 | 327 | /// Set the size of result operands and return the number of results associated 328 | /// with a given PK command. 
329 | uint32_t pka_ring_results_len(pka_ring_hw_rslt_desc_t *result_desc, 330 | uint32_t *result1_len, 331 | uint32_t *result2_len); 332 | 333 | /// Dump the status of the ring on the console 334 | void pka_ring_dump(pka_ring_info_t *r); 335 | 336 | #endif // !__KERNEL__ 337 | 338 | #endif /// __PKA_RING_H__ 339 | 340 | 341 | -------------------------------------------------------------------------------- /lib/pka_vectors.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 NVIDIA Corporation & affiliates. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef __PKA_VECTORS_H__ 5 | #define __PKA_VECTORS_H__ 6 | 7 | #ifdef __KERNEL__ 8 | #include 9 | #else 10 | #include 11 | #endif 12 | 13 | #include "pka.h" 14 | #include "pka_types.h" 15 | 16 | // EIP-154 maximum operand lengths in bytes: 17 | #define MAX_GEN_VEC_SZ (258 * 4) 18 | #define MAX_MODEXP_CRT_VEC_SZ (130 * 4) 19 | #define MAX_ECC_VEC_SZ ( 24 * 4) 20 | 21 | // The following maximum lengths are deliberately a little larger than the 22 | // actual HW limits above so as to allow for algorithms that might have 23 | // intermediate 24 | #define MAX_BUF (260 * 4) // EIP154 max byte length is actually 258 * 4 25 | #define MAX_ECC_BUF (25 * 4) // EIP154 max ECC length is actually 24 * 4 26 | 27 | /// The pka_operands_t record type is used to package the entire set of 28 | /// input operands (big integers) of a single crypto operation. 29 | typedef struct // 4 + (16 * 11) = 180 bytes long 30 | { 31 | uint8_t operand_cnt; ///< Number of valid operands. 32 | uint8_t shift_amount; ///< Holds the shift amount arg. 33 | uint8_t encrypt_results[2]; ///< Reserved for future use. 34 | pka_operand_t operands[MAX_OPERAND_CNT]; ///< Actual operand descriptors. 
35 | } pka_operands_t; 36 | 37 | typedef struct 38 | { 39 | pka_operand_t p; 40 | pka_operand_t q; 41 | pka_operand_t g; 42 | } dsa_domain_params_t; 43 | 44 | typedef struct 45 | { 46 | pka_operand_t *modulus; 47 | pka_operand_t *private; 48 | pka_operand_t *public; 49 | 50 | pka_operand_t *p; 51 | pka_operand_t *q; 52 | pka_operand_t *dp; 53 | pka_operand_t *dq; 54 | pka_operand_t *qInv; 55 | } rsa_system_t; 56 | 57 | #endif // __PKA_VECTORS_H__ 58 | -------------------------------------------------------------------------------- /libpka.spec: -------------------------------------------------------------------------------- 1 | %global _hardened_build 1 2 | %{!?rhel: %global rhel 8} 3 | 4 | %if 0%{?rhel} < 8 5 | %global openssl_ver 11 6 | %global configure_flags --with-libcrypto=libcrypto11 LIBCRYPTO_LIBS="-l:libcrypto.so.1.1" 7 | %endif 8 | 9 | Name: libpka 10 | Epoch: 1 11 | Version: 2.0 12 | Release: 2%{?dist} 13 | Summary: NVIDIA BlueField Public Key Acceleration (PKA) library 14 | Group: Development/Libraries 15 | License: BSD-3-Clause AND OpenSSL 16 | URL: https://github.com/Mellanox/pka 17 | Source: %{name}-%{version}.tar.gz 18 | 19 | ExclusiveArch: aarch64 20 | BuildRequires: automake, autoconf, doxygen, libtool, pkgconfig 21 | BuildRequires: openssl%{?openssl_ver}-devel 22 | Requires: openssl%{?openssl_ver}-libs 23 | 24 | %description 25 | This package provides Public Key Acceleration (PKA) API implementation for NVIDIA BlueField 26 | 27 | %package devel 28 | Summary: Development files for libpka 29 | Group: Development/Libraries 30 | Requires: %{name} = %{epoch}:%{version}-%{release} 31 | 32 | %description devel 33 | Provides header files for linking with libpka 34 | 35 | %package engine 36 | Summary: OpenSSL dynamic engine for NVIDIA BlueField PKA 37 | Group: Development/Libraries 38 | ExclusiveArch: aarch64 39 | Requires: %{name} = %{epoch}:%{version}-%{release}, openssl%{?openssl_ver}-libs 40 | 41 | %description engine 42 | This package provides 
OpenSSL dynamic engine component to support hardware implementation of 43 | RSA, DSA, DH, ECDH and ECDSA operations with the BlueField PKA hardware. 44 | 45 | %package testutils 46 | Summary: Test utilities for NVIDIA BlueField PKA 47 | Group: Development/Libraries 48 | ExclusiveArch: aarch64 49 | Requires: %{name} = %{epoch}:%{version}-%{release} 50 | 51 | %description testutils 52 | This package provides validation utilities for testing libpka functionality with NVIDIA BlueField PKA hardware. 53 | 54 | %package doc 55 | Summary: Documentation for libpka package 56 | Group: Documentation 57 | 58 | %description doc 59 | Provides libpka API documentation and PDF API specification for libpka package 60 | 61 | %prep 62 | %autosetup 63 | 64 | %build 65 | autoreconf -fiv 66 | %configure --docdir=%{_pkgdocdir} %{?configure_flags} 67 | %make_build 68 | 69 | %install 70 | %make_install 71 | find %{buildroot} -name "*.la" -delete 72 | %{__ln_s} libbfengine.so `find %{buildroot}%{_libdir} -iname 'libbfengine.so' -printf '%%h/pka.so'` 73 | 74 | %files 75 | %defattr(-, root, root) 76 | %license %{_pkgdocdir}/COPYING 77 | %doc %{_pkgdocdir}/README 78 | %{_libdir}/*.so* 79 | 80 | %files engine 81 | %defattr(-, root, root) 82 | %license %{_pkgdocdir}/COPYING 83 | %doc %{_pkgdocdir}/README.engine 84 | %{_libdir}/engine*/*.so 85 | 86 | %files testutils 87 | %defattr(-, root, root) 88 | %license %{_pkgdocdir}/COPYING 89 | %doc %{_pkgdocdir}/README.tests 90 | %{_bindir}/pka_* 91 | 92 | %files devel 93 | %defattr(-, root, root) 94 | %license %{_pkgdocdir}/COPYING 95 | %{_includedir}/*.h 96 | 97 | %files doc 98 | %defattr(-, root, root) 99 | %license %{_pkgdocdir}/COPYING 100 | %doc %{_pkgdocdir}/html 101 | %doc %{_pkgdocdir}/pdf 102 | -------------------------------------------------------------------------------- /m4/ax_pthread.m4: -------------------------------------------------------------------------------- 1 | # 
=========================================================================== 2 | # https://www.gnu.org/software/autoconf-archive/ax_pthread.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # This macro figures out how to build C programs using POSIX threads. It 12 | # sets the PTHREAD_LIBS output variable to the threads library and linker 13 | # flags, and the PTHREAD_CFLAGS output variable to any special C compiler 14 | # flags that are needed. (The user can also force certain compiler 15 | # flags/libs to be tested by setting these environment variables.) 16 | # 17 | # Also sets PTHREAD_CC to any special C compiler that is needed for 18 | # multi-threaded programs (defaults to the value of CC otherwise). (This 19 | # is necessary on AIX to use the special cc_r compiler alias.) 20 | # 21 | # NOTE: You are assumed to not only compile your program with these flags, 22 | # but also to link with them as well. For example, you might link with 23 | # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS 24 | # 25 | # If you are only building threaded programs, you may wish to use these 26 | # variables in your default LIBS, CFLAGS, and CC: 27 | # 28 | # LIBS="$PTHREAD_LIBS $LIBS" 29 | # CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 30 | # CC="$PTHREAD_CC" 31 | # 32 | # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant 33 | # has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to 34 | # that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). 35 | # 36 | # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the 37 | # PTHREAD_PRIO_INHERIT symbol is defined when compiling with 38 | # PTHREAD_CFLAGS. 
39 | # 40 | # ACTION-IF-FOUND is a list of shell commands to run if a threads library 41 | # is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it 42 | # is not found. If ACTION-IF-FOUND is not specified, the default action 43 | # will define HAVE_PTHREAD. 44 | # 45 | # Please let the authors know if this macro fails on any platform, or if 46 | # you have any other suggestions or comments. This macro was based on work 47 | # by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help 48 | # from M. Frigo), as well as ac_pthread and hb_pthread macros posted by 49 | # Alejandro Forero Cuervo to the autoconf macro repository. We are also 50 | # grateful for the helpful feedback of numerous users. 51 | # 52 | # Updated for Autoconf 2.68 by Daniel Richard G. 53 | # 54 | # LICENSE 55 | # 56 | # Copyright (c) 2008 Steven G. Johnson 57 | # Copyright (c) 2011 Daniel Richard G. 58 | # 59 | # This program is free software: you can redistribute it and/or modify it 60 | # under the terms of the GNU General Public License as published by the 61 | # Free Software Foundation, either version 3 of the License, or (at your 62 | # option) any later version. 63 | # 64 | # This program is distributed in the hope that it will be useful, but 65 | # WITHOUT ANY WARRANTY; without even the implied warranty of 66 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 67 | # Public License for more details. 68 | # 69 | # You should have received a copy of the GNU General Public License along 70 | # with this program. If not, see . 71 | # 72 | # As a special exception, the respective Autoconf Macro's copyright owner 73 | # gives unlimited permission to copy, distribute and modify the configure 74 | # scripts that are the output of Autoconf when processing the Macro. 
You 75 | # need not follow the terms of the GNU General Public License when using 76 | # or distributing such scripts, even though portions of the text of the 77 | # Macro appear in them. The GNU General Public License (GPL) does govern 78 | # all other use of the material that constitutes the Autoconf Macro. 79 | # 80 | # This special exception to the GPL applies to versions of the Autoconf 81 | # Macro released by the Autoconf Archive. When you make and distribute a 82 | # modified version of the Autoconf Macro, you may extend this special 83 | # exception to the GPL to apply to your modified version as well. 84 | 85 | #serial 24 86 | 87 | AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) 88 | AC_DEFUN([AX_PTHREAD], [ 89 | AC_REQUIRE([AC_CANONICAL_HOST]) 90 | AC_REQUIRE([AC_PROG_CC]) 91 | AC_REQUIRE([AC_PROG_SED]) 92 | AC_LANG_PUSH([C]) 93 | ax_pthread_ok=no 94 | 95 | # We used to check for pthread.h first, but this fails if pthread.h 96 | # requires special compiler flags (e.g. on Tru64 or Sequent). 97 | # It gets checked for in the link test anyway.
98 | 99 | # First of all, check if the user has set any of the PTHREAD_LIBS, 100 | # etcetera environment variables, and if threads linking works using 101 | # them: 102 | if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then 103 | ax_pthread_save_CC="$CC" 104 | ax_pthread_save_CFLAGS="$CFLAGS" 105 | ax_pthread_save_LIBS="$LIBS" 106 | AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) 107 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 108 | LIBS="$PTHREAD_LIBS $LIBS" 109 | AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) 110 | AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) 111 | AC_MSG_RESULT([$ax_pthread_ok]) 112 | if test "x$ax_pthread_ok" = "xno"; then 113 | PTHREAD_LIBS="" 114 | PTHREAD_CFLAGS="" 115 | fi 116 | CC="$ax_pthread_save_CC" 117 | CFLAGS="$ax_pthread_save_CFLAGS" 118 | LIBS="$ax_pthread_save_LIBS" 119 | fi 120 | 121 | # We must check for the threads library under a number of different 122 | # names; the ordering is very important because some systems 123 | # (e.g. DEC) have both -lpthread and -lpthreads, where one of the 124 | # libraries is broken (non-POSIX). 125 | 126 | # Create a list of thread flags to try. Items starting with a "-" are 127 | # C compiler flags, and other items are library names, except for "none" 128 | # which indicates that we try without any flags at all, and "pthread-config" 129 | # which is a program returning the flags for the Pth emulation library. 130 | 131 | ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" 132 | 133 | # The ordering *is* (sometimes) important.
Some notes on the 134 | # individual items follow: 135 | 136 | # pthreads: AIX (must check this before -lpthread) 137 | # none: in case threads are in libc; should be tried before -Kthread and 138 | # other compiler flags to prevent continual compiler warnings 139 | # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) 140 | # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 141 | # (Note: HP C rejects this with "bad form for `-t' option") 142 | # -pthreads: Solaris/gcc (Note: HP C also rejects) 143 | # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it 144 | # doesn't hurt to check since this sometimes defines pthreads and 145 | # -D_REENTRANT too), HP C (must be checked before -lpthread, which 146 | # is present but should not be used directly; and before -mthreads, 147 | # because the compiler interprets this as "-mt" + "-hreads") 148 | # -mthreads: Mingw32/gcc, Lynx/gcc 149 | # pthread: Linux, etcetera 150 | # --thread-safe: KAI C++ 151 | # pthread-config: use pthread-config program (for GNU Pth library) 152 | 153 | case $host_os in 154 | 155 | freebsd*) 156 | 157 | # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) 158 | # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) 159 | 160 | ax_pthread_flags="-kthread lthread $ax_pthread_flags" 161 | ;; 162 | 163 | hpux*) 164 | 165 | # From the cc(1) man page: "[-mt] Sets various -D flags to enable 166 | # multi-threading and also sets -lpthread." 167 | 168 | ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" 169 | ;; 170 | 171 | openedition*) 172 | 173 | # IBM z/OS requires a feature-test macro to be defined in order to 174 | # enable POSIX threads at all, so give the user a hint if this is 175 | # not set. (We don't define these ourselves, as they can affect 176 | # other portions of the system API in unpredictable ways.)
177 | 178 | AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], 179 | [ 180 | # if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) 181 | AX_PTHREAD_ZOS_MISSING 182 | # endif 183 | ], 184 | [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) 185 | ;; 186 | 187 | solaris*) 188 | 189 | # On Solaris (at least, for some versions), libc contains stubbed 190 | # (non-functional) versions of the pthreads routines, so link-based 191 | # tests will erroneously succeed. (N.B.: The stubs are missing 192 | # pthread_cleanup_push, or rather a function called by this macro, 193 | # so we could check for that, but who knows whether they'll stub 194 | # that too in a future libc.) So we'll check first for the 195 | # standard Solaris way of linking pthreads (-mt -lpthread). 196 | 197 | ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" 198 | ;; 199 | esac 200 | 201 | # GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) 202 | 203 | AS_IF([test "x$GCC" = "xyes"], 204 | [ax_pthread_flags="-pthread -pthreads $ax_pthread_flags"]) 205 | 206 | # The presence of a feature test macro requesting re-entrant function 207 | # definitions is, on some systems, a strong hint that pthreads support is 208 | # correctly enabled 209 | 210 | case $host_os in 211 | darwin* | hpux* | linux* | osf* | solaris*) 212 | ax_pthread_check_macro="_REENTRANT" 213 | ;; 214 | 215 | aix*) 216 | ax_pthread_check_macro="_THREAD_SAFE" 217 | ;; 218 | 219 | *) 220 | ax_pthread_check_macro="--" 221 | ;; 222 | esac 223 | AS_IF([test "x$ax_pthread_check_macro" = "x--"], 224 | [ax_pthread_check_cond=0], 225 | [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) 226 | 227 | # Are we compiling with Clang?
228 | 229 | AC_CACHE_CHECK([whether $CC is Clang], 230 | [ax_cv_PTHREAD_CLANG], 231 | [ax_cv_PTHREAD_CLANG=no 232 | # Note that Autoconf sets GCC=yes for Clang as well as GCC 233 | if test "x$GCC" = "xyes"; then 234 | AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], 235 | [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ 236 | # if defined(__clang__) && defined(__llvm__) 237 | AX_PTHREAD_CC_IS_CLANG 238 | # endif 239 | ], 240 | [ax_cv_PTHREAD_CLANG=yes]) 241 | fi 242 | ]) 243 | ax_pthread_clang="$ax_cv_PTHREAD_CLANG" 244 | 245 | ax_pthread_clang_warning=no 246 | 247 | # Clang needs special handling, because older versions handle the -pthread 248 | # option in a rather... idiosyncratic way 249 | 250 | if test "x$ax_pthread_clang" = "xyes"; then 251 | 252 | # Clang takes -pthread; it has never supported any other flag 253 | 254 | # (Note 1: This will need to be revisited if a system that Clang 255 | # supports has POSIX threads in a separate library. This tends not 256 | # to be the way of modern systems, but it's conceivable.) 257 | 258 | # (Note 2: On some systems, notably Darwin, -pthread is not needed 259 | # to get POSIX threads support; the API is always present and 260 | # active. We could reasonably leave PTHREAD_CFLAGS empty. But 261 | # -pthread does define _REENTRANT, and while the Darwin headers 262 | # ignore this macro, third-party headers might not.) 263 | 264 | PTHREAD_CFLAGS="-pthread" 265 | PTHREAD_LIBS= 266 | 267 | ax_pthread_ok=yes 268 | 269 | # However, older versions of Clang make a point of warning the user 270 | # that, in an invocation where only linking and no compilation is 271 | # taking place, the -pthread option has no effect ("argument unused 272 | # during compilation"). They expect -pthread to be passed in only 273 | # when source code is being compiled.
274 | # 275 | # Problem is, this is at odds with the way Automake and most other 276 | # C build frameworks function, which is that the same flags used in 277 | # compilation (CFLAGS) are also used in linking. Many systems 278 | # supported by AX_PTHREAD require exactly this for POSIX threads 279 | # support, and in fact it is often not straightforward to specify a 280 | # flag that is used only in the compilation phase and not in 281 | # linking. Such a scenario is extremely rare in practice. 282 | # 283 | # Even though use of the -pthread flag in linking would only print 284 | # a warning, this can be a nuisance for well-run software projects 285 | # that build with -Werror. So if the active version of Clang has 286 | # this misfeature, we search for an option to squash it. 287 | 288 | AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], 289 | [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], 290 | [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown 291 | # Create an alternate version of $ac_link that compiles and 292 | # links in two steps (.c -> .o, .o -> exe) instead of one 293 | # (.c -> exe), because the warning occurs only in the second 294 | # step 295 | ax_pthread_save_ac_link="$ac_link" 296 | ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' 297 | ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` 298 | ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" 299 | ax_pthread_save_CFLAGS="$CFLAGS" 300 | for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do 301 | AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) 302 | CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" 303 | ac_link="$ax_pthread_save_ac_link" 304 | AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], 305 | [ac_link="$ax_pthread_2step_ac_link" 306 | AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], 307
| [break]) 308 | ]) 309 | done 310 | ac_link="$ax_pthread_save_ac_link" 311 | CFLAGS="$ax_pthread_save_CFLAGS" 312 | AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) 313 | ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" 314 | ]) 315 | 316 | case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in 317 | no | unknown) ;; 318 | *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; 319 | esac 320 | 321 | fi # $ax_pthread_clang = yes 322 | 323 | if test "x$ax_pthread_ok" = "xno"; then 324 | for ax_pthread_try_flag in $ax_pthread_flags; do 325 | 326 | case $ax_pthread_try_flag in 327 | none) 328 | AC_MSG_CHECKING([whether pthreads work without any flags]) 329 | ;; 330 | 331 | -mt,pthread) 332 | AC_MSG_CHECKING([whether pthreads work with -mt -lpthread]) 333 | PTHREAD_CFLAGS="-mt" 334 | PTHREAD_LIBS="-lpthread" 335 | ;; 336 | 337 | -*) 338 | AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) 339 | PTHREAD_CFLAGS="$ax_pthread_try_flag" 340 | ;; 341 | 342 | pthread-config) 343 | AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) 344 | AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) 345 | PTHREAD_CFLAGS="`pthread-config --cflags`" 346 | PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" 347 | ;; 348 | 349 | *) 350 | AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) 351 | PTHREAD_LIBS="-l$ax_pthread_try_flag" 352 | ;; 353 | esac 354 | 355 | ax_pthread_save_CFLAGS="$CFLAGS" 356 | ax_pthread_save_LIBS="$LIBS" 357 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 358 | LIBS="$PTHREAD_LIBS $LIBS" 359 | 360 | # Check for various functions. We must include pthread.h, 361 | # since some functions may be macros. (On the Sequent, we 362 | # need a special flag -Kthread to make this header compile.) 363 | # We check for pthread_join because it is in -lpthread on IRIX 364 | # while pthread_create is in libc. We check for pthread_attr_init 365 | # due to DEC craziness with -lpthreads.
We check for 366 | # pthread_cleanup_push because it is one of the few pthread 367 | # functions on Solaris that doesn't have a non-functional libc stub. 368 | # We try pthread_create on general principles. 369 | 370 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> 371 | # if $ax_pthread_check_cond 372 | # error "$ax_pthread_check_macro must be defined" 373 | # endif 374 | static void routine(void *a) { a = 0; } 375 | static void *start_routine(void *a) { return a; }], 376 | [pthread_t th; pthread_attr_t attr; 377 | pthread_create(&th, 0, start_routine, 0); 378 | pthread_join(th, 0); 379 | pthread_attr_init(&attr); 380 | pthread_cleanup_push(routine, 0); 381 | pthread_cleanup_pop(0) /* ; */])], 382 | [ax_pthread_ok=yes], 383 | []) 384 | 385 | CFLAGS="$ax_pthread_save_CFLAGS" 386 | LIBS="$ax_pthread_save_LIBS" 387 | 388 | AC_MSG_RESULT([$ax_pthread_ok]) 389 | AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) 390 | 391 | PTHREAD_LIBS="" 392 | PTHREAD_CFLAGS="" 393 | done 394 | fi 395 | 396 | # Various other checks: 397 | if test "x$ax_pthread_ok" = "xyes"; then 398 | ax_pthread_save_CFLAGS="$CFLAGS" 399 | ax_pthread_save_LIBS="$LIBS" 400 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 401 | LIBS="$PTHREAD_LIBS $LIBS" 402 | 403 | # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
404 | AC_CACHE_CHECK([for joinable pthread attribute], 405 | [ax_cv_PTHREAD_JOINABLE_ATTR], 406 | [ax_cv_PTHREAD_JOINABLE_ATTR=unknown 407 | for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do 408 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], 409 | [int attr = $ax_pthread_attr; return attr /* ; */])], 410 | [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], 411 | []) 412 | done 413 | ]) 414 | AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ 415 | test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ 416 | test "x$ax_pthread_joinable_attr_defined" != "xyes"], 417 | [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], 418 | [$ax_cv_PTHREAD_JOINABLE_ATTR], 419 | [Define to necessary symbol if this constant 420 | uses a non-standard name on your system.]) 421 | ax_pthread_joinable_attr_defined=yes 422 | ]) 423 | 424 | AC_CACHE_CHECK([whether more special flags are required for pthreads], 425 | [ax_cv_PTHREAD_SPECIAL_FLAGS], 426 | [ax_cv_PTHREAD_SPECIAL_FLAGS=no 427 | case $host_os in 428 | solaris*) 429 | ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" 430 | ;; 431 | esac 432 | ]) 433 | AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ 434 | test "x$ax_pthread_special_flags_added" != "xyes"], 435 | [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" 436 | ax_pthread_special_flags_added=yes]) 437 | 438 | AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], 439 | [ax_cv_PTHREAD_PRIO_INHERIT], 440 | [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], 441 | [[int i = PTHREAD_PRIO_INHERIT;]])], 442 | [ax_cv_PTHREAD_PRIO_INHERIT=yes], 443 | [ax_cv_PTHREAD_PRIO_INHERIT=no]) 444 | ]) 445 | AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ 446 | test "x$ax_pthread_prio_inherit_defined" != "xyes"], 447 | [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) 448 | ax_pthread_prio_inherit_defined=yes 449 | ]) 450 | 451 | CFLAGS="$ax_pthread_save_CFLAGS" 452 |
LIBS="$ax_pthread_save_LIBS" 453 | 454 | # More AIX lossage: compile with *_r variant 455 | if test "x$GCC" != "xyes"; then 456 | case $host_os in 457 | aix*) 458 | AS_CASE(["x/$CC"], 459 | [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], 460 | [#handle absolute path differently from PATH based program lookup 461 | AS_CASE(["x$CC"], 462 | [x/*], 463 | [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], 464 | [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) 465 | ;; 466 | esac 467 | fi 468 | fi 469 | 470 | test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" 471 | 472 | AC_SUBST([PTHREAD_LIBS]) 473 | AC_SUBST([PTHREAD_CFLAGS]) 474 | AC_SUBST([PTHREAD_CC]) 475 | 476 | # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: 477 | if test "x$ax_pthread_ok" = "xyes"; then 478 | ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) 479 | : 480 | else 481 | ax_pthread_ok=no 482 | $2 483 | fi 484 | AC_LANG_POP 485 | ])dnl AX_PTHREAD 486 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CFLAGS = $(PTHREAD_CFLAGS) 2 | LDADD = $(top_builddir)/lib/libPKA.la $(PTHREAD_LIBS) 3 | 4 | bin_PROGRAMS = pka_test_validation pka_test_performance 5 | pka_test_validation_SOURCES = validation/pka_test_validation.c pka_test_utils.c 6 | pka_test_performance_SOURCES = performance/pka_test_performance.c pka_test_utils.c 7 | --------------------------------------------------------------------------------