├── .gitignore ├── .travis.yml ├── COPYING ├── Makefile.am ├── README.md ├── autogen.sh ├── clean.sh ├── configure.ac ├── libb2.pc.in ├── m4 ├── ax_check_compile_flag.m4 ├── ax_check_link_flag.m4 ├── ax_ext.m4 ├── ax_forceinline.m4 ├── ax_gcc_x86_cpuid.m4 ├── libtool.m4 ├── ltoptions.m4 ├── ltsugar.m4 ├── ltversion.m4 └── lt~obsolete.m4 └── src ├── Makefile.am ├── blake2-config.h ├── blake2-dispatch.c ├── blake2-impl.h ├── blake2-kat.h ├── blake2.h ├── blake2b-load-sse2.h ├── blake2b-load-sse41.h ├── blake2b-ref.c ├── blake2b-round.h ├── blake2b-test.c ├── blake2b.c ├── blake2bp-test.c ├── blake2bp.c ├── blake2s-load-sse2.h ├── blake2s-load-sse41.h ├── blake2s-load-xop.h ├── blake2s-ref.c ├── blake2s-round.h ├── blake2s-test.c ├── blake2s.c ├── blake2sp-test.c └── blake2sp.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # libb2 reference source code package - Travis testing configuration 2 | # 3 | # Copyright 2020, Samuel Neves . You may use this under the 4 | # terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at 5 | # your option. 
The terms of these licenses can be found at: 6 | # 7 | # - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 8 | # - OpenSSL license : https://www.openssl.org/source/license.html 9 | # - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # More information about the BLAKE2 hash function can be found at 12 | # https://blake2.net. 13 | 14 | # DO NOT create top level (global) keys like env, arch, os, compiler. 15 | # The top level/global keys invoke [unwanted] matrix expansion. Also 16 | # see https://stackoverflow.com/q/58473000/608639 and 17 | # https://docs.travis-ci.com/user/reference/overview/ and 18 | # https://docs.travis-ci.com/user/multi-cpu-architectures and 19 | # https://github.com/travis-ci/travis-yml/blob/master/schema.json. 20 | 21 | language: c 22 | dist: bionic 23 | 24 | git: 25 | depth: 5 26 | 27 | # Use jobs rather than matrix since we are precisely specifying our 28 | # test cases. Do not move any of the keys (env, os, arch, compiler, etc) 29 | # into global. Putting them in global invokes the matrix expansion.
30 | jobs: 31 | include: 32 | # AMD64 testing 33 | - name: GCC, amd64, no-opts 34 | os: linux 35 | arch: amd64 36 | compiler: gcc 37 | env: 38 | - BUILD_OPTS= 39 | - name: GCC, amd64, enable-fat 40 | os: linux 41 | arch: amd64 42 | compiler: gcc 43 | env: 44 | - BUILD_OPTS="--enable-fat" 45 | - name: GCC, amd64, disable-native 46 | os: linux 47 | arch: amd64 48 | compiler: gcc 49 | env: 50 | - BUILD_OPTS="--disable-native" 51 | - name: GCC, amd64, enable-openmp 52 | os: linux 53 | arch: amd64 54 | compiler: gcc 55 | env: 56 | - BUILD_OPTS="--enable-openmp" 57 | - name: GCC, amd64, disable-openmp 58 | os: linux 59 | arch: amd64 60 | compiler: gcc 61 | env: 62 | - BUILD_OPTS="--disable-openmp" 63 | - name: Clang, amd64, no-opts 64 | os: linux 65 | arch: amd64 66 | compiler: clang 67 | env: 68 | - CC=clang-8 69 | - BUILD_OPTS= 70 | - name: Clang, amd64, enable-fat 71 | os: linux 72 | arch: amd64 73 | compiler: clang 74 | env: 75 | - CC=clang-8 76 | - BUILD_OPTS="--enable-fat" 77 | - name: Clang, amd64, disable-native 78 | os: linux 79 | arch: amd64 80 | compiler: clang 81 | env: 82 | - CC=clang-8 83 | - BUILD_OPTS="--disable-native" 84 | - name: Clang, amd64, enable-openmp 85 | os: linux 86 | arch: amd64 87 | compiler: clang 88 | env: 89 | - CC=clang-8 90 | - BUILD_OPTS="--enable-openmp" 91 | - name: Clang, amd64, disable-openmp 92 | os: linux 93 | arch: amd64 94 | compiler: clang 95 | env: 96 | - CC=clang-8 97 | - BUILD_OPTS="--disable-openmp" 98 | # OS X testing 99 | - name: Clang, OS X, amd64, no-opts 100 | os: osx 101 | osx_image: xcode11.6 102 | arch: amd64 103 | compiler: clang 104 | env: 105 | - BUILD_OPTS= 106 | - name: Clang, OS X, amd64, enable-fat 107 | os: osx 108 | osx_image: xcode11.6 109 | arch: amd64 110 | compiler: clang 111 | env: 112 | - BUILD_OPTS="--enable-fat" 113 | - name: Clang, OS X, amd64, disable-native 114 | os: osx 115 | osx_image: xcode11.6 116 | arch: amd64 117 | compiler: clang 118 | env: 119 | - BUILD_OPTS="--disable-native" 120 | - 
name: Clang, OS X, amd64, enable-openmp 121 | os: osx 122 | osx_image: xcode11.6 123 | arch: amd64 124 | compiler: clang 125 | env: 126 | - BUILD_OPTS="--enable-openmp" 127 | - name: Clang, OS X, amd64, disable-openmp 128 | os: osx 129 | osx_image: xcode11.6 130 | arch: amd64 131 | compiler: clang 132 | env: 133 | - BUILD_OPTS="--disable-openmp" 134 | # Aarch64 testing 135 | - name: GCC, aarch64, no-opts 136 | os: linux 137 | arch: arm64 138 | compiler: gcc 139 | env: 140 | - BUILD_OPTS= 141 | - name: GCC, aarch64, disable-native 142 | os: linux 143 | arch: arm64 144 | compiler: gcc 145 | env: 146 | - BUILD_OPTS="--disable-native" 147 | - name: GCC, aarch64, enable-openmp 148 | os: linux 149 | arch: arm64 150 | compiler: gcc 151 | env: 152 | - BUILD_OPTS="--enable-openmp" 153 | - name: GCC, aarch64, disable-openmp 154 | os: linux 155 | arch: arm64 156 | compiler: gcc 157 | env: 158 | - BUILD_OPTS="--disable-openmp" 159 | - name: Clang, aarch64, no-opts 160 | os: linux 161 | arch: arm64 162 | compiler: clang 163 | env: 164 | - CC=clang-8 165 | - BUILD_OPTS= 166 | - name: Clang, aarch64, disable-native 167 | os: linux 168 | arch: arm64 169 | compiler: clang 170 | env: 171 | - CC=clang-8 172 | - BUILD_OPTS="--disable-native" 173 | - name: Clang, aarch64, enable-openmp 174 | os: linux 175 | arch: arm64 176 | compiler: clang 177 | env: 178 | - CC=clang-8 179 | - BUILD_OPTS="--enable-openmp" 180 | - name: Clang, aarch64, disable-openmp 181 | os: linux 182 | arch: arm64 183 | compiler: clang 184 | env: 185 | - CC=clang-8 186 | - BUILD_OPTS="--disable-openmp" 187 | # PowerPC testing 188 | - name: GCC, ppc64le, no-opts 189 | os: linux 190 | arch: ppc64le 191 | compiler: gcc 192 | env: 193 | - BUILD_OPTS= 194 | - name: GCC, ppc64le, disable-native 195 | os: linux 196 | arch: ppc64le 197 | compiler: gcc 198 | env: 199 | - BUILD_OPTS="--disable-native" 200 | - name: GCC, ppc64le, enable-openmp 201 | os: linux 202 | arch: ppc64le 203 | compiler: gcc 204 | env: 205 | - 
BUILD_OPTS="--enable-openmp" 206 | - name: GCC, ppc64le, disable-openmp 207 | os: linux 208 | arch: ppc64le 209 | compiler: gcc 210 | env: 211 | - BUILD_OPTS="--disable-openmp" 212 | - name: Clang, ppc64le, no-opts 213 | os: linux 214 | arch: ppc64le 215 | compiler: clang 216 | env: 217 | - CC=clang-8 218 | - BUILD_OPTS= 219 | - name: Clang, ppc64le, disable-native 220 | os: linux 221 | arch: ppc64le 222 | compiler: clang 223 | env: 224 | - CC=clang-8 225 | - BUILD_OPTS="--disable-native" 226 | - name: Clang, ppc64le, enable-openmp 227 | os: linux 228 | arch: ppc64le 229 | compiler: clang 230 | env: 231 | - CC=clang-8 232 | - BUILD_OPTS="--enable-openmp" 233 | - name: Clang, ppc64le, disable-openmp 234 | os: linux 235 | arch: ppc64le 236 | compiler: clang 237 | env: 238 | - CC=clang-8 239 | - BUILD_OPTS="--disable-openmp" 240 | # s390x testing 241 | - name: GCC, s390x, no-opts 242 | os: linux 243 | arch: s390x 244 | compiler: gcc 245 | env: 246 | - BUILD_OPTS= 247 | - name: GCC, s390x, disable-native 248 | os: linux 249 | arch: s390x 250 | compiler: gcc 251 | env: 252 | - BUILD_OPTS="--disable-native" 253 | - name: GCC, s390x, enable-openmp 254 | os: linux 255 | arch: s390x 256 | compiler: gcc 257 | env: 258 | - BUILD_OPTS="--enable-openmp" 259 | - name: GCC, s390x, disable-openmp 260 | os: linux 261 | arch: s390x 262 | compiler: gcc 263 | env: 264 | - BUILD_OPTS="--disable-openmp" 265 | - name: Clang, s390x, no-opts 266 | os: linux 267 | arch: s390x 268 | compiler: clang 269 | env: 270 | - CC=clang-8 271 | - BUILD_OPTS= 272 | - name: Clang, s390x, disable-native 273 | os: linux 274 | arch: s390x 275 | compiler: clang 276 | env: 277 | - CC=clang-8 278 | - BUILD_OPTS="--disable-native" 279 | - name: Clang, s390x, enable-openmp 280 | os: linux 281 | arch: s390x 282 | compiler: clang 283 | env: 284 | - CC=clang-8 285 | - BUILD_OPTS="--enable-openmp" 286 | - name: Clang, s390x, disable-openmp 287 | os: linux 288 | arch: s390x 289 | compiler: clang 290 | env: 291 | - 
CC=clang-8 292 | - BUILD_OPTS="--disable-openmp" 293 | 294 | allow_failures: 295 | # Clang has a fair amount of trouble 296 | # on platforms Apple does not support 297 | - os: linux 298 | arch: s390x 299 | compiler: clang 300 | # Clang 7.0 and below will likely have trouble on ppc64le 301 | # due to https://bugs.llvm.org/show_bug.cgi?id=39704. 302 | # Also see https://bugs.llvm.org/show_bug.cgi?id=46571. 303 | - os: linux 304 | arch: ppc64le 305 | compiler: clang 306 | # And https://bugs.llvm.org/show_bug.cgi?id=46572 307 | - os: linux 308 | arch: arm64 309 | compiler: clang 310 | 311 | before_install: 312 | - | 313 | # Clang 7 compiler is completely broken on PPC64 and s390x 314 | # Clang 8 is needed for OpenMP on Aarch64 315 | if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$TRAVIS_COMPILER" == "clang" ]]; then 316 | # https://github.com/travis-ci/travis-ci/issues/9037 317 | sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A145 318 | sudo apt-get -qq -y install --no-install-recommends clang-8 || true 319 | sudo apt-get -qq -y install --no-install-recommends libomp-8-dev || true 320 | sudo apt-get -qq -y install --no-install-recommends libomp5-8 || true 321 | fi 322 | 323 | # Add 'cat src/test-suite.log' as required. 
324 | script: 325 | - | 326 | ./autogen.sh 327 | ./configure ${BUILD_OPTS} 328 | make CC="${CC}" -j 3 329 | make CC="${CC}" check 330 | 331 | # Whitelist branches to avoid testing feature branches twice 332 | branches: 333 | only: 334 | - master 335 | - /\/ci$/ 336 | 337 | #notifications: 338 | # email: 339 | # recipients: 340 | # - blake2-build@example.com 341 | # on_success: always # default: change 342 | # on_failure: always # default: always 343 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 
20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | <http://creativecommons.org/publicdomain/zero/1.0/> 117 | 118 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | EXTRA_DIST = COPYING 4 | 5 | SUBDIRS = src 6 | 7 | pkgconfig_DATA = libb2.pc 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libb2 2 | 3 | C library providing BLAKE2b, BLAKE2s, BLAKE2bp, BLAKE2sp 4 | 5 | Installation: 6 | 7 | ``` 8 | $ ./autogen.sh 9 | $ ./configure 10 | $ make 11 | $ sudo make install 12 | ``` 13 | 14 | Contact: contact@blake2.net 15 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/sh 2 | autoreconf -fvi 3 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make maintainer-clean 4 | rm -rf Makefile.in\ 5 | aclocal.m4\ 6 | config.guess\ 7 | config.sub\ 8 | configure\ 9 | depcomp\ 10 | install-sh\ 11 | ltmain.sh\ 12 | missing\ 13 | src/Makefile.in\ 14 | src/config.h.in 15 | 16 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.61]) 2 | AC_INIT([libb2], [0.98.1], [contact@blake2.net], [libb2], [https://blake2.net]) 3 | AC_CONFIG_SRCDIR([src/blake2b.c]) 4 | AC_CONFIG_HEADERS([src/config.h]) 5 | AM_INIT_AUTOMAKE([foreign 1.9]) 6 | AC_CONFIG_MACRO_DIR([m4]) 7 | 8 | B2_LIBRARY_VERSION=1:4:0 # interface, revision, age 9 | AC_SUBST(B2_LIBRARY_VERSION) 10 | 11 | AC_LANG_C 12 | AC_PROG_CC 13 | AC_PROG_CC_C99 14 | 15 | AC_CHECK_FUNCS(explicit_bzero) 16 | AC_CHECK_FUNCS(explicit_memset) 17 | AC_CHECK_FUNCS(memset_s) 18 | AC_CHECK_HEADERS([stddef.h stdint.h stdlib.h string.h]) 19 | AC_OPENMP 20 | # AX_FORCEINLINE() 21 | AC_C_BIGENDIAN( 22 | [], 23 | AC_DEFINE(NATIVE_LITTLE_ENDIAN, 1, [machine is little-endian]), 24 | AC_MSG_ERROR(unknown endianness), 25 | AC_MSG_ERROR(universal endianness not supported) 26 | ) 27 | 28 | # Checks for typedefs, structures, and compiler characteristics. 29 | AC_C_INLINE 30 | AC_TYPE_SIZE_T 31 | AC_TYPE_UINT32_T 32 | AC_TYPE_UINT64_T 33 | AC_TYPE_UINT8_T 34 | 35 | # Checks for library functions. 
36 | AC_CHECK_FUNCS([memset]) 37 | 38 | dnl AM_PROG_AR 39 | AC_PROG_LIBTOOL 40 | dnl LT_INIT 41 | 42 | 43 | AC_ARG_ENABLE(fat, 44 | AC_HELP_STRING([--enable-fat], 45 | [build a fat binary on systems that support it [default=no]]), 46 | [case $enableval in 47 | yes|no) ;; 48 | *) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;; 49 | esac], 50 | [enable_fat=no] 51 | ) 52 | 53 | AC_ARG_ENABLE(native, 54 | AC_HELP_STRING([--enable-native], 55 | [build a binary optimized for the CPU found at compile time on systems that support it [default=yes]]), 56 | [case $enableval in 57 | yes|no) ;; 58 | *) AC_MSG_ERROR([bad value $enableval for --enable-native, need yes or no]) ;; 59 | esac], 60 | [enable_native=yes] 61 | ) 62 | 63 | AX_CHECK_COMPILE_FLAG([-O3], [CFLAGS="$CFLAGS -O3"]) 64 | dnl Not all architectures support -march=native 65 | if test $enable_native = "yes"; then 66 | AX_CHECK_COMPILE_FLAG([-march=native], [], [enable_native=no]) 67 | fi 68 | 69 | if test $enable_fat = "yes"; then 70 | dnl Fat build needs compiler who knows all the possible instruction sets 71 | AX_CHECK_COMPILE_FLAG([-msse2], [], AC_MSG_ERROR([Compiler does not know -msse2.])) 72 | AX_CHECK_COMPILE_FLAG([-mssse3], [], AC_MSG_ERROR([Compiler does not know -mssse3.])) 73 | AX_CHECK_COMPILE_FLAG([-msse4.1], [], AC_MSG_ERROR([Compiler does not know -msse4.1.])) 74 | AX_CHECK_COMPILE_FLAG([-mavx], [], AC_MSG_ERROR([Compiler does not know -mavx.])) 75 | AX_CHECK_COMPILE_FLAG([-mxop], [], AC_MSG_ERROR([Compiler does not know -mxop.])) 76 | elif test $enable_native = "yes"; then 77 | AX_EXT 78 | CFLAGS="${CFLAGS} -march=native ${SIMD_FLAGS}" 79 | fi 80 | 81 | case $host_os in 82 | *mingw*) LDFLAGS="${LDFLAGS} -no-undefined" ;; 83 | esac 84 | 85 | AM_CONDITIONAL([USE_FAT], [test "$enable_fat" = "yes"]) 86 | dnl Only move away from ref with SSSE3; SSE2 is generally slower 87 | AM_CONDITIONAL([USE_SSE], [test "$ax_cv_have_ssse3_ext" = "yes"]) 88 | 89 | 90 | PKG_INSTALLDIR 91 | 
AC_CONFIG_FILES([Makefile 92 | src/Makefile 93 | libb2.pc 94 | ]) 95 | AC_OUTPUT 96 | -------------------------------------------------------------------------------- /libb2.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: libb2 7 | Description: C library providing BLAKE2b, BLAKE2s, BLAKE2bp, BLAKE2sp 8 | URL: https://github.com/BLAKE2/libb2 9 | Version: @VERSION@ 10 | Requires: 11 | Cflags: -I${includedir} 12 | Libs: -L${libdir} -lb2 13 | -------------------------------------------------------------------------------- /m4/ax_check_compile_flag.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check whether the given FLAG works with the current language's compiler 12 | # or gives an error. (Warnings, however, are ignored) 13 | # 14 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 15 | # success/failure. 16 | # 17 | # If EXTRA-FLAGS is defined, it is added to the current language's default 18 | # flags (e.g. CFLAGS) when the check is done. The check is thus made with 19 | # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to 20 | # force the compiler to issue an error when a bad flag is given. 21 | # 22 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this 23 | # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. 24 | # 25 | # LICENSE 26 | # 27 | # Copyright (c) 2008 Guido U. 
Draheim 28 | # Copyright (c) 2011 Maarten Bosmans 29 | # 30 | # This program is free software: you can redistribute it and/or modify it 31 | # under the terms of the GNU General Public License as published by the 32 | # Free Software Foundation, either version 3 of the License, or (at your 33 | # option) any later version. 34 | # 35 | # This program is distributed in the hope that it will be useful, but 36 | # WITHOUT ANY WARRANTY; without even the implied warranty of 37 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 38 | # Public License for more details. 39 | # 40 | # You should have received a copy of the GNU General Public License along 41 | # with this program. If not, see <http://www.gnu.org/licenses/>. 42 | # 43 | # As a special exception, the respective Autoconf Macro's copyright owner 44 | # gives unlimited permission to copy, distribute and modify the configure 45 | # scripts that are the output of Autoconf when processing the Macro. You 46 | # need not follow the terms of the GNU General Public License when using 47 | # or distributing such scripts, even though portions of the text of the 48 | # Macro appear in them. The GNU General Public License (GPL) does govern 49 | # all other use of the material that constitutes the Autoconf Macro. 50 | # 51 | # This special exception to the GPL applies to versions of the Autoconf 52 | # Macro released by the Autoconf Archive. When you make and distribute a 53 | # modified version of the Autoconf Macro, you may extend this special 54 | # exception to the GPL to apply to your modified version as well.
55 | 56 | #serial 2 57 | 58 | AC_DEFUN([AX_CHECK_COMPILE_FLAG], 59 | [AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX 60 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl 61 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ 62 | ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS 63 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" 64 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <stdio.h>]], 65 | [[char x[42U], fodder = 0;if (fodder > -1000 && fgets(x,1000,stdin)) puts(x)]])], 66 | [AS_VAR_SET(CACHEVAR,[yes])], 67 | [AS_VAR_SET(CACHEVAR,[no])]) 68 | _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) 69 | AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], 70 | [m4_default([$2], :)], 71 | [m4_default([$3], :)]) 72 | AS_VAR_POPDEF([CACHEVAR])dnl 73 | ])dnl AX_CHECK_COMPILE_FLAG 74 | -------------------------------------------------------------------------------- /m4/ax_check_link_flag.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_check_link_flag.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CHECK_LINK_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check whether the given FLAG works with the linker or gives an error. 12 | # (Warnings, however, are ignored) 13 | # 14 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 15 | # success/failure. 16 | # 17 | # If EXTRA-FLAGS is defined, it is added to the linker's default flags 18 | # when the check is done. The check is thus made with the flags: "LDFLAGS 19 | # EXTRA-FLAGS FLAG". This can for example be used to force the linker to 20 | # issue an error when a bad flag is given. 21 | # 22 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION.
Please keep this 23 | # macro in sync with AX_CHECK_{PREPROC,COMPILE}_FLAG. 24 | # 25 | # LICENSE 26 | # 27 | # Copyright (c) 2008 Guido U. Draheim 28 | # Copyright (c) 2011 Maarten Bosmans 29 | # 30 | # This program is free software: you can redistribute it and/or modify it 31 | # under the terms of the GNU General Public License as published by the 32 | # Free Software Foundation, either version 3 of the License, or (at your 33 | # option) any later version. 34 | # 35 | # This program is distributed in the hope that it will be useful, but 36 | # WITHOUT ANY WARRANTY; without even the implied warranty of 37 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 38 | # Public License for more details. 39 | # 40 | # You should have received a copy of the GNU General Public License along 41 | # with this program. If not, see <http://www.gnu.org/licenses/>. 42 | # 43 | # As a special exception, the respective Autoconf Macro's copyright owner 44 | # gives unlimited permission to copy, distribute and modify the configure 45 | # scripts that are the output of Autoconf when processing the Macro. You 46 | # need not follow the terms of the GNU General Public License when using 47 | # or distributing such scripts, even though portions of the text of the 48 | # Macro appear in them. The GNU General Public License (GPL) does govern 49 | # all other use of the material that constitutes the Autoconf Macro. 50 | # 51 | # This special exception to the GPL applies to versions of the Autoconf 52 | # Macro released by the Autoconf Archive. When you make and distribute a 53 | # modified version of the Autoconf Macro, you may extend this special 54 | # exception to the GPL to apply to your modified version as well.
55 | 56 | #serial 2 57 | 58 | AC_DEFUN([AX_CHECK_LINK_FLAG], 59 | [AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_ldflags_$4_$1])dnl 60 | AC_CACHE_CHECK([whether the linker accepts $1], CACHEVAR, [ 61 | ax_check_save_flags=$LDFLAGS 62 | LDFLAGS="$LDFLAGS $4 $1" 63 | AC_LINK_IFELSE([AC_LANG_PROGRAM()], 64 | [AS_VAR_SET(CACHEVAR,[yes])], 65 | [AS_VAR_SET(CACHEVAR,[no])]) 66 | LDFLAGS=$ax_check_save_flags]) 67 | AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], 68 | [m4_default([$2], :)], 69 | [m4_default([$3], :)]) 70 | AS_VAR_POPDEF([CACHEVAR])dnl 71 | ])dnl AX_CHECK_LINK_FLAG 72 | -------------------------------------------------------------------------------- /m4/ax_ext.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_ext.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_EXT 8 | # 9 | # DESCRIPTION 10 | # 11 | # Find supported SIMD extensions by requesting cpuid. When an SIMD 12 | # extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if 13 | # the compiler supports it. For example, if "sse2" is available, then 14 | # "-msse2" is added to SIMD_FLAGS. 15 | # 16 | # This macro calls: 17 | # 18 | # AC_SUBST(SIMD_FLAGS) 19 | # 20 | # And defines: 21 | # 22 | # HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_AVX 23 | # 24 | # LICENSE 25 | # 26 | # Copyright (c) 2007 Christophe Tournayre 27 | # 28 | # Copying and distribution of this file, with or without modification, are 29 | # permitted in any medium without royalty provided the copyright notice 30 | # and this notice are preserved. This file is offered as-is, without any 31 | # warranty.
#serial 10

# AX_EXT
# ------
# Detect the SIMD extensions of the build machine and, for each one the
# compiler also accepts, append the matching flag to SIMD_FLAGS and define
# the corresponding HAVE_* symbol.
#
#   * powerpc hosts: AltiVec is detected through /usr/sbin/sysctl.
#   * x86 hosts: feature bits are read from cpuid leaf 1 as reported by
#     AX_GCC_X86_CPUID.  When that probe cannot run (it then reports the
#     word "unknown", e.g. when cross-compiling) every feature is treated
#     as absent instead of feeding a non-numeric value to shell arithmetic.
#
# Every ax_cv_have_*_ext cache variable ends up as either yes or no.
AC_DEFUN([AX_EXT],
[
  AC_REQUIRE([AC_CANONICAL_HOST])

  case $host_cpu in
    powerpc*)
      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
          [
            ax_cv_have_altivec_ext=no
            if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
                if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
                  ax_cv_have_altivec_ext=yes
                fi
            fi
          ])

          if test "$ax_cv_have_altivec_ext" = yes; then
            AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
            AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
          fi
    ;;


    i[[3456]]86*|x86_64*)

      AC_REQUIRE([AX_GCC_X86_CPUID])

      AX_GCC_X86_CPUID(0x00000001)
      dnl The probe stores the word unknown when cpuid could not be executed.
      dnl Start from zeroed registers and only parse a real result so that
      dnl the arithmetic tests below always see hexadecimal operands.
      ecx=0
      edx=0
      if test "$ax_cv_gcc_x86_cpuid_0x00000001" != unknown; then
        ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
        edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
      fi

      AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
      [
        ax_cv_have_mmx_ext=no
        if test "$((0x$edx>>23&0x01))" = 1; then
          ax_cv_have_mmx_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
      [
        ax_cv_have_sse_ext=no
        if test "$((0x$edx>>25&0x01))" = 1; then
          ax_cv_have_sse_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
      [
        ax_cv_have_sse2_ext=no
        if test "$((0x$edx>>26&0x01))" = 1; then
          ax_cv_have_sse2_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
      [
        ax_cv_have_sse3_ext=no
        if test "$((0x$ecx&0x01))" = 1; then
          ax_cv_have_sse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
      [
        ax_cv_have_ssse3_ext=no
        if test "$((0x$ecx>>9&0x01))" = 1; then
          ax_cv_have_ssse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
      [
        ax_cv_have_sse41_ext=no
        if test "$((0x$ecx>>19&0x01))" = 1; then
          ax_cv_have_sse41_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
      [
        ax_cv_have_sse42_ext=no
        if test "$((0x$ecx>>20&0x01))" = 1; then
          ax_cv_have_sse42_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether avx is supported], [ax_cv_have_avx_ext],
      [
        ax_cv_have_avx_ext=no
        if test "$((0x$ecx>>28&0x01))" = 1; then
          ax_cv_have_avx_ext=yes
        fi
      ])

      if test "$ax_cv_have_mmx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
        if test x"$ax_cv_support_mmx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mmmx"
          AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
        else
          AC_MSG_WARN([Your processor support mmx instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_sse_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
        if test x"$ax_cv_support_sse_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse"
          AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
        else
          AC_MSG_WARN([Your processor support sse instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_sse2_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
        if test x"$ax_cv_support_sse2_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse2"
          AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
        else
          AC_MSG_WARN([Your processor support sse2 instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_sse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
        if test x"$ax_cv_support_sse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse3"
          AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor support sse3 instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_ssse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
        if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mssse3"
          AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor support ssse3 instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_sse41_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
        if test x"$ax_cv_support_sse41_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
          AC_DEFINE(HAVE_SSE4_1,,[Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions])
        else
          AC_MSG_WARN([Your processor support sse4.1 instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_sse42_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
        if test x"$ax_cv_support_sse42_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
          AC_DEFINE(HAVE_SSE4_2,,[Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions])
        else
          AC_MSG_WARN([Your processor support sse4.2 instructions but not your compilor, can you try another compilor ?])
        fi
      fi

      if test "$ax_cv_have_avx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
        if test x"$ax_cv_support_avx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mavx"
          AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
        else
          AC_MSG_WARN([Your processor support avx instructions but not your compilor, can you try another compilor ?])
        fi
      fi

  ;;
  esac

  AC_SUBST(SIMD_FLAGS)
])
--------------------------------------------------------------------------------
/m4/ax_forceinline.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#      http://www.gnu.org/software/autoconf-archive/ax_forceinline.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_FORCEINLINE()
#
# DESCRIPTION
#
#   Provides a test for C compiler support of forced inlining. If usable,
#   #define FORCEINLINE to the appropriate force inline keyword. Otherwise
#   #define FORCEINLINE to be 'inline' (or to nothing when the compiler
#   rejects 'inline' as well).
#
# LICENSE
#
#   Copyright (c) 2008 Alan Woodland
#   Copyright (c) 2009 Rhys Ulerich
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.
#serial 2

# AX_FORCEINLINE
# --------------
# Try the candidate keywords "__forceinline" and "inline" in that order by
# compiling a small static function that calls exit().  The first keyword
# the C compiler accepts is cached in ac_cv_forceinline; if neither is
# accepted the cache value is "none".  FORCEINLINE is then defined to the
# chosen keyword, or to nothing when the result is "none".
AC_DEFUN([AX_FORCEINLINE], [
  AC_LANG_PUSH([C])
  AC_MSG_CHECKING(for forced inline keyword)
  AC_CACHE_VAL(ac_cv_forceinline, [
    ax_forceinline_keywords="__forceinline inline none"
    for ax_forceinline_keyword in $ax_forceinline_keywords; do
       case $ax_forceinline_keyword in
          none) ac_cv_forceinline=none ; break ;;
      *)
             dnl stdlib.h declares exit(); without it the probe would rely on
             dnl an implicit declaration and could fail for unrelated reasons.
             AC_TRY_COMPILE(
                [#include <stdlib.h>
                 ] $ax_forceinline_keyword [
                 static void
                 foo(void) {
                 exit(1);
                 }],
                 [],
                 [ac_cv_forceinline=$ax_forceinline_keyword ; break],
                 ac_cv_forceinline=none
             )
      esac
    done
])

  if test "$ac_cv_forceinline" = "none"; then
    ax_forceinline_keyword=
  else
    ax_forceinline_keyword=$ac_cv_forceinline
  fi
  AC_DEFINE_UNQUOTED([FORCEINLINE],$ax_forceinline_keyword,
                     [The most forceful inline keyword known by the compiler])
  AC_MSG_RESULT($ac_cv_forceinline)
  AC_LANG_POP([C])
])
--------------------------------------------------------------------------------
/m4/ax_gcc_x86_cpuid.m4:
--------------------------------------------------------------------------------
# ===========================================================================
#     http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
# ===========================================================================
#
# SYNOPSIS
#
#   AX_GCC_X86_CPUID(OP)
#
# DESCRIPTION
#
#   On Pentium and later x86 processors, with gcc or a compiler that has a
#   compatible syntax for inline assembly instructions, run a small program
#   that executes the cpuid instruction with input OP. This can be used to
#   detect the CPU type.
#
#   On output, the values of the eax, ebx, ecx, and edx registers are stored
#   as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
#   ax_cv_gcc_x86_cpuid_OP.
#
#   If the cpuid instruction fails (because you are running a
#   cross-compiler, or because you are not using gcc, or because you are on
#   a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
#   is set to the string "unknown".
#
#   This macro mainly exists to be used in AX_GCC_ARCHFLAG.
#
# LICENSE
#
#   Copyright (c) 2008 Steven G. Johnson
#   Copyright (c) 2008 Matteo Frigo
#
#   This program is free software: you can redistribute it and/or modify it
#   under the terms of the GNU General Public License as published by the
#   Free Software Foundation, either version 3 of the License, or (at your
#   option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#   Public License for more details.
#
#   You should have received a copy of the GNU General Public License along
#   with this program. If not, see <http://www.gnu.org/licenses/>.
#
#   As a special exception, the respective Autoconf Macro's copyright owner
#   gives unlimited permission to copy, distribute and modify the configure
#   scripts that are the output of Autoconf when processing the Macro. You
#   need not follow the terms of the GNU General Public License when using
#   or distributing such scripts, even though portions of the text of the
#   Macro appear in them. The GNU General Public License (GPL) does govern
#   all other use of the material that constitutes the Autoconf Macro.
#
#   This special exception to the GPL applies to versions of the Autoconf
#   Macro released by the Autoconf Archive. When you make and distribute a
#   modified version of the Autoconf Macro, you may extend this special
#   exception to the GPL to apply to your modified version as well.
#serial 7

# AX_GCC_X86_CPUID(OP)
# --------------------
# Compile and run a program that executes cpuid with OP in eax and writes
# the four result registers to the file conftest_cpuid as
# "eax:ebx:ecx:edx" in hexadecimal.  The text is cached in
# ax_cv_gcc_x86_cpuid_OP.  The cache value is "unknown" when the program
# fails to build or run, and also when cross-compiling.
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     unsigned int op = $1, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
--------------------------------------------------------------------------------
/m4/ltoptions.m4:
--------------------------------------------------------------------------------
# Helper functions for option handling.                    -*- Autoconf -*-
#
#   Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software
#   Foundation, Inc.
#   Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.

# serial 8 ltoptions.m4

# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])


# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
# ------------------------------------------
m4_define([_LT_MANGLE_OPTION],
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])


# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
# ---------------------------------------
# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
# matching handler defined, dispatch to it.  Other OPTION-NAMEs are
# saved as a flag.
m4_define([_LT_SET_OPTION],
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
        _LT_MANGLE_DEFUN([$1], [$2]),
    [m4_warning([Unknown $1 option '$2'])])[]dnl
])


# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
# ------------------------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
m4_define([_LT_IF_OPTION],
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])


# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
# -------------------------------------------------------
# Execute IF-NOT-SET unless at least one of the options in OPTION-LIST
# is set for MACRO-NAME.  A temporary marker macro records whether any
# listed option was found and is undefined again before returning.
m4_define([_LT_UNLESS_OPTIONS],
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
            [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
                      [m4_define([$0_found])])])[]dnl
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
])[]dnl
])


# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
# ----------------------------------------
# OPTION-LIST is a space-separated list of Libtool options associated
# with MACRO-NAME.  If any OPTION has a matching handler declared with
# LT_OPTION_DEFINE, dispatch to that macro; otherwise report the unknown
# option through m4_warning.  For MACRO-NAME LT_INIT, defaults are then
# applied for every boolean option or option pair that was not mentioned.
m4_defun([_LT_SET_OPTIONS],
[# Set options
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
    [_LT_SET_OPTION([$1], _LT_Option)])

m4_if([$1],[LT_INIT],[
  dnl
  dnl Simply set some default values (i.e off) if boolean options were not
  dnl specified:
  _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
  ])
  _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
  ])
  dnl
  dnl If no reference was made to various pairs of opposing options, then
  dnl we run the default mode handler for the pair.  For example, if neither
  dnl 'shared' nor 'disable-shared' was passed, we enable building of shared
  dnl archives by default:
  _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
  _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
  _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
  _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
                   [_LT_ENABLE_FAST_INSTALL])
  _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4],
                   [_LT_WITH_AIX_SONAME([aix])])
  ])
])# _LT_SET_OPTIONS


## --------------------------------- ##
## Macros to handle LT_INIT options. ##
## --------------------------------- ##

# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
# -----------------------------------------
# Expand to the name of the handler macro for OPTION-NAME of MACRO-NAME:
# the prefix _LT_OPTION_DEFUN_ followed by MACRO-NAME__OPTION-NAME in
# upper case, with every character outside A-Z, 0-9 and '_' replaced
# by '_'.
m4_define([_LT_MANGLE_DEFUN],
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])


# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
# -----------------------------------------------
# Define the handler macro (named by _LT_MANGLE_DEFUN) for OPTION-NAME of
# MACRO-NAME with CODE as its body; _LT_SET_OPTION expands that handler
# when the option is set.
m4_define([LT_OPTION_DEFINE],
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
])# LT_OPTION_DEFINE


# dlopen
# ------
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
])

# Old name: sets the 'dlopen' LT_INIT option and emits an 'obsolete'
# diagnostic asking for the option to be passed to LT_INIT instead.
AU_DEFUN([AC_LIBTOOL_DLOPEN],
[_LT_SET_OPTION([LT_INIT], [dlopen])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the 'dlopen' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])


# win32-dll
# ---------
# Declare package support for building win32 dll's.
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
[enable_win32_dll=yes

case $host in
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
  AC_CHECK_TOOL(AS, as, false)
  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
  AC_CHECK_TOOL(OBJDUMP, objdump, false)
  ;;
esac

test -z "$AS" && AS=as
_LT_DECL([], [AS],      [1], [Assembler program])dnl

test -z "$DLLTOOL" && DLLTOOL=dlltool
_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl

test -z "$OBJDUMP" && OBJDUMP=objdump
_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
])# win32-dll

# Old name: sets the 'win32-dll' LT_INIT option and emits an 'obsolete'
# diagnostic asking for the option to be passed to LT_INIT instead.
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
_LT_SET_OPTION([LT_INIT], [win32-dll])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the 'win32-dll' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])


# _LT_ENABLE_SHARED([DEFAULT])
# ----------------------------
# implement the --enable-shared flag, and supports the 'shared' and
# 'disable-shared' LT_INIT options.
# DEFAULT is either 'yes' or 'no'.  If omitted, it defaults to 'yes'.
# When --enable-shared is given a PKGS list instead of yes/no, shared
# libraries are enabled only if the list contains the value of $PACKAGE
# (or the word 'default' when PACKAGE is unset).
m4_define([_LT_ENABLE_SHARED],
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([shared],
    [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
        [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_shared=yes ;;
    no) enable_shared=no ;;
    *)
      enable_shared=no
      # Look at the argument we got.  We use all the common list separators.
      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
      for pkg in $enableval; do
        IFS=$lt_save_ifs
        if test "X$pkg" = "X$p"; then
          enable_shared=yes
        fi
      done
      IFS=$lt_save_ifs
      ;;
    esac],
    [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)

    _LT_DECL([build_libtool_libs], [enable_shared], [0],
        [Whether or not to build shared libraries])
])# _LT_ENABLE_SHARED

LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])

# Old names:
# AC_ENABLE_SHARED([no]) sets the 'disable-shared' LT_INIT option; any
# other argument sets 'shared'.  AC_DISABLE_SHARED always sets
# 'disable-shared'.
AC_DEFUN([AC_ENABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
])

AC_DEFUN([AC_DISABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], [disable-shared])
])

AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
dnl AC_DEFUN([AM_DISABLE_SHARED], [])



# _LT_ENABLE_STATIC([DEFAULT])
# ----------------------------
# implement the --enable-static flag, and support the 'static' and
# 'disable-static' LT_INIT options.
# DEFAULT is either 'yes' or 'no'.  If omitted, it defaults to 'yes'.
# A PKGS list is matched against $PACKAGE in the same way as for
# --enable-shared.
m4_define([_LT_ENABLE_STATIC],
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([static],
    [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
        [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_static=yes ;;
    no) enable_static=no ;;
    *)
     enable_static=no
      # Look at the argument we got.  We use all the common list separators.
      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
      for pkg in $enableval; do
        IFS=$lt_save_ifs
        if test "X$pkg" = "X$p"; then
          enable_static=yes
        fi
      done
      IFS=$lt_save_ifs
      ;;
    esac],
    [enable_static=]_LT_ENABLE_STATIC_DEFAULT)

    _LT_DECL([build_old_libs], [enable_static], [0],
        [Whether or not to build static libraries])
])# _LT_ENABLE_STATIC

LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])

# Old names:
# AC_ENABLE_STATIC([no]) sets the 'disable-static' LT_INIT option; any
# other argument sets 'static'.  AC_DISABLE_STATIC always sets
# 'disable-static'.
AC_DEFUN([AC_ENABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
])

AC_DEFUN([AC_DISABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], [disable-static])
])

AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
dnl AC_DEFUN([AM_DISABLE_STATIC], [])



# _LT_ENABLE_FAST_INSTALL([DEFAULT])
# ----------------------------------
# implement the --enable-fast-install flag, and support the 'fast-install'
# and 'disable-fast-install' LT_INIT options.
# DEFAULT is either 'yes' or 'no'.  If omitted, it defaults to 'yes'.
# A PKGS list is matched against $PACKAGE in the same way as for
# --enable-shared.
m4_define([_LT_ENABLE_FAST_INSTALL],
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([fast-install],
    [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
    [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_fast_install=yes ;;
    no) enable_fast_install=no ;;
    *)
      enable_fast_install=no
      # Look at the argument we got.  We use all the common list separators.
      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
      for pkg in $enableval; do
        IFS=$lt_save_ifs
        if test "X$pkg" = "X$p"; then
          enable_fast_install=yes
        fi
      done
      IFS=$lt_save_ifs
      ;;
    esac],
    [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)

_LT_DECL([fast_install], [enable_fast_install], [0],
         [Whether or not to optimize for fast installation])dnl
])# _LT_ENABLE_FAST_INSTALL

LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])

# Old names:
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the 'fast-install' option into LT_INIT's first parameter.])
])

AU_DEFUN([AC_DISABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the 'disable-fast-install' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])


# _LT_WITH_AIX_SONAME([DEFAULT])
# ----------------------------------
# implement the --with-aix-soname flag, and support the `aix-soname=aix'
# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT
# is either `aix', `both' or `svr4'.  If omitted, it defaults to `aix'.
# The option is only consulted when the host matches power*-*-aix5 through
# aix9 and shared libraries are enabled; on every other host
# with_aix_soname is forced to 'aix'.
m4_define([_LT_WITH_AIX_SONAME],
[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl
shared_archive_member_spec=
case $host,$enable_shared in
power*-*-aix[[5-9]]*,yes)
  AC_MSG_CHECKING([which variant of shared library versioning to provide])
  AC_ARG_WITH([aix-soname],
    [AS_HELP_STRING([--with-aix-soname=aix|svr4|both],
      [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])],
    [case $withval in
    aix|svr4|both)
      ;;
    *)
      AC_MSG_ERROR([Unknown argument to --with-aix-soname])
      ;;
    esac
    lt_cv_with_aix_soname=$with_aix_soname],
    [AC_CACHE_VAL([lt_cv_with_aix_soname],
      [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT)
    with_aix_soname=$lt_cv_with_aix_soname])
  AC_MSG_RESULT([$with_aix_soname])
  if test aix != "$with_aix_soname"; then
    # For the AIX way of multilib, we name the shared archive member
    # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o',
    # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File.
    # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag,
    # the AIX toolchain works better with OBJECT_MODE set (default 32).
    if test 64 = "${OBJECT_MODE-32}"; then
      shared_archive_member_spec=shr_64
    else
      shared_archive_member_spec=shr
    fi
  fi
  ;;
*)
  with_aix_soname=aix
  ;;
esac

_LT_DECL([], [shared_archive_member_spec], [0],
    [Shared archive member basename, for filename based shared library versioning on AIX])dnl
])# _LT_WITH_AIX_SONAME

LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])])
LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])])
LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])])


# _LT_WITH_PIC([MODE])
# --------------------
# implement the --with-pic flag, and support the 'pic-only' and 'no-pic'
# LT_INIT options.
# MODE is either 'yes' or 'no'.  If omitted, it defaults to 'both'
# (recorded as pic_mode=default).  A PKGS list given to --with-pic selects
# pic_mode=yes only if it contains the value of $PACKAGE.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
        [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
    [lt_p=${PACKAGE-default}
    case $withval in
    yes|no) pic_mode=$withval ;;
    *)
      pic_mode=default
      # Look at the argument we got.  We use all the common list separators.
      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
      for lt_pkg in $withval; do
        IFS=$lt_save_ifs
        if test "X$lt_pkg" = "X$lt_p"; then
          pic_mode=yes
        fi
      done
      IFS=$lt_save_ifs
      ;;
    esac],
    [pic_mode=m4_default([$1], [default])])

_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
])# _LT_WITH_PIC

LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])

# Old name:
AU_DEFUN([AC_LIBTOOL_PICMODE],
[_LT_SET_OPTION([LT_INIT], [pic-only])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the 'pic-only' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])

## ----------------- ##
## LTDL_INIT Options ##
## ----------------- ##

# Each LTDL_INIT option below redefines _LTDL_MODE or _LTDL_TYPE to its own
# name; both start out empty.
m4_define([_LTDL_MODE], [])
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
                 [m4_define([_LTDL_MODE], [nonrecursive])])
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
                 [m4_define([_LTDL_MODE], [recursive])])
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
                 [m4_define([_LTDL_MODE], [subproject])])

m4_define([_LTDL_TYPE], [])
LT_OPTION_DEFINE([LTDL_INIT], [installable],
                 [m4_define([_LTDL_TYPE], [installable])])
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
                 [m4_define([_LTDL_TYPE], [convenience])])
--------------------------------------------------------------------------------
/m4/ltsugar.m4:
--------------------------------------------------------------------------------
# ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.

# serial 6 ltsugar.m4

# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])


# lt_join(SEP, ARG1, [ARG2...])
# -----------------------------
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
# associated separator.
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
# versions in m4sugar had bugs.
# Note: in the recursive branch the literal '_' is immediately followed by
# $0, so the text forms a call to the helper _lt_join defined just below.
m4_define([lt_join],
[m4_if([$#], [1], [],
       [$#], [2], [[$2]],
       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
m4_define([_lt_join],
[m4_if([$#$2], [2], [],
       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])


# lt_car(LIST)
# lt_cdr(LIST)
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59, which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
       [$#], 1, [],
       [m4_dquote(m4_shift($@))])])
# lt_unquote(ARG) expands to its first argument as given (no added quotes).
m4_define([lt_unquote], $1)


# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
# than defined and empty).
#
# This macro is needed until we can rely on Autoconf 2.62, since earlier
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
m4_define([lt_append],
[m4_define([$1],
           m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])



# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
# ----------------------------------------------------------
# Produce a SEP delimited list of all paired combinations of elements of
# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
# has the form PREFIXmINFIXSUFFIXn.
# Needed until we can rely on m4_combine added in Autoconf 2.62.
# Expands to nothing when called with three or fewer arguments, i.e. when
# no SUFFIX is given.  _Lt_sep is pushed as a macro that redefines itself
# to lt_car on its first use.
m4_define([lt_combine],
[m4_if(m4_eval([$# > 3]), [1],
       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
[[m4_foreach([_Lt_prefix], [$2],
             [m4_foreach([_Lt_suffix],
                ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
        [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])


# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
# -----------------------------------------------------------------------
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
# Membership is tested by searching for SEPARATOR VARNAME SEPARATOR inside
# the current value wrapped in SEPARATORs; an undefined MACRO-NAME always
# counts as not containing VARNAME.
m4_define([lt_if_append_uniq],
[m4_ifdef([$1],
          [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
                 [lt_append([$1], [$2], [$3])$4],
                 [$5])],
          [lt_append([$1], [$2], [$3])$4])])


# lt_dict_add(DICT, KEY, VALUE)
# -----------------------------
# Store VALUE by defining the macro literally named 'DICT(KEY)'.
m4_define([lt_dict_add],
[m4_define([$1($2)], [$3])])


# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
# --------------------------------------------
# Store VALUE by defining the macro literally named 'DICT(KEY:SUBKEY)'.
m4_define([lt_dict_add_subkey],
[m4_define([$1($2:$3)], [$4])])


# lt_dict_fetch(DICT, KEY, [SUBKEY])
# ----------------------------------
# Expand to the definition stored for KEY, or for KEY:SUBKEY when SUBKEY is
# non-empty.  Expands to nothing when no such entry has been defined.
m4_define([lt_dict_fetch],
[m4_ifval([$3],
        m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])


# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
# -----------------------------------------------------------------
# Expand IF-TRUE when lt_dict_fetch(DICT, KEY, SUBKEY) equals VALUE,
# IF-FALSE otherwise.
m4_define([lt_if_dict_fetch],
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
        [$5],
    [$6])])


# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
# --------------------------------------------------------------
# Expand to the list of those KEYs whose entry in DICT (for SUBKEY, when
# given) equals VALUE, joined with SEPARATOR (default ', ').  Expands to
# nothing when no KEY is supplied.
m4_define([lt_dict_filter],
[m4_if([$5], [], [],
  [lt_join(m4_quote(m4_default([$4], [[, ]])),
           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
                      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
])
--------------------------------------------------------------------------------
/m4/ltversion.m4:
--------------------------------------------------------------------------------
# ltversion.m4 -- version numbers                       -*- Autoconf -*-
#
#   Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc.
#   Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.

# @configure_input@

# serial 4179 ltversion.m4
# This file is part of GNU Libtool

m4_define([LT_PACKAGE_VERSION], [2.4.6])
m4_define([LT_PACKAGE_REVISION], [2.4.6])

# LTVERSION_VERSION
# -----------------
# Set the shell variables macro_version and macro_revision to the libtool
# release string and declare both through _LT_DECL.
AC_DEFUN([LTVERSION_VERSION],
[macro_version='2.4.6'
macro_revision='2.4.6'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])
--------------------------------------------------------------------------------
/m4/lt~obsolete.m4:
--------------------------------------------------------------------------------
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
#
#   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
#   Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.

# serial 5 lt~obsolete.m4

# These exist entirely to fool aclocal when bootstrapping libtool.
#
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN),
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
# The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
# using a macro with the same name in our local m4/libtool.m4 it'll
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
# and doesn't know about Autoconf macros at all.)
#
# So we provide this file, which has a silly filename so it's always
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
# we give up compatibility with versions before 1.7, at which point
# we need to keep only those names which we still refer to.

# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])

# Stub list: every entry below defines its macro name with an empty body,
# and only when that name has no definition yet, so an existing definition
# is never replaced by a stub.
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
# BLAKE2 reference source code package - optimized C implementations
#
# Written in 2012 by Samuel Neves
#
# To the extent possible under law, the author(s) have dedicated all copyright
# and related and neighboring rights to this software to the public domain
# worldwide. This software is distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain Dedication along with
# this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.

EXTRA_DIST =

# Project-wide flags are appended to the user-level variables, so they reach
# every compile and link command generated from this file.
# NOTE(review): Automake treats CPPFLAGS/CFLAGS/LDFLAGS as user variables and
# recommends AM_* or per-target variables instead; confirm before changing,
# because the per-target *_CPPFLAGS/*_CFLAGS below replace AM_* values.
CPPFLAGS += $(LTDLINCL) $(OPENMP_CFLAGS)
CFLAGS += $(OPENMP_CFLAGS)
LDFLAGS += -version-info $(B2_LIBRARY_VERSION)

# The installed library.  SUFFIX is defined empty for its own sources.
lib_LTLIBRARIES = libb2.la
libb2_la_LIBADD = # -lgomp -lpthread
libb2_la_LDFLAGS = -no-undefined
libb2_la_CPPFLAGS = -DSUFFIX= \
                    $(LTDLINCL)

include_HEADERS = blake2.h

# Fat build: one non-installed helper library per algorithm and instruction
# set, each compiled with its own SUFFIX and -m flags and then folded into
# libb2.la next to the dispatcher.
if USE_FAT
noinst_LTLIBRARIES = libblake2b_ref.la \
                     libblake2b_sse2.la \
                     libblake2b_ssse3.la \
                     libblake2b_sse41.la \
                     libblake2b_avx.la \
                     libblake2b_xop.la \
                     libblake2s_ref.la \
                     libblake2s_sse2.la \
                     libblake2s_ssse3.la \
                     libblake2s_sse41.la \
                     libblake2s_avx.la \
                     libblake2s_xop.la

libb2_la_SOURCES = blake2-dispatch.c blake2sp.c blake2bp.c
libb2_la_LIBADD += libblake2b_ref.la \
                   libblake2b_sse2.la \
                   libblake2b_ssse3.la \
                   libblake2b_sse41.la \
                   libblake2b_avx.la \
                   libblake2b_xop.la \
                   libblake2s_ref.la \
                   libblake2s_sse2.la \
                   libblake2s_ssse3.la \
                   libblake2s_sse41.la \
                   libblake2s_avx.la \
                   libblake2s_xop.la


libblake2b_ref_la_SOURCES = blake2b-ref.c
libblake2b_ref_la_CPPFLAGS = -DSUFFIX=_ref
libblake2b_ref_la_CFLAGS =

libblake2b_sse2_la_SOURCES = blake2b.c
libblake2b_sse2_la_CPPFLAGS = -DSUFFIX=_sse2
libblake2b_sse2_la_CFLAGS = -msse2

libblake2b_ssse3_la_SOURCES = blake2b.c
libblake2b_ssse3_la_CPPFLAGS = -DSUFFIX=_ssse3
libblake2b_ssse3_la_CFLAGS = -msse2 -mssse3

libblake2b_sse41_la_SOURCES = blake2b.c
libblake2b_sse41_la_CPPFLAGS = -DSUFFIX=_sse41
libblake2b_sse41_la_CFLAGS = -msse2 -mssse3 -msse4.1

libblake2b_avx_la_SOURCES = blake2b.c
libblake2b_avx_la_CPPFLAGS = -DSUFFIX=_avx
libblake2b_avx_la_CFLAGS = -msse2 -mssse3 -msse4.1 -mavx

libblake2b_xop_la_SOURCES = blake2b.c
libblake2b_xop_la_CPPFLAGS = -DSUFFIX=_xop
libblake2b_xop_la_CFLAGS = -msse2 -mssse3 -msse4.1 -mavx -mxop


libblake2s_ref_la_SOURCES = blake2s-ref.c
libblake2s_ref_la_CPPFLAGS = -DSUFFIX=_ref
libblake2s_ref_la_CFLAGS =

libblake2s_sse2_la_SOURCES = blake2s.c
libblake2s_sse2_la_CPPFLAGS = -DSUFFIX=_sse2
libblake2s_sse2_la_CFLAGS = -msse2

libblake2s_ssse3_la_SOURCES = blake2s.c
libblake2s_ssse3_la_CPPFLAGS = -DSUFFIX=_ssse3
libblake2s_ssse3_la_CFLAGS = -msse2 -mssse3

libblake2s_sse41_la_SOURCES = blake2s.c
libblake2s_sse41_la_CPPFLAGS = -DSUFFIX=_sse41
libblake2s_sse41_la_CFLAGS = -msse2 -mssse3 -msse4.1

libblake2s_avx_la_SOURCES = blake2s.c
libblake2s_avx_la_CPPFLAGS = -DSUFFIX=_avx
libblake2s_avx_la_CFLAGS = -msse2 -mssse3 -msse4.1 -mavx

libblake2s_xop_la_SOURCES = blake2s.c
libblake2s_xop_la_CPPFLAGS = -DSUFFIX=_xop
libblake2s_xop_la_CFLAGS = -msse2 -mssse3 -msse4.1 -mavx -mxop

else

# Non-fat builds compile exactly one implementation straight into libb2.la:
# the SSE sources when USE_SSE is set, the portable reference sources
# otherwise.
if USE_SSE
libb2_la_SOURCES = blake2sp.c \
                   blake2bp.c \
                   blake2s.c \
                   blake2b.c \
                   blake2-impl.h \
                   blake2-config.h \
                   blake2s-round.h \
                   blake2b-round.h \
                   blake2s-load-xop.h \
                   blake2s-load-sse41.h \
                   blake2s-load-sse2.h \
                   blake2b-load-sse41.h \
                   blake2b-load-sse2.h
else
libb2_la_SOURCES = blake2s-ref.c \
                   blake2b-ref.c \
                   blake2.h \
                   blake2-impl.h \
                   blake2sp.c \
                   blake2bp.c \
                   blake2-kat.h
endif
endif

TESTS_TARGETS = blake2s-test \
                blake2b-test \
                blake2sp-test \
                blake2bp-test

check_PROGRAMS = $(TESTS_TARGETS)
TESTS = $(TESTS_TARGETS)

TESTS_LDADD = ${top_builddir}/src/libb2.la

# Automake only recognises the plural primary <program>_SOURCES.  The
# singular spelling used previously created ordinary make variables that
# Automake ignored, so the known-answer header was never recorded as a
# source of the test programs.
blake2s_test_SOURCES = blake2s-test.c blake2-kat.h
blake2s_test_LDADD = $(TESTS_LDADD)

blake2b_test_SOURCES = blake2b-test.c blake2-kat.h
blake2b_test_LDADD = $(TESTS_LDADD)

blake2sp_test_SOURCES = blake2sp-test.c blake2-kat.h
blake2sp_test_LDADD = $(TESTS_LDADD)

blake2bp_test_SOURCES = blake2bp-test.c blake2-kat.h
blake2bp_test_LDADD = $(TESTS_LDADD)

--------------------------------------------------------------------------------
/src/blake2-config.h:
--------------------------------------------------------------------------------
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_CONFIG_H__
#define __BLAKE2_CONFIG_H__

/* Translate the compiler's instruction-set macros into HAVE_* switches. */
#if defined(__SSE2__)
#define HAVE_SSE2
#endif

#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif

#if defined(__SSE4_1__)
#define HAVE_SSE4_1
#endif

#if defined(__AVX__)
#define HAVE_AVX
#endif

#if defined(__XOP__)
#define HAVE_XOP
#endif


/* Implication chain: each switch turns on the next lower one, so
   AVX2 or XOP -> AVX -> SSE4.1 -> SSSE3 -> SSE2.  HAVE_AVX2 is never
   derived from a compiler macro in this header; it only takes part when
   something else has already defined it. */
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif

#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif

#ifdef HAVE_AVX
#ifndef HAVE_SSE4_1
#define HAVE_SSE4_1
#endif
#endif

#ifdef HAVE_SSE4_1
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif

#ifdef HAVE_SSSE3
#define HAVE_SSE2
#endif

/* Including this header without SSE2 available is a hard error. */
#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
68 | #endif 69 | 70 | #endif 71 | 72 | -------------------------------------------------------------------------------- /src/blake2-dispatch.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 12 | */ 13 | #include 14 | #if defined(WIN32) 15 | #include 16 | #endif 17 | #include "blake2.h" 18 | 19 | #if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64) 20 | #define HAVE_X86 21 | #endif 22 | 23 | typedef enum 24 | { 25 | NONE = 0, 26 | #if defined(HAVE_X86) 27 | SSE2 = 1, 28 | SSSE3 = 2, 29 | SSE41 = 3, 30 | AVX = 4, 31 | XOP = 5, 32 | /* AVX2 = 6, */ 33 | #endif 34 | #if defined(__x86_64__) || defined(_M_X64) 35 | DEFAULT = SSE2 36 | #else 37 | DEFAULT = NONE 38 | #endif 39 | } cpu_feature_t; 40 | 41 | static const char feature_names[][8] = 42 | { 43 | "none", 44 | #if defined(HAVE_X86) 45 | "sse2", 46 | "ssse3", 47 | "sse41", 48 | "avx", 49 | "xop", 50 | /* "avx2" */ 51 | #endif 52 | }; 53 | 54 | #if defined(HAVE_X86) 55 | 56 | #if defined(__GNUC__) 57 | static inline void cpuid( uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx ) 58 | { 59 | __asm__ __volatile__( 60 | #if defined(__i386__) /* This is needed for -fPIC to work on i386 */ 61 | "movl %%ebx, %%esi\n\t" 62 | #endif 63 | "cpuid\n\t" 64 | #if defined(__i386__) 65 | "xchgl %%ebx, %%esi\n\t" 66 | : "=a"( *eax ), "=S"( *ebx ), "=c"( *ecx ), "=d"( *edx ) : "a"( *eax ) ); 67 | #else 68 | : "=a"( *eax ), "=b"( *ebx ), "=c"( *ecx ), "=d"( *edx ) : "a"( *eax ) ); 69 | #endif 70 | } 71 
| 72 | /* xgetbv insn is GCC 4.5 and above. The byte codes sidestep the limitation */ 73 | /* Also see http://www.agner.org/optimize/vectorclass/read.php?i=65 */ 74 | static inline uint64_t xgetbv(uint32_t xcr) 75 | { 76 | uint32_t a, d; 77 | __asm__ __volatile__( 78 | ".byte 0x0f, 0x01, 0xd0" 79 | : "=a"(a),"=d"(d) 80 | : "c"(xcr) 81 | ); 82 | return ((uint64_t)d << 32) | a; 83 | } 84 | 85 | #elif defined(_MSC_VER) 86 | #include 87 | static inline void cpuid( uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx ) 88 | { 89 | int regs[4]; 90 | __cpuid( regs, *eax ); 91 | *eax = regs[0]; 92 | *ebx = regs[1]; 93 | *ecx = regs[2]; 94 | *edx = regs[3]; 95 | } 96 | #else 97 | #error "Don't know how to call cpuid on this compiler!" 98 | #endif 99 | 100 | #endif /* HAVE_X86 */ 101 | 102 | static inline cpu_feature_t get_cpu_features( void ) 103 | { 104 | #if defined(HAVE_X86) 105 | static volatile int initialized = 0; 106 | static cpu_feature_t feature = DEFAULT; 107 | uint32_t eax, ecx, edx, ebx; 108 | 109 | if( initialized ) 110 | return feature; 111 | 112 | eax = 1; 113 | cpuid( &eax, &ebx, &ecx, &edx ); 114 | 115 | if( 1 & ( edx >> 26 ) ) 116 | feature = SSE2; 117 | 118 | if( 1 & ( ecx >> 9 ) ) 119 | feature = SSSE3; 120 | 121 | if( 1 & ( ecx >> 19 ) ) 122 | feature = SSE41; 123 | 124 | #if defined(WIN32) /* Work around the fact that Windows <7 does NOT support AVX... 
*/ 125 | if( IsProcessorFeaturePresent(17) ) /* Some environments don't know about PF_XSAVE_ENABLED */ 126 | #endif 127 | { 128 | /* check for AVX and OSXSAVE bits */ 129 | if( 1 & ( ecx >> 28 ) & (ecx >> 27) ) { 130 | #if !defined(WIN32) /* Already checked for this in WIN32 */ 131 | if( (xgetbv(0) & 6) == 6 ) /* XCR0 */ 132 | #endif 133 | feature = AVX; 134 | } 135 | 136 | 137 | eax = 0x80000001; 138 | cpuid( &eax, &ebx, &ecx, &edx ); 139 | 140 | if( 1 & ( ecx >> 11 ) ) 141 | feature = XOP; 142 | } 143 | 144 | /* For future architectures */ 145 | /* 146 | eax = 7; ecx = 0; 147 | cpuid(&eax, &ebx, &ecx, &edx); 148 | 149 | if(1&(ebx >> 5)) 150 | feature = AVX2; 151 | */ 152 | /* fprintf( stderr, "Using %s engine\n", feature_names[feature] ); */ 153 | initialized = 1; 154 | return feature; 155 | #else 156 | return NONE; 157 | #endif 158 | } 159 | 160 | 161 | 162 | #if defined(__cplusplus) 163 | extern "C" { 164 | #endif 165 | int blake2b_init_ref( blake2b_state *S, size_t outlen ); 166 | int blake2b_init_key_ref( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 167 | int blake2b_init_param_ref( blake2b_state *S, const blake2b_param *P ); 168 | int blake2b_update_ref( blake2b_state *S, const uint8_t *in, size_t inlen ); 169 | int blake2b_final_ref( blake2b_state *S, uint8_t *out, size_t outlen ); 170 | int blake2b_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 171 | 172 | #if defined(HAVE_X86) 173 | 174 | int blake2b_init_sse2( blake2b_state *S, size_t outlen ); 175 | int blake2b_init_key_sse2( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 176 | int blake2b_init_param_sse2( blake2b_state *S, const blake2b_param *P ); 177 | int blake2b_update_sse2( blake2b_state *S, const uint8_t *in, size_t inlen ); 178 | int blake2b_final_sse2( blake2b_state *S, uint8_t *out, size_t outlen ); 179 | int blake2b_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, 
size_t keylen ); 180 | 181 | int blake2b_init_ssse3( blake2b_state *S, size_t outlen ); 182 | int blake2b_init_key_ssse3( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 183 | int blake2b_init_param_ssse3( blake2b_state *S, const blake2b_param *P ); 184 | int blake2b_update_ssse3( blake2b_state *S, const uint8_t *in, size_t inlen ); 185 | int blake2b_final_ssse3( blake2b_state *S, uint8_t *out, size_t outlen ); 186 | int blake2b_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 187 | 188 | int blake2b_init_sse41( blake2b_state *S, size_t outlen ); 189 | int blake2b_init_key_sse41( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 190 | int blake2b_init_param_sse41( blake2b_state *S, const blake2b_param *P ); 191 | int blake2b_update_sse41( blake2b_state *S, const uint8_t *in, size_t inlen ); 192 | int blake2b_final_sse41( blake2b_state *S, uint8_t *out, size_t outlen ); 193 | int blake2b_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 194 | 195 | int blake2b_init_avx( blake2b_state *S, size_t outlen ); 196 | int blake2b_init_key_avx( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 197 | int blake2b_init_param_avx( blake2b_state *S, const blake2b_param *P ); 198 | int blake2b_update_avx( blake2b_state *S, const uint8_t *in, size_t inlen ); 199 | int blake2b_final_avx( blake2b_state *S, uint8_t *out, size_t outlen ); 200 | int blake2b_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 201 | 202 | int blake2b_init_xop( blake2b_state *S, size_t outlen ); 203 | int blake2b_init_key_xop( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 204 | int blake2b_init_param_xop( blake2b_state *S, const blake2b_param *P ); 205 | int blake2b_update_xop( blake2b_state *S, const uint8_t *in, size_t inlen ); 206 | int blake2b_final_xop( blake2b_state *S, uint8_t *out, 
size_t outlen ); 207 | int blake2b_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 208 | 209 | #endif /* HAVE_X86 */ 210 | 211 | int blake2s_init_ref( blake2s_state *S, size_t outlen ); 212 | int blake2s_init_key_ref( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 213 | int blake2s_init_param_ref( blake2s_state *S, const blake2s_param *P ); 214 | int blake2s_update_ref( blake2s_state *S, const uint8_t *in, size_t inlen ); 215 | int blake2s_final_ref( blake2s_state *S, uint8_t *out, size_t outlen ); 216 | int blake2s_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 217 | 218 | #if defined(HAVE_X86) 219 | 220 | int blake2s_init_sse2( blake2s_state *S, size_t outlen ); 221 | int blake2s_init_key_sse2( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 222 | int blake2s_init_param_sse2( blake2s_state *S, const blake2s_param *P ); 223 | int blake2s_update_sse2( blake2s_state *S, const uint8_t *in, size_t inlen ); 224 | int blake2s_final_sse2( blake2s_state *S, uint8_t *out, size_t outlen ); 225 | int blake2s_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 226 | 227 | int blake2s_init_ssse3( blake2s_state *S, size_t outlen ); 228 | int blake2s_init_key_ssse3( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 229 | int blake2s_init_param_ssse3( blake2s_state *S, const blake2s_param *P ); 230 | int blake2s_update_ssse3( blake2s_state *S, const uint8_t *in, size_t inlen ); 231 | int blake2s_final_ssse3( blake2s_state *S, uint8_t *out, size_t outlen ); 232 | int blake2s_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 233 | 234 | int blake2s_init_sse41( blake2s_state *S, size_t outlen ); 235 | int blake2s_init_key_sse41( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 236 | int blake2s_init_param_sse41( 
blake2s_state *S, const blake2s_param *P ); 237 | int blake2s_update_sse41( blake2s_state *S, const uint8_t *in, size_t inlen ); 238 | int blake2s_final_sse41( blake2s_state *S, uint8_t *out, size_t outlen ); 239 | int blake2s_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 240 | 241 | int blake2s_init_avx( blake2s_state *S, size_t outlen ); 242 | int blake2s_init_key_avx( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 243 | int blake2s_init_param_avx( blake2s_state *S, const blake2s_param *P ); 244 | int blake2s_update_avx( blake2s_state *S, const uint8_t *in, size_t inlen ); 245 | int blake2s_final_avx( blake2s_state *S, uint8_t *out, size_t outlen ); 246 | int blake2s_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 247 | 248 | int blake2s_init_xop( blake2s_state *S, size_t outlen ); 249 | int blake2s_init_key_xop( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 250 | int blake2s_init_param_xop( blake2s_state *S, const blake2s_param *P ); 251 | int blake2s_update_xop( blake2s_state *S, const uint8_t *in, size_t inlen ); 252 | int blake2s_final_xop( blake2s_state *S, uint8_t *out, size_t outlen ); 253 | int blake2s_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 254 | 255 | #endif /* HAVE_X86 */ 256 | 257 | #if defined(__cplusplus) 258 | } 259 | #endif 260 | 261 | typedef int ( *blake2b_init_fn )( blake2b_state *, size_t ); 262 | typedef int ( *blake2b_init_key_fn )( blake2b_state *, size_t, const void *, size_t ); 263 | typedef int ( *blake2b_init_param_fn )( blake2b_state *, const blake2b_param * ); 264 | typedef int ( *blake2b_update_fn )( blake2b_state *, const uint8_t *, size_t ); 265 | typedef int ( *blake2b_final_fn )( blake2b_state *, uint8_t *, size_t ); 266 | typedef int ( *blake2b_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t ); 267 | 268 | 
/* Function-pointer types shared by every BLAKE2s implementation. */
typedef int ( *blake2s_init_fn )( blake2s_state *, size_t );
typedef int ( *blake2s_init_key_fn )( blake2s_state *, size_t, const void *, size_t );
typedef int ( *blake2s_init_param_fn )( blake2s_state *, const blake2s_param * );
typedef int ( *blake2s_update_fn )( blake2s_state *, const uint8_t *, size_t );
typedef int ( *blake2s_final_fn )( blake2s_state *, uint8_t *, size_t );
typedef int ( *blake2s_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t );

/* Dispatch tables.  Every table is indexed with the value returned by
   get_cpu_features(), so the entries appear in cpu_feature_t order:
   reference, sse2, ssse3, sse41, avx, xop.  Builds without HAVE_X86 carry
   the reference entry only. */
static const blake2b_init_fn blake2b_init_table[] =
{
  blake2b_init_ref,
#if defined(HAVE_X86)
  blake2b_init_sse2,
  blake2b_init_ssse3,
  blake2b_init_sse41,
  blake2b_init_avx,
  blake2b_init_xop
#endif
};

static const blake2b_init_key_fn blake2b_init_key_table[] =
{
  blake2b_init_key_ref,
#if defined(HAVE_X86)
  blake2b_init_key_sse2,
  blake2b_init_key_ssse3,
  blake2b_init_key_sse41,
  blake2b_init_key_avx,
  blake2b_init_key_xop
#endif
};

static const blake2b_init_param_fn blake2b_init_param_table[] =
{
  blake2b_init_param_ref,
#if defined(HAVE_X86)
  blake2b_init_param_sse2,
  blake2b_init_param_ssse3,
  blake2b_init_param_sse41,
  blake2b_init_param_avx,
  blake2b_init_param_xop
#endif
};

static const blake2b_update_fn blake2b_update_table[] =
{
  blake2b_update_ref,
#if defined(HAVE_X86)
  blake2b_update_sse2,
  blake2b_update_ssse3,
  blake2b_update_sse41,
  blake2b_update_avx,
  blake2b_update_xop
#endif
};

static const blake2b_final_fn blake2b_final_table[] =
{
  blake2b_final_ref,
#if defined(HAVE_X86)
  blake2b_final_sse2,
  blake2b_final_ssse3,
  blake2b_final_sse41,
  blake2b_final_avx,
  blake2b_final_xop
#endif
};

static const blake2b_fn blake2b_table[] =
{
  blake2b_ref,
#if defined(HAVE_X86)
  blake2b_sse2,
  blake2b_ssse3,
  blake2b_sse41,
  blake2b_avx,
  blake2b_xop
#endif
};

static const blake2s_init_fn blake2s_init_table[] =
{
  blake2s_init_ref,
#if defined(HAVE_X86)
  blake2s_init_sse2,
  blake2s_init_ssse3,
  blake2s_init_sse41,
  blake2s_init_avx,
  blake2s_init_xop
#endif
};

static const blake2s_init_key_fn blake2s_init_key_table[] =
{
  blake2s_init_key_ref,
#if defined(HAVE_X86)
  blake2s_init_key_sse2,
  blake2s_init_key_ssse3,
  blake2s_init_key_sse41,
  blake2s_init_key_avx,
  blake2s_init_key_xop
#endif
};

static const blake2s_init_param_fn blake2s_init_param_table[] =
{
  blake2s_init_param_ref,
#if defined(HAVE_X86)
  blake2s_init_param_sse2,
  blake2s_init_param_ssse3,
  blake2s_init_param_sse41,
  blake2s_init_param_avx,
  blake2s_init_param_xop
#endif
};

static const blake2s_update_fn blake2s_update_table[] =
{
  blake2s_update_ref,
#if defined(HAVE_X86)
  blake2s_update_sse2,
  blake2s_update_ssse3,
  blake2s_update_sse41,
  blake2s_update_avx,
  blake2s_update_xop
#endif
};

static const blake2s_final_fn blake2s_final_table[] =
{
  blake2s_final_ref,
#if defined(HAVE_X86)
  blake2s_final_sse2,
  blake2s_final_ssse3,
  blake2s_final_sse41,
  blake2s_final_avx,
  blake2s_final_xop
#endif
};

static const blake2s_fn blake2s_table[] =
{
  blake2s_ref,
#if defined(HAVE_X86)
  blake2s_sse2,
  blake2s_ssse3,
  blake2s_sse41,
  blake2s_avx,
  blake2s_xop
#endif
};

/* Forward declarations of the resolver trampolines defined further down;
   they are needed here because the pointers below start out aimed at
   them. */
#if defined(__cplusplus)
extern "C" {
#endif
  int blake2b_init_dispatch( blake2b_state *S, size_t outlen );
  int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
  int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P );
  int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen );
  int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen );
  int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );

  int blake2s_init_dispatch( blake2s_state *S, size_t outlen );
  int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
  int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P );
  int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen );
  int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen );
  int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif

/* Active entry points, one per public function.  Each starts at its
   *_dispatch trampoline and is overwritten with the selected
   implementation the first time that trampoline runs. */
static blake2b_init_fn blake2b_init_ptr = blake2b_init_dispatch;
static blake2b_init_key_fn blake2b_init_key_ptr = blake2b_init_key_dispatch;
static blake2b_init_param_fn blake2b_init_param_ptr = blake2b_init_param_dispatch;
static blake2b_update_fn blake2b_update_ptr = blake2b_update_dispatch;
static blake2b_final_fn blake2b_final_ptr = blake2b_final_dispatch;
static blake2b_fn blake2b_ptr = blake2b_dispatch;

static blake2s_init_fn blake2s_init_ptr = blake2s_init_dispatch;
static blake2s_init_key_fn blake2s_init_key_ptr = blake2s_init_key_dispatch;
static blake2s_init_param_fn blake2s_init_param_ptr = blake2s_init_param_dispatch;
static blake2s_update_fn blake2s_update_ptr = blake2s_update_dispatch;
static blake2s_final_fn blake2s_final_ptr = blake2s_final_dispatch;
static blake2s_fn blake2s_ptr = blake2s_dispatch;

/* BLAKE2b trampolines: pick the table entry for the detected CPU level,
   store it in the matching pointer, then forward the current call to it. */
int blake2b_init_dispatch( blake2b_state *S, size_t outlen )
{
  blake2b_init_ptr = blake2b_init_table[get_cpu_features()];
  return blake2b_init_ptr( S, outlen );
}

int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2b_init_key_ptr = blake2b_init_key_table[get_cpu_features()];
  return blake2b_init_key_ptr( S, outlen, key, keylen );
}

int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P )
{
  blake2b_init_param_ptr = blake2b_init_param_table[get_cpu_features()];
  return blake2b_init_param_ptr( S, P );
}

int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen )
{
  blake2b_update_ptr = blake2b_update_table[get_cpu_features()];
  return blake2b_update_ptr( S, in, inlen );
}

int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen )
{
  blake2b_final_ptr = blake2b_final_table[get_cpu_features()];
  return blake2b_final_ptr( S, out, outlen );
}

int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
  blake2b_ptr = blake2b_table[get_cpu_features()];
  return blake2b_ptr( out, in, key, outlen, inlen, keylen );
}

/* Public BLAKE2b API: thin wrappers that call through the active pointer
   and return whatever the selected implementation returns. */
BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen )
{
  return blake2b_init_ptr( S, outlen );
}

BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  return blake2b_init_key_ptr( S, outlen, key, keylen );
}

BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
  return blake2b_init_param_ptr( S, P );
}

BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
  return blake2b_update_ptr( S, in, inlen );
}

BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
  return blake2b_final_ptr( S, out, outlen );
}

BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
  return blake2b_ptr( out, in, key, outlen, inlen, keylen );
}

/* BLAKE2s trampolines: same scheme as the BLAKE2b ones above. */
int blake2s_init_dispatch( blake2s_state *S, size_t outlen )
{
  blake2s_init_ptr = blake2s_init_table[get_cpu_features()];
  return blake2s_init_ptr( S, outlen );
}

int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2s_init_key_ptr = blake2s_init_key_table[get_cpu_features()];
  return blake2s_init_key_ptr( S, outlen, key, keylen );
}

int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P )
{
  blake2s_init_param_ptr = blake2s_init_param_table[get_cpu_features()];
  return blake2s_init_param_ptr( S, P );
}

int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen )
{
  blake2s_update_ptr = blake2s_update_table[get_cpu_features()];
  return blake2s_update_ptr( S, in, inlen );
}

int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen )
{
  blake2s_final_ptr = blake2s_final_table[get_cpu_features()];
  return blake2s_final_ptr( S, out, outlen );
}

int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
  blake2s_ptr = blake2s_table[get_cpu_features()];
  return blake2s_ptr( out, in, key, outlen, inlen, keylen );
}

/* Public BLAKE2s API: thin wrappers that call through the active pointer. */
BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen )
{
  return blake2s_init_ptr( S, outlen );
}

BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  return blake2s_init_key_ptr( S, outlen, key, keylen );
}

BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  return blake2s_init_param_ptr( S, P );
}

BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
  return blake2s_update_ptr( S, in, inlen );
}

BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
  return blake2s_final_ptr( S, out, outlen );
}

BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
  return blake2s_ptr( out, in, key, outlen, inlen, keylen );
}

--------------------------------------------------------------------------------
/src/blake2-impl.h:
--------------------------------------------------------------------------------
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
/* src/blake2-impl.h: endian-neutral load/store, rotation and memory-wiping helpers. */
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__

#if defined(_WIN32) || defined(WIN32)
#include <windows.h> /* SecureZeroMemory */
#endif

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * config.h supplies NATIVE_LITTLE_ENDIAN and the HAVE_* feature macros tested
 * below.  It is always included when the compiler cannot probe for it; when
 * __has_include is available it is included only if it exists, so this header
 * also compiles on its own (falling back to the portable code paths).
 */
#if defined(__has_include)
#if __has_include("config.h")
#include "config.h"
#endif
#else
#include "config.h"
#endif

/* BLAKE2_IMPL_NAME(fun) pastes the build-time SUFFIX macro onto fun. */
#define BLAKE2_IMPL_CAT(x,y) x ## y
#define BLAKE2_IMPL_EVAL(x,y)  BLAKE2_IMPL_CAT(x,y)
#define BLAKE2_IMPL_NAME(fun)  BLAKE2_IMPL_EVAL(fun, SUFFIX)

/* Read a 32-bit little-endian value from src (no alignment requirement). */
static inline uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint32_t w;
  memcpy( &w, src, sizeof( w ) );
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  uint32_t w = *p++;
  w |= ( uint32_t )( *p++ ) <<  8;
  w |= ( uint32_t )( *p++ ) << 16;
  w |= ( uint32_t )( *p++ ) << 24;
  return w;
#endif
}

/* Read a 64-bit little-endian value from src (no alignment requirement). */
static inline uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint64_t w;
  memcpy( &w, src, sizeof( w ) );
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  uint64_t w = *p++;
  w |= ( uint64_t )( *p++ ) <<  8;
  w |= ( uint64_t )( *p++ ) << 16;
  w |= ( uint64_t )( *p++ ) << 24;
  w |= ( uint64_t )( *p++ ) << 32;
  w |= ( uint64_t )( *p++ ) << 40;
  w |= ( uint64_t )( *p++ ) << 48;
  w |= ( uint64_t )( *p++ ) << 56;
  return w;
#endif
}

/* Write w to dst as 4 little-endian bytes. */
static inline void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy( dst, &w, sizeof( w ) );
#else
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
#endif
}

/* Write w to dst as 8 little-endian bytes. */
static inline void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy( dst, &w, sizeof( w ) );
#else
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
#endif
}

/* Read 6 little-endian bytes from src into the low 48 bits of the result. */
static inline uint64_t load48( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint64_t w = *p++;
  w |= ( uint64_t )( *p++ ) <<  8;
  w |= ( uint64_t )( *p++ ) << 16;
  w |= ( uint64_t )( *p++ ) << 24;
  w |= ( uint64_t )( *p++ ) << 32;
  w |= ( uint64_t )( *p++ ) << 40;
  return w;
}

/* Write the low 48 bits of w to dst as 6 little-endian bytes. */
static inline void store48( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
}

/*
 * Rotations.  Both shift counts are reduced modulo the word size so that a
 * count of 0 (or a multiple of the width) never produces a shift by the full
 * width, which is undefined behaviour in C.  Results for counts 1..width-1
 * are unchanged.
 */
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
  return ( w << ( c & 31u ) ) | ( w >> ( ( 0u - c ) & 31u ) );
}

static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
  return ( w << ( c & 63u ) ) | ( w >> ( ( 0u - c ) & 63u ) );
}

static inline uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31u ) ) | ( w << ( ( 0u - c ) & 31u ) );
}

static inline uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63u ) ) | ( w << ( ( 0u - c ) & 63u ) );
}

/* prevents compiler optimizing out memset() */
static inline void secure_zero_memory(void *v, size_t n)
{
#if defined(_WIN32) || defined(WIN32)
  SecureZeroMemory(v, n);
#elif defined(__hpux)
  /* Call memset through a volatile function pointer so the call is kept. */
  static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
  memset_v(v, 0, n);
#else
// prioritize first the general C11 call
#if defined(HAVE_MEMSET_S)
  memset_s(v, n, 0, n);
#elif defined(HAVE_EXPLICIT_BZERO)
  explicit_bzero(v, n);
#elif defined(HAVE_EXPLICIT_MEMSET)
  explicit_memset(v, 0, n);
#else
  memset(v, 0, n);
  /* Empty asm with a "memory" clobber: the store above must be performed. */
  __asm__ __volatile__("" :: "r"(v) : "memory");
#endif
#endif
}

#endif

/* --------------------------------------------------------------------------------
   /src/blake2.h:
   -------------------------------------------------------------------------------- */
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__

#include <stddef.h>
#include <stdint.h>

/* Symbol import/export/visibility attributes for the supported toolchains. */
#if defined(_WIN32) || defined(__CYGWIN__)
    #define BLAKE2_DLL_IMPORT __declspec(dllimport)
    #define BLAKE2_DLL_EXPORT __declspec(dllexport)
    #define BLAKE2_DLL_PRIVATE
#elif __GNUC__ >= 4
  #define BLAKE2_DLL_IMPORT   __attribute__ ((visibility ("default")))
  #define BLAKE2_DLL_EXPORT   __attribute__ ((visibility ("default")))
  #define BLAKE2_DLL_PRIVATE  __attribute__ ((visibility ("hidden")))
#else
  #define BLAKE2_DLL_IMPORT
  #define BLAKE2_DLL_EXPORT
  #define BLAKE2_DLL_PRIVATE
#endif

/* BLAKE2_API / BLAKE2_PRIVATE expand to nothing unless BLAKE2_DLL is defined. */
#if defined(BLAKE2_DLL)
  #if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL
    #define BLAKE2_API BLAKE2_DLL_EXPORT
  #else
    #define BLAKE2_API BLAKE2_DLL_IMPORT
  #endif
  #define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic
#else
  #define BLAKE2_API
  #define BLAKE2_PRIVATE
#endif

#if defined(__cplusplus)
extern "C" {
#elif defined(_MSC_VER) && !defined(inline)
#define inline __inline
#endif

  /* Sizes, in bytes, used by BLAKE2s. */
  enum blake2s_constant
  {
    BLAKE2S_BLOCKBYTES = 64,
    BLAKE2S_OUTBYTES   = 32,
    BLAKE2S_KEYBYTES   = 32,
    BLAKE2S_SALTBYTES  = 8,
    BLAKE2S_PERSONALBYTES = 8
  };

  /* Sizes, in bytes, used by BLAKE2b. */
  enum blake2b_constant
  {
    BLAKE2B_BLOCKBYTES = 128,
    BLAKE2B_OUTBYTES   = 64,
    BLAKE2B_KEYBYTES   = 64,
    BLAKE2B_SALTBYTES  = 16,
    BLAKE2B_PERSONALBYTES = 16
  };

/*
 * Everything up to the matching pack(pop) is laid out without padding.  In
 * the parameter blocks the trailing number on each field is the running byte
 * count after that field (32 bytes in total for BLAKE2s, 64 for BLAKE2b).
 */
#pragma pack(push, 1)
  typedef struct __blake2s_param
  {
    uint8_t  digest_length; // 1
    uint8_t  key_length;    // 2
    uint8_t  fanout;        // 3
    uint8_t  depth;         // 4
    uint32_t leaf_length;   // 8
    uint8_t  node_offset[6];// 14
    uint8_t  node_depth;    // 15
    uint8_t  inner_length;  // 16
    // uint8_t  reserved[0];
    uint8_t  salt[BLAKE2S_SALTBYTES]; // 24
    uint8_t  personal[BLAKE2S_PERSONALBYTES];  // 32
  } blake2s_param;

  typedef struct __blake2s_state
  {
    uint32_t h[8];
    uint32_t t[2];
    uint32_t f[2];
    uint8_t  buf[2 * BLAKE2S_BLOCKBYTES];
    uint32_t buflen;
    uint8_t  outlen;
    uint8_t  last_node;
  } blake2s_state;

  typedef struct __blake2b_param
  {
    uint8_t  digest_length; // 1
    uint8_t  key_length;    // 2
    uint8_t  fanout;        // 3
    uint8_t  depth;         // 4
    uint32_t leaf_length;   // 8
    uint64_t node_offset;   // 16
    uint8_t  node_depth;    // 17
    uint8_t  inner_length;  // 18
    uint8_t  reserved[14];  // 32
    uint8_t  salt[BLAKE2B_SALTBYTES]; // 48
    uint8_t  personal[BLAKE2B_PERSONALBYTES];  // 64
  } blake2b_param;

  typedef struct __blake2b_state
  {
    uint64_t h[8];
    uint64_t t[2];
    uint64_t f[2];
    uint8_t  buf[2 * BLAKE2B_BLOCKBYTES];
    uint32_t buflen;
    uint8_t  outlen;
    uint8_t  last_node;
  } blake2b_state;

  typedef struct __blake2sp_state
  {
    blake2s_state S[8][1];
    blake2s_state R[1];
    uint8_t  buf[8 * BLAKE2S_BLOCKBYTES];
    uint32_t buflen;
    uint8_t  outlen;
  } blake2sp_state;

  typedef struct __blake2bp_state
  {
    blake2b_state S[4][1];
    blake2b_state R[1];
    uint8_t  buf[4 * BLAKE2B_BLOCKBYTES];
    uint32_t buflen;
    uint8_t  outlen;
  } blake2bp_state;
#pragma pack(pop)

  // Streaming API
  BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen );
  BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
  BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
  BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
  BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );

  BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen );
  BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
  BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
  BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
  BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );

  BLAKE2_API int blake2sp_init( blake2sp_state *S, size_t outlen );
  BLAKE2_API int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
  BLAKE2_API int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen );
  BLAKE2_API int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen );

  BLAKE2_API int blake2bp_init( blake2bp_state *S, size_t outlen );
  BLAKE2_API int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
  BLAKE2_API int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen );
  BLAKE2_API int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen );

  // Simple API
  BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
  BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );

  BLAKE2_API int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
  BLAKE2_API int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );

  /* Generic one-shot entry point: forwards to blake2b(). */
  static inline int blake2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
  {
    return blake2b( out, in, key, outlen, inlen, keylen );
  }

#if defined(__cplusplus)
}
#endif

#endif

/* --------------------------------------------------------------------------------
   /src/blake2b-load-sse2.h:
   -------------------------------------------------------------------------------- */
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/* src/blake2b-load-sse2.h: BLAKE2b message-word loaders for the SSE2 code path. */
#pragma once
#ifndef __BLAKE2B_LOAD_SSE2_H__
#define __BLAKE2B_LOAD_SSE2_H__

/*
 * LOAD_MSG_<r>_<k>(b0, b1) fills b0 and b1 with the message words needed by
 * the k-th load of round r.  The words are read from variables m0..m15 that
 * must be in scope at the point of use; _mm_set_epi64x takes the high 64-bit
 * lane first, then the low one.
 *
 * Each macro is wrapped in do { } while(0) so that it expands to exactly one
 * statement: it can be used as the body of an unbraced if/else, and both
 * assignments are always executed (or skipped) together.  Invoke it followed
 * by a semicolon.
 */
#define LOAD_MSG_0_1(b0, b1) do { b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4); } while(0)
#define LOAD_MSG_0_2(b0, b1) do { b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5); } while(0)
#define LOAD_MSG_0_3(b0, b1) do { b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12); } while(0)
#define LOAD_MSG_0_4(b0, b1) do { b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13); } while(0)
#define LOAD_MSG_1_1(b0, b1) do { b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9); } while(0)
#define LOAD_MSG_1_2(b0, b1) do { b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15); } while(0)
#define LOAD_MSG_1_3(b0, b1) do { b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11); } while(0)
#define LOAD_MSG_1_4(b0, b1) do { b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7); } while(0)
#define LOAD_MSG_2_1(b0, b1) do { b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5); } while(0)
#define LOAD_MSG_2_2(b0, b1) do { b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2); } while(0)
#define LOAD_MSG_2_3(b0, b1) do { b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7); } while(0)
#define LOAD_MSG_2_4(b0, b1) do { b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1); } while(0)
#define LOAD_MSG_3_1(b0, b1) do { b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13); } while(0)
#define LOAD_MSG_3_2(b0, b1) do { b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12); } while(0)
#define LOAD_MSG_3_3(b0, b1) do { b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4); } while(0)
#define LOAD_MSG_3_4(b0, b1) do { b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0); } while(0)
#define LOAD_MSG_4_1(b0, b1) do { b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2); } while(0)
#define LOAD_MSG_4_2(b0, b1) do { b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4); } while(0)
#define LOAD_MSG_4_3(b0, b1) do { b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6); } while(0)
#define LOAD_MSG_4_4(b0, b1) do { b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8); } while(0)
#define LOAD_MSG_5_1(b0, b1) do { b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0); } while(0)
#define LOAD_MSG_5_2(b0, b1) do { b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11); } while(0)
#define LOAD_MSG_5_3(b0, b1) do { b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15); } while(0)
#define LOAD_MSG_5_4(b0, b1) do { b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14); } while(0)
#define LOAD_MSG_6_1(b0, b1) do { b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14); } while(0)
#define LOAD_MSG_6_2(b0, b1) do { b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13); } while(0)
#define LOAD_MSG_6_3(b0, b1) do { b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9); } while(0)
#define LOAD_MSG_6_4(b0, b1) do { b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2); } while(0)
#define LOAD_MSG_7_1(b0, b1) do { b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12); } while(0)
#define LOAD_MSG_7_2(b0, b1) do { b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1); } while(0)
#define LOAD_MSG_7_3(b0, b1) do { b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8); } while(0)
#define LOAD_MSG_7_4(b0, b1) do { b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6); } while(0)
#define LOAD_MSG_8_1(b0, b1) do { b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11); } while(0)
#define LOAD_MSG_8_2(b0, b1) do { b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3); } while(0)
#define LOAD_MSG_8_3(b0, b1) do { b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1); } while(0)
#define LOAD_MSG_8_4(b0, b1) do { b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4); } while(0)
#define LOAD_MSG_9_1(b0, b1) do { b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7); } while(0)
#define LOAD_MSG_9_2(b0, b1) do { b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6); } while(0)
#define LOAD_MSG_9_3(b0, b1) do { b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3); } while(0)
#define LOAD_MSG_9_4(b0, b1) do { b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12); } while(0)
#define LOAD_MSG_10_1(b0, b1) do { b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4); } while(0)
#define LOAD_MSG_10_2(b0, b1) do { b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5); } while(0)
#define LOAD_MSG_10_3(b0, b1) do { b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12); } while(0)
#define LOAD_MSG_10_4(b0, b1) do { b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13); } while(0)
#define LOAD_MSG_11_1(b0, b1) do { b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9); } while(0)
#define LOAD_MSG_11_2(b0, b1) do { b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15); } while(0)
#define LOAD_MSG_11_3(b0, b1) do { b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11); } while(0)
#define LOAD_MSG_11_4(b0, b1) do { b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7); } while(0)


#endif

/* --------------------------------------------------------------------------------
   /src/blake2b-load-sse41.h:
   -------------------------------------------------------------------------------- */
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
12 | */ 13 | #pragma once 14 | #ifndef __BLAKE2B_LOAD_SSE41_H__ 15 | #define __BLAKE2B_LOAD_SSE41_H__ 16 | 17 | #define LOAD_MSG_0_1(b0, b1) \ 18 | do \ 19 | { \ 20 | b0 = _mm_unpacklo_epi64(m0, m1); \ 21 | b1 = _mm_unpacklo_epi64(m2, m3); \ 22 | } while(0) 23 | 24 | 25 | #define LOAD_MSG_0_2(b0, b1) \ 26 | do \ 27 | { \ 28 | b0 = _mm_unpackhi_epi64(m0, m1); \ 29 | b1 = _mm_unpackhi_epi64(m2, m3); \ 30 | } while(0) 31 | 32 | 33 | #define LOAD_MSG_0_3(b0, b1) \ 34 | do \ 35 | { \ 36 | b0 = _mm_unpacklo_epi64(m4, m5); \ 37 | b1 = _mm_unpacklo_epi64(m6, m7); \ 38 | } while(0) 39 | 40 | 41 | #define LOAD_MSG_0_4(b0, b1) \ 42 | do \ 43 | { \ 44 | b0 = _mm_unpackhi_epi64(m4, m5); \ 45 | b1 = _mm_unpackhi_epi64(m6, m7); \ 46 | } while(0) 47 | 48 | 49 | #define LOAD_MSG_1_1(b0, b1) \ 50 | do \ 51 | { \ 52 | b0 = _mm_unpacklo_epi64(m7, m2); \ 53 | b1 = _mm_unpackhi_epi64(m4, m6); \ 54 | } while(0) 55 | 56 | 57 | #define LOAD_MSG_1_2(b0, b1) \ 58 | do \ 59 | { \ 60 | b0 = _mm_unpacklo_epi64(m5, m4); \ 61 | b1 = _mm_alignr_epi8(m3, m7, 8); \ 62 | } while(0) 63 | 64 | 65 | #define LOAD_MSG_1_3(b0, b1) \ 66 | do \ 67 | { \ 68 | b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ 69 | b1 = _mm_unpackhi_epi64(m5, m2); \ 70 | } while(0) 71 | 72 | 73 | #define LOAD_MSG_1_4(b0, b1) \ 74 | do \ 75 | { \ 76 | b0 = _mm_unpacklo_epi64(m6, m1); \ 77 | b1 = _mm_unpackhi_epi64(m3, m1); \ 78 | } while(0) 79 | 80 | 81 | #define LOAD_MSG_2_1(b0, b1) \ 82 | do \ 83 | { \ 84 | b0 = _mm_alignr_epi8(m6, m5, 8); \ 85 | b1 = _mm_unpackhi_epi64(m2, m7); \ 86 | } while(0) 87 | 88 | 89 | #define LOAD_MSG_2_2(b0, b1) \ 90 | do \ 91 | { \ 92 | b0 = _mm_unpacklo_epi64(m4, m0); \ 93 | b1 = _mm_blend_epi16(m1, m6, 0xF0); \ 94 | } while(0) 95 | 96 | 97 | #define LOAD_MSG_2_3(b0, b1) \ 98 | do \ 99 | { \ 100 | b0 = _mm_blend_epi16(m5, m1, 0xF0); \ 101 | b1 = _mm_unpackhi_epi64(m3, m4); \ 102 | } while(0) 103 | 104 | 105 | #define LOAD_MSG_2_4(b0, b1) \ 106 | do \ 107 | { \ 108 | b0 = _mm_unpacklo_epi64(m7, 
m3); \ 109 | b1 = _mm_alignr_epi8(m2, m0, 8); \ 110 | } while(0) 111 | 112 | 113 | #define LOAD_MSG_3_1(b0, b1) \ 114 | do \ 115 | { \ 116 | b0 = _mm_unpackhi_epi64(m3, m1); \ 117 | b1 = _mm_unpackhi_epi64(m6, m5); \ 118 | } while(0) 119 | 120 | 121 | #define LOAD_MSG_3_2(b0, b1) \ 122 | do \ 123 | { \ 124 | b0 = _mm_unpackhi_epi64(m4, m0); \ 125 | b1 = _mm_unpacklo_epi64(m6, m7); \ 126 | } while(0) 127 | 128 | 129 | #define LOAD_MSG_3_3(b0, b1) \ 130 | do \ 131 | { \ 132 | b0 = _mm_blend_epi16(m1, m2, 0xF0); \ 133 | b1 = _mm_blend_epi16(m2, m7, 0xF0); \ 134 | } while(0) 135 | 136 | 137 | #define LOAD_MSG_3_4(b0, b1) \ 138 | do \ 139 | { \ 140 | b0 = _mm_unpacklo_epi64(m3, m5); \ 141 | b1 = _mm_unpacklo_epi64(m0, m4); \ 142 | } while(0) 143 | 144 | 145 | #define LOAD_MSG_4_1(b0, b1) \ 146 | do \ 147 | { \ 148 | b0 = _mm_unpackhi_epi64(m4, m2); \ 149 | b1 = _mm_unpacklo_epi64(m1, m5); \ 150 | } while(0) 151 | 152 | 153 | #define LOAD_MSG_4_2(b0, b1) \ 154 | do \ 155 | { \ 156 | b0 = _mm_blend_epi16(m0, m3, 0xF0); \ 157 | b1 = _mm_blend_epi16(m2, m7, 0xF0); \ 158 | } while(0) 159 | 160 | 161 | #define LOAD_MSG_4_3(b0, b1) \ 162 | do \ 163 | { \ 164 | b0 = _mm_blend_epi16(m7, m5, 0xF0); \ 165 | b1 = _mm_blend_epi16(m3, m1, 0xF0); \ 166 | } while(0) 167 | 168 | 169 | #define LOAD_MSG_4_4(b0, b1) \ 170 | do \ 171 | { \ 172 | b0 = _mm_alignr_epi8(m6, m0, 8); \ 173 | b1 = _mm_blend_epi16(m4, m6, 0xF0); \ 174 | } while(0) 175 | 176 | 177 | #define LOAD_MSG_5_1(b0, b1) \ 178 | do \ 179 | { \ 180 | b0 = _mm_unpacklo_epi64(m1, m3); \ 181 | b1 = _mm_unpacklo_epi64(m0, m4); \ 182 | } while(0) 183 | 184 | 185 | #define LOAD_MSG_5_2(b0, b1) \ 186 | do \ 187 | { \ 188 | b0 = _mm_unpacklo_epi64(m6, m5); \ 189 | b1 = _mm_unpackhi_epi64(m5, m1); \ 190 | } while(0) 191 | 192 | 193 | #define LOAD_MSG_5_3(b0, b1) \ 194 | do \ 195 | { \ 196 | b0 = _mm_blend_epi16(m2, m3, 0xF0); \ 197 | b1 = _mm_unpackhi_epi64(m7, m0); \ 198 | } while(0) 199 | 200 | 201 | #define LOAD_MSG_5_4(b0, b1) \ 
202 | do \ 203 | { \ 204 | b0 = _mm_unpackhi_epi64(m6, m2); \ 205 | b1 = _mm_blend_epi16(m7, m4, 0xF0); \ 206 | } while(0) 207 | 208 | 209 | #define LOAD_MSG_6_1(b0, b1) \ 210 | do \ 211 | { \ 212 | b0 = _mm_blend_epi16(m6, m0, 0xF0); \ 213 | b1 = _mm_unpacklo_epi64(m7, m2); \ 214 | } while(0) 215 | 216 | 217 | #define LOAD_MSG_6_2(b0, b1) \ 218 | do \ 219 | { \ 220 | b0 = _mm_unpackhi_epi64(m2, m7); \ 221 | b1 = _mm_alignr_epi8(m5, m6, 8); \ 222 | } while(0) 223 | 224 | 225 | #define LOAD_MSG_6_3(b0, b1) \ 226 | do \ 227 | { \ 228 | b0 = _mm_unpacklo_epi64(m0, m3); \ 229 | b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ 230 | } while(0) 231 | 232 | 233 | #define LOAD_MSG_6_4(b0, b1) \ 234 | do \ 235 | { \ 236 | b0 = _mm_unpackhi_epi64(m3, m1); \ 237 | b1 = _mm_blend_epi16(m1, m5, 0xF0); \ 238 | } while(0) 239 | 240 | 241 | #define LOAD_MSG_7_1(b0, b1) \ 242 | do \ 243 | { \ 244 | b0 = _mm_unpackhi_epi64(m6, m3); \ 245 | b1 = _mm_blend_epi16(m6, m1, 0xF0); \ 246 | } while(0) 247 | 248 | 249 | #define LOAD_MSG_7_2(b0, b1) \ 250 | do \ 251 | { \ 252 | b0 = _mm_alignr_epi8(m7, m5, 8); \ 253 | b1 = _mm_unpackhi_epi64(m0, m4); \ 254 | } while(0) 255 | 256 | 257 | #define LOAD_MSG_7_3(b0, b1) \ 258 | do \ 259 | { \ 260 | b0 = _mm_unpackhi_epi64(m2, m7); \ 261 | b1 = _mm_unpacklo_epi64(m4, m1); \ 262 | } while(0) 263 | 264 | 265 | #define LOAD_MSG_7_4(b0, b1) \ 266 | do \ 267 | { \ 268 | b0 = _mm_unpacklo_epi64(m0, m2); \ 269 | b1 = _mm_unpacklo_epi64(m3, m5); \ 270 | } while(0) 271 | 272 | 273 | #define LOAD_MSG_8_1(b0, b1) \ 274 | do \ 275 | { \ 276 | b0 = _mm_unpacklo_epi64(m3, m7); \ 277 | b1 = _mm_alignr_epi8(m0, m5, 8); \ 278 | } while(0) 279 | 280 | 281 | #define LOAD_MSG_8_2(b0, b1) \ 282 | do \ 283 | { \ 284 | b0 = _mm_unpackhi_epi64(m7, m4); \ 285 | b1 = _mm_alignr_epi8(m4, m1, 8); \ 286 | } while(0) 287 | 288 | 289 | #define LOAD_MSG_8_3(b0, b1) \ 290 | do \ 291 | { \ 292 | b0 = m6; \ 293 | b1 = _mm_alignr_epi8(m5, m0, 8); \ 294 | } while(0) 295 | 296 | 297 
| #define LOAD_MSG_8_4(b0, b1) \ 298 | do \ 299 | { \ 300 | b0 = _mm_blend_epi16(m1, m3, 0xF0); \ 301 | b1 = m2; \ 302 | } while(0) 303 | 304 | 305 | #define LOAD_MSG_9_1(b0, b1) \ 306 | do \ 307 | { \ 308 | b0 = _mm_unpacklo_epi64(m5, m4); \ 309 | b1 = _mm_unpackhi_epi64(m3, m0); \ 310 | } while(0) 311 | 312 | 313 | #define LOAD_MSG_9_2(b0, b1) \ 314 | do \ 315 | { \ 316 | b0 = _mm_unpacklo_epi64(m1, m2); \ 317 | b1 = _mm_blend_epi16(m3, m2, 0xF0); \ 318 | } while(0) 319 | 320 | 321 | #define LOAD_MSG_9_3(b0, b1) \ 322 | do \ 323 | { \ 324 | b0 = _mm_unpackhi_epi64(m7, m4); \ 325 | b1 = _mm_unpackhi_epi64(m1, m6); \ 326 | } while(0) 327 | 328 | 329 | #define LOAD_MSG_9_4(b0, b1) \ 330 | do \ 331 | { \ 332 | b0 = _mm_alignr_epi8(m7, m5, 8); \ 333 | b1 = _mm_unpacklo_epi64(m6, m0); \ 334 | } while(0) 335 | 336 | 337 | #define LOAD_MSG_10_1(b0, b1) \ 338 | do \ 339 | { \ 340 | b0 = _mm_unpacklo_epi64(m0, m1); \ 341 | b1 = _mm_unpacklo_epi64(m2, m3); \ 342 | } while(0) 343 | 344 | 345 | #define LOAD_MSG_10_2(b0, b1) \ 346 | do \ 347 | { \ 348 | b0 = _mm_unpackhi_epi64(m0, m1); \ 349 | b1 = _mm_unpackhi_epi64(m2, m3); \ 350 | } while(0) 351 | 352 | 353 | #define LOAD_MSG_10_3(b0, b1) \ 354 | do \ 355 | { \ 356 | b0 = _mm_unpacklo_epi64(m4, m5); \ 357 | b1 = _mm_unpacklo_epi64(m6, m7); \ 358 | } while(0) 359 | 360 | 361 | #define LOAD_MSG_10_4(b0, b1) \ 362 | do \ 363 | { \ 364 | b0 = _mm_unpackhi_epi64(m4, m5); \ 365 | b1 = _mm_unpackhi_epi64(m6, m7); \ 366 | } while(0) 367 | 368 | 369 | #define LOAD_MSG_11_1(b0, b1) \ 370 | do \ 371 | { \ 372 | b0 = _mm_unpacklo_epi64(m7, m2); \ 373 | b1 = _mm_unpackhi_epi64(m4, m6); \ 374 | } while(0) 375 | 376 | 377 | #define LOAD_MSG_11_2(b0, b1) \ 378 | do \ 379 | { \ 380 | b0 = _mm_unpacklo_epi64(m5, m4); \ 381 | b1 = _mm_alignr_epi8(m3, m7, 8); \ 382 | } while(0) 383 | 384 | 385 | #define LOAD_MSG_11_3(b0, b1) \ 386 | do \ 387 | { \ 388 | b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ 389 | b1 = _mm_unpackhi_epi64(m5, m2); 
\ 390 | } while(0) 391 | 392 | 393 | #define LOAD_MSG_11_4(b0, b1) \ 394 | do \ 395 | { \ 396 | b0 = _mm_unpacklo_epi64(m6, m1); \ 397 | b1 = _mm_unpackhi_epi64(m3, m1); \ 398 | } while(0) 399 | 400 | 401 | #endif 402 | 403 | -------------------------------------------------------------------------------- /src/blake2b-ref.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - reference C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "blake2.h" 19 | #include "blake2-impl.h" 20 | 21 | static const uint64_t blake2b_IV[8] = 22 | { 23 | 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 24 | 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 25 | 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 26 | 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 27 | }; 28 | 29 | static const uint8_t blake2b_sigma[12][16] = 30 | { 31 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 32 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 33 | { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 34 | { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 35 | { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 36 | { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 37 | { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 38 | { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 39 | { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 40 | { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 41 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15 } , 42 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 43 | }; 44 | 45 | 46 | static inline int blake2b_set_lastnode( blake2b_state *S ) 47 | { 48 | S->f[1] = ~0ULL; 49 | return 0; 50 | } 51 | 52 | static inline int blake2b_clear_lastnode( blake2b_state *S ) 53 | { 54 | S->f[1] = 0ULL; 55 | return 0; 56 | } 57 | 58 | /* Some helper functions, not necessarily useful */ 59 | static inline int blake2b_set_lastblock( blake2b_state *S ) 60 | { 61 | if( S->last_node ) blake2b_set_lastnode( S ); 62 | 63 | S->f[0] = ~0ULL; 64 | return 0; 65 | } 66 | 67 | static inline int blake2b_clear_lastblock( blake2b_state *S ) 68 | { 69 | if( S->last_node ) blake2b_clear_lastnode( S ); 70 | 71 | S->f[0] = 0ULL; 72 | return 0; 73 | } 74 | 75 | static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) 76 | { 77 | S->t[0] += inc; 78 | S->t[1] += ( S->t[0] < inc ); 79 | return 0; 80 | } 81 | 82 | 83 | 84 | // Parameter-related functions 85 | static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) 86 | { 87 | P->digest_length = digest_length; 88 | return 0; 89 | } 90 | 91 | static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) 92 | { 93 | P->fanout = fanout; 94 | return 0; 95 | } 96 | 97 | static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) 98 | { 99 | P->depth = depth; 100 | return 0; 101 | } 102 | 103 | static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) 104 | { 105 | store32( &P->leaf_length, leaf_length ); 106 | return 0; 107 | } 108 | 109 | static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) 110 | { 111 | store64( &P->node_offset, node_offset ); 112 | return 0; 113 | } 114 | 115 | static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 116 | { 117 | P->node_depth = node_depth; 118 | return 0; 119 
| } 120 | 121 | static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) 122 | { 123 | P->inner_length = inner_length; 124 | return 0; 125 | } 126 | 127 | static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) 128 | { 129 | memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); 130 | return 0; 131 | } 132 | 133 | static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) 134 | { 135 | memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); 136 | return 0; 137 | } 138 | 139 | static inline int blake2b_init0( blake2b_state *S ) 140 | { 141 | memset( S, 0, sizeof( blake2b_state ) ); 142 | 143 | for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; 144 | 145 | return 0; 146 | } 147 | 148 | #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) 149 | #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param) 150 | #define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key) 151 | #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) 152 | #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) 153 | #define blake2b BLAKE2_IMPL_NAME(blake2b) 154 | 155 | #if defined(__cplusplus) 156 | extern "C" { 157 | #endif 158 | int blake2b_init( blake2b_state *S, size_t outlen ); 159 | int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); 160 | int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 161 | int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); 162 | int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); 163 | int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 164 | #if defined(__cplusplus) 165 | } 166 | #endif 167 | 168 | /* init xors IV with input parameter block */ 169 | int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) 170 | { 171 | blake2b_init0( S ); 172 | uint8_t *p = ( uint8_t * )( P ); 
173 | 174 | /* IV XOR ParamBlock */ 175 | for( size_t i = 0; i < 8; ++i ) 176 | S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); 177 | 178 | S->outlen = P->digest_length; 179 | return 0; 180 | } 181 | 182 | 183 | 184 | int blake2b_init( blake2b_state *S, size_t outlen ) 185 | { 186 | blake2b_param P[1]; 187 | 188 | if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 189 | 190 | P->digest_length = ( uint8_t ) outlen; 191 | P->key_length = 0; 192 | P->fanout = 1; 193 | P->depth = 1; 194 | store32( &P->leaf_length, 0 ); 195 | store64( &P->node_offset, 0 ); 196 | P->node_depth = 0; 197 | P->inner_length = 0; 198 | memset( P->reserved, 0, sizeof( P->reserved ) ); 199 | memset( P->salt, 0, sizeof( P->salt ) ); 200 | memset( P->personal, 0, sizeof( P->personal ) ); 201 | return blake2b_init_param( S, P ); 202 | } 203 | 204 | 205 | int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) 206 | { 207 | blake2b_param P[1]; 208 | 209 | if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 210 | 211 | if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; 212 | 213 | P->digest_length = ( uint8_t ) outlen; 214 | P->key_length = ( uint8_t ) keylen; 215 | P->fanout = 1; 216 | P->depth = 1; 217 | store32( &P->leaf_length, 0 ); 218 | store64( &P->node_offset, 0 ); 219 | P->node_depth = 0; 220 | P->inner_length = 0; 221 | memset( P->reserved, 0, sizeof( P->reserved ) ); 222 | memset( P->salt, 0, sizeof( P->salt ) ); 223 | memset( P->personal, 0, sizeof( P->personal ) ); 224 | 225 | if( blake2b_init_param( S, P ) < 0 ) return -1; 226 | 227 | { 228 | uint8_t block[BLAKE2B_BLOCKBYTES]; 229 | memset( block, 0, BLAKE2B_BLOCKBYTES ); 230 | memcpy( block, key, keylen ); 231 | blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); 232 | secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 233 | } 234 | return 0; 235 | } 236 | 237 | static int blake2b_compress( blake2b_state *S, const uint8_t 
block[BLAKE2B_BLOCKBYTES] ) 238 | { 239 | uint64_t m[16]; 240 | uint64_t v[16]; 241 | size_t i; 242 | 243 | for( i = 0; i < 16; ++i ) 244 | m[i] = load64( block + i * sizeof( m[i] ) ); 245 | 246 | for( i = 0; i < 8; ++i ) 247 | v[i] = S->h[i]; 248 | 249 | v[ 8] = blake2b_IV[0]; 250 | v[ 9] = blake2b_IV[1]; 251 | v[10] = blake2b_IV[2]; 252 | v[11] = blake2b_IV[3]; 253 | v[12] = S->t[0] ^ blake2b_IV[4]; 254 | v[13] = S->t[1] ^ blake2b_IV[5]; 255 | v[14] = S->f[0] ^ blake2b_IV[6]; 256 | v[15] = S->f[1] ^ blake2b_IV[7]; 257 | #define G(r,i,a,b,c,d) \ 258 | do { \ 259 | a = a + b + m[blake2b_sigma[r][2*i+0]]; \ 260 | d = rotr64(d ^ a, 32); \ 261 | c = c + d; \ 262 | b = rotr64(b ^ c, 24); \ 263 | a = a + b + m[blake2b_sigma[r][2*i+1]]; \ 264 | d = rotr64(d ^ a, 16); \ 265 | c = c + d; \ 266 | b = rotr64(b ^ c, 63); \ 267 | } while(0) 268 | #define ROUND(r) \ 269 | do { \ 270 | G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ 271 | G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ 272 | G(r,2,v[ 2],v[ 6],v[10],v[14]); \ 273 | G(r,3,v[ 3],v[ 7],v[11],v[15]); \ 274 | G(r,4,v[ 0],v[ 5],v[10],v[15]); \ 275 | G(r,5,v[ 1],v[ 6],v[11],v[12]); \ 276 | G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ 277 | G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ 278 | } while(0) 279 | ROUND( 0 ); 280 | ROUND( 1 ); 281 | ROUND( 2 ); 282 | ROUND( 3 ); 283 | ROUND( 4 ); 284 | ROUND( 5 ); 285 | ROUND( 6 ); 286 | ROUND( 7 ); 287 | ROUND( 8 ); 288 | ROUND( 9 ); 289 | ROUND( 10 ); 290 | ROUND( 11 ); 291 | 292 | for( i = 0; i < 8; ++i ) 293 | S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; 294 | 295 | #undef G 296 | #undef ROUND 297 | return 0; 298 | } 299 | 300 | 301 | int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) 302 | { 303 | while( inlen > 0 ) 304 | { 305 | uint32_t left = S->buflen; 306 | uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; 307 | 308 | if( inlen > fill ) 309 | { 310 | memcpy( S->buf + left, in, fill ); // Fill buffer 311 | S->buflen += fill; 312 | blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 313 | blake2b_compress( S, 
S->buf ); // Compress 314 | memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 315 | S->buflen -= BLAKE2B_BLOCKBYTES; 316 | in += fill; 317 | inlen -= fill; 318 | } 319 | else // inlen <= fill 320 | { 321 | memcpy( S->buf + left, in, inlen ); 322 | S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress 323 | in += inlen; 324 | inlen -= inlen; 325 | } 326 | } 327 | 328 | return 0; 329 | } 330 | /* Finish the hash: compress a full block still pending in the buffer, zero-pad and compress the last block with the last-block flag set, then copy the first outlen bytes of the digest to out. Returns -1 if outlen differs from the length fixed at init, 0 otherwise. */ 331 | int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) 332 | { 333 | uint8_t buffer[BLAKE2B_OUTBYTES]; 334 | size_t i; 335 | 336 | if(S->outlen != outlen) return -1; 337 | 338 | if( S->buflen > BLAKE2B_BLOCKBYTES ) 339 | { 340 | blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 341 | blake2b_compress( S, S->buf ); 342 | S->buflen -= BLAKE2B_BLOCKBYTES; 343 | memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); 344 | } 345 | 346 | blake2b_increment_counter( S, S->buflen ); 347 | blake2b_set_lastblock( S ); 348 | memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ 349 | blake2b_compress( S, S->buf ); 350 | 351 | for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ 352 | store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); 353 | 354 | memcpy( out, buffer, outlen ); secure_zero_memory( buffer, sizeof( buffer ) ); /* Do not leave the untruncated digest on the stack */ 355 | return 0; 356 | } 357 | 358 | int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 359 | { 360 | blake2b_state S[1]; 361 | 362 | /* Verify parameters */ 363 | if ( NULL == in && inlen > 0 ) return -1; 364 | 365 | if ( NULL == out ) return -1; 366 | 367 | if( NULL == key && keylen > 0 ) return -1; 368 | 369 | if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 370 | 371 | if( keylen > BLAKE2B_KEYBYTES ) return -1; 372 | 373 | if( keylen > 0 ) 374 | { 375 | if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; 376 | } 377 | else 378 | { 379 | if( blake2b_init( S, outlen ) < 0 ) return -1; 380 | } 381 | 382 | if( blake2b_update( S, ( uint8_t * 
)in, inlen ) < 0 ) return -1; 383 | return blake2b_final( S, out, outlen ); 384 | } 385 | 386 | 387 | -------------------------------------------------------------------------------- /src/blake2b-round.h: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 12 | */ 13 | #pragma once 14 | #ifndef __BLAKE2B_ROUND_H__ 15 | #define __BLAKE2B_ROUND_H__ 16 | 17 | #define LOAD(p) _mm_load_si128( (__m128i *)(p) ) 18 | #define STORE(p,r) _mm_store_si128((__m128i *)(p), r) 19 | 20 | #define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) 21 | #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) 22 | 23 | #define TOF(reg) _mm_castsi128_ps((reg)) 24 | #define TOI(reg) _mm_castps_si128((reg)) 25 | 26 | #define LIKELY(x) __builtin_expect((x),1) 27 | 28 | 29 | /* Microarchitecture-specific macros. _mm_roti_epi64(x, c) takes the rotation count negated (callers pass -32, -24, -16, -63) and rotates each 64-bit lane right by -(c) bits. The SSSE3 variant references r16 and r24, which must be in scope where the macro is expanded. The whole conditional is parenthesized so it stays one operand inside a larger expression. */ 30 | #ifndef HAVE_XOP 31 | #ifdef HAVE_SSSE3 32 | #define _mm_roti_epi64(x, c) \ 33 | ( (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ 34 | : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ 35 | : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ 36 | : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ 37 | : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) ) 38 | #else 39 | #define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) 40 | #endif 41 | #else 42 | /* ... 
*/ 43 | #endif 44 | 45 | 46 | 47 | #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ 48 | row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ 49 | row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ 50 | \ 51 | row4l = _mm_xor_si128(row4l, row1l); \ 52 | row4h = _mm_xor_si128(row4h, row1h); \ 53 | \ 54 | row4l = _mm_roti_epi64(row4l, -32); \ 55 | row4h = _mm_roti_epi64(row4h, -32); \ 56 | \ 57 | row3l = _mm_add_epi64(row3l, row4l); \ 58 | row3h = _mm_add_epi64(row3h, row4h); \ 59 | \ 60 | row2l = _mm_xor_si128(row2l, row3l); \ 61 | row2h = _mm_xor_si128(row2h, row3h); \ 62 | \ 63 | row2l = _mm_roti_epi64(row2l, -24); \ 64 | row2h = _mm_roti_epi64(row2h, -24); \ 65 | 66 | #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ 67 | row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ 68 | row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ 69 | \ 70 | row4l = _mm_xor_si128(row4l, row1l); \ 71 | row4h = _mm_xor_si128(row4h, row1h); \ 72 | \ 73 | row4l = _mm_roti_epi64(row4l, -16); \ 74 | row4h = _mm_roti_epi64(row4h, -16); \ 75 | \ 76 | row3l = _mm_add_epi64(row3l, row4l); \ 77 | row3h = _mm_add_epi64(row3h, row4h); \ 78 | \ 79 | row2l = _mm_xor_si128(row2l, row3l); \ 80 | row2h = _mm_xor_si128(row2h, row3h); \ 81 | \ 82 | row2l = _mm_roti_epi64(row2l, -63); \ 83 | row2h = _mm_roti_epi64(row2h, -63); \ 84 | 85 | #if defined(HAVE_SSSE3) 86 | #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ 87 | t0 = _mm_alignr_epi8(row2h, row2l, 8); \ 88 | t1 = _mm_alignr_epi8(row2l, row2h, 8); \ 89 | row2l = t0; \ 90 | row2h = t1; \ 91 | \ 92 | t0 = row3l; \ 93 | row3l = row3h; \ 94 | row3h = t0; \ 95 | \ 96 | t0 = _mm_alignr_epi8(row4h, row4l, 8); \ 97 | t1 = _mm_alignr_epi8(row4l, row4h, 8); \ 98 | row4l = t1; \ 99 | row4h = t0; 100 | 101 | #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ 102 | t0 = _mm_alignr_epi8(row2l, row2h, 8); \ 103 | t1 = _mm_alignr_epi8(row2h, row2l, 8); \ 104 
| row2l = t0; \ 105 | row2h = t1; \ 106 | \ 107 | t0 = row3l; \ 108 | row3l = row3h; \ 109 | row3h = t0; \ 110 | \ 111 | t0 = _mm_alignr_epi8(row4l, row4h, 8); \ 112 | t1 = _mm_alignr_epi8(row4h, row4l, 8); \ 113 | row4l = t1; \ 114 | row4h = t0; 115 | #else 116 | 117 | #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ 118 | t0 = row4l;\ 119 | t1 = row2l;\ 120 | row4l = row3l;\ 121 | row3l = row3h;\ 122 | row3h = row4l;\ 123 | row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ 124 | row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ 125 | row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ 126 | row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) 127 | 128 | #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ 129 | t0 = row3l;\ 130 | row3l = row3h;\ 131 | row3h = t0;\ 132 | t0 = row2l;\ 133 | t1 = row4l;\ 134 | row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ 135 | row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ 136 | row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ 137 | row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) 138 | 139 | #endif 140 | 141 | #if defined(HAVE_SSE4_1) 142 | #include "blake2b-load-sse41.h" 143 | #else 144 | #include "blake2b-load-sse2.h" 145 | #endif 146 | 147 | #define ROUND(r) \ 148 | LOAD_MSG_ ##r ##_1(b0, b1); \ 149 | G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ 150 | LOAD_MSG_ ##r ##_2(b0, b1); \ 151 | G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ 152 | DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ 153 | LOAD_MSG_ ##r ##_3(b0, b1); \ 154 | G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ 155 | LOAD_MSG_ ##r ##_4(b0, b1); \ 156 | G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ 157 | UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); 158 | 159 | #endif 160 | 161 | 
-------------------------------------------------------------------------------- /src/blake2b-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 12 | */ 13 | #include 14 | #include 15 | #include "blake2.h" 16 | #include "blake2-kat.h" 17 | int main( int argc, char **argv ) 18 | { 19 | uint8_t key[BLAKE2B_KEYBYTES]; 20 | uint8_t buf[KAT_LENGTH]; 21 | 22 | for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) 23 | key[i] = ( uint8_t )i; 24 | 25 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 26 | buf[i] = ( uint8_t )i; 27 | 28 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 29 | { 30 | uint8_t hash[BLAKE2B_OUTBYTES]; 31 | 32 | if( blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || 33 | 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) 34 | { 35 | puts( "error" ); 36 | return -1; 37 | } 38 | } 39 | 40 | puts( "ok" ); 41 | return 0; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/blake2b.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 
9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 12 | */ 13 | 14 | #include <stdint.h> 15 | #include <string.h> 16 | #include <stdio.h> 17 | 18 | #include "blake2.h" 19 | #include "blake2-impl.h" 20 | 21 | #include "blake2-config.h" 22 | 23 | #if defined(_MSC_VER) 24 | #include <intrin.h> 25 | #endif 26 | 27 | #if defined(HAVE_SSE2) 28 | #include <emmintrin.h> 29 | // MSVC only defines _mm_set_epi64x for x86_64... 30 | #if defined(_MSC_VER) && !defined(_M_X64) && !defined(__clang__) 31 | static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) 32 | { 33 | return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); 34 | } 35 | #endif 36 | #endif 37 | 38 | #if defined(HAVE_SSSE3) 39 | #include <tmmintrin.h> 40 | #endif 41 | #if defined(HAVE_SSE4_1) 42 | #include <smmintrin.h> 43 | #endif 44 | #if defined(HAVE_AVX) 45 | #include <immintrin.h> 46 | #endif 47 | #if defined(HAVE_XOP) && !defined(_MSC_VER) 48 | #include <x86intrin.h> 49 | #endif 50 | 51 | 52 | 53 | #include "blake2b-round.h" 54 | 55 | static const uint64_t blake2b_IV[8] = 56 | { 57 | 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 58 | 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 59 | 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 60 | 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 61 | }; 62 | 63 | static const uint8_t blake2b_sigma[12][16] = 64 | { 65 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 66 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 67 | { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 68 | { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 69 | { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 70 | { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 71 | { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 72 | { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 73 | { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 74 | { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 75 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15 } , 76 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 77 | }; 78 | 79 | 80 | /* Some helper functions, not necessarily useful */ 81 | static inline int blake2b_set_lastnode( blake2b_state *S ) 82 | { 83 | S->f[1] = ~0ULL; 84 | return 0; 85 | } 86 | 87 | static inline int blake2b_clear_lastnode( blake2b_state *S ) 88 | { 89 | S->f[1] = 0ULL; 90 | return 0; 91 | } 92 | 93 | static inline int blake2b_set_lastblock( blake2b_state *S ) 94 | { 95 | if( S->last_node ) blake2b_set_lastnode( S ); 96 | 97 | S->f[0] = ~0ULL; 98 | return 0; 99 | } 100 | 101 | static inline int blake2b_clear_lastblock( blake2b_state *S ) 102 | { 103 | if( S->last_node ) blake2b_clear_lastnode( S ); 104 | 105 | S->f[0] = 0ULL; 106 | return 0; 107 | } 108 | 109 | 110 | static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) 111 | { 112 | #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) 113 | // ADD/ADC chain 114 | __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; 115 | t += inc; 116 | S->t[0] = ( uint64_t )( t >> 0 ); 117 | S->t[1] = ( uint64_t )( t >> 64 ); 118 | #else 119 | S->t[0] += inc; 120 | S->t[1] += ( S->t[0] < inc ); 121 | #endif 122 | return 0; 123 | } 124 | 125 | 126 | // Parameter-related functions 127 | static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) 128 | { 129 | P->digest_length = digest_length; 130 | return 0; 131 | } 132 | 133 | static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) 134 | { 135 | P->fanout = fanout; 136 | return 0; 137 | } 138 | 139 | static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) 140 | { 141 | P->depth = depth; 142 | return 0; 143 | } 144 | 145 | static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) 146 | { 147 | P->leaf_length = leaf_length; 148 | return 0; 149 | } 150 | 151 | static inline int blake2b_param_set_node_offset( 
blake2b_param *P, const uint64_t node_offset ) 152 | { 153 | P->node_offset = node_offset; 154 | return 0; 155 | } 156 | 157 | static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 158 | { 159 | P->node_depth = node_depth; 160 | return 0; 161 | } 162 | 163 | static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) 164 | { 165 | P->inner_length = inner_length; 166 | return 0; 167 | } 168 | 169 | static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) 170 | { 171 | memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); 172 | return 0; 173 | } 174 | 175 | static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) 176 | { 177 | memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); 178 | return 0; 179 | } 180 | 181 | static inline int blake2b_init0( blake2b_state *S ) 182 | { 183 | memset( S, 0, sizeof( blake2b_state ) ); 184 | 185 | for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; 186 | 187 | return 0; 188 | } 189 | 190 | 191 | 192 | #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) 193 | #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param) 194 | #define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key) 195 | #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) 196 | #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) 197 | #define blake2b BLAKE2_IMPL_NAME(blake2b) 198 | 199 | #if defined(__cplusplus) 200 | extern "C" { 201 | #endif 202 | int blake2b_init( blake2b_state *S, size_t outlen ); 203 | int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); 204 | int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 205 | int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); 206 | int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); 207 | int blake2b( uint8_t *out, const void *in, const void *key, size_t 
outlen, size_t inlen, size_t keylen ); 208 | #if defined(__cplusplus) 209 | } 210 | #endif 211 | 212 | /* init xors IV with input parameter block */ 213 | int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) 214 | { 215 | const uint8_t *p, *v; uint8_t *h; /* p and v are only read, so keep them const */ 216 | //blake2b_init0( S ); 217 | v = ( const uint8_t * )( blake2b_IV ); 218 | h = ( uint8_t * )( S->h ); 219 | p = ( const uint8_t * )( P ); 220 | /* IV XOR ParamBlock */ 221 | memset( S, 0, sizeof( blake2b_state ) ); 222 | 223 | for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; 224 | 225 | S->outlen = P->digest_length; 226 | return 0; 227 | } 228 | 229 | 230 | /* Some sort of default parameter block initialization, for sequential blake2b */ 231 | 232 | int blake2b_init( blake2b_state *S, size_t outlen ) 233 | { 234 | if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 235 | 236 | const blake2b_param P = 237 | { 238 | ( uint8_t ) outlen, 239 | 0, 240 | 1, 241 | 1, 242 | 0, 243 | 0, 244 | 0, 245 | 0, 246 | {0}, 247 | {0}, 248 | {0} 249 | }; 250 | return blake2b_init_param( S, &P ); 251 | } 252 | /* Keyed initialization: rejects a zero or oversized outlen, a NULL key, and a zero or oversized keylen, then absorbs the key zero-padded to one full block. Returns 0 on success and -1 on any failure. */ 253 | int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) 254 | { 255 | if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 256 | 257 | if ( ( !key ) || ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; 258 | 259 | const blake2b_param P = 260 | { 261 | ( uint8_t ) outlen, 262 | ( uint8_t ) keylen, 263 | 1, 264 | 1, 265 | 0, 266 | 0, 267 | 0, 268 | 0, 269 | {0}, 270 | {0}, 271 | {0} 272 | }; 273 | 274 | if( blake2b_init_param( S, &P ) < 0 ) 275 | return -1; /* report the failure instead of claiming success */ 276 | 277 | { 278 | uint8_t block[BLAKE2B_BLOCKBYTES]; 279 | memset( block, 0, BLAKE2B_BLOCKBYTES ); 280 | memcpy( block, key, keylen ); 281 | blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); 282 | secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 283 | } 284 | return 0; 285 | } 286 | 287 | static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) 288 | { 
289 | __m128i row1l, row1h; 290 | __m128i row2l, row2h; 291 | __m128i row3l, row3h; 292 | __m128i row4l, row4h; 293 | __m128i b0, b1; 294 | __m128i t0, t1; 295 | #if defined(HAVE_SSSE3) && !defined(HAVE_XOP) 296 | const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); 297 | const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); 298 | #endif 299 | #if defined(HAVE_SSE4_1) 300 | const __m128i m0 = LOADU( block + 00 ); 301 | const __m128i m1 = LOADU( block + 16 ); 302 | const __m128i m2 = LOADU( block + 32 ); 303 | const __m128i m3 = LOADU( block + 48 ); 304 | const __m128i m4 = LOADU( block + 64 ); 305 | const __m128i m5 = LOADU( block + 80 ); 306 | const __m128i m6 = LOADU( block + 96 ); 307 | const __m128i m7 = LOADU( block + 112 ); 308 | #else 309 | const uint64_t m0 = ( ( uint64_t * )block )[ 0]; 310 | const uint64_t m1 = ( ( uint64_t * )block )[ 1]; 311 | const uint64_t m2 = ( ( uint64_t * )block )[ 2]; 312 | const uint64_t m3 = ( ( uint64_t * )block )[ 3]; 313 | const uint64_t m4 = ( ( uint64_t * )block )[ 4]; 314 | const uint64_t m5 = ( ( uint64_t * )block )[ 5]; 315 | const uint64_t m6 = ( ( uint64_t * )block )[ 6]; 316 | const uint64_t m7 = ( ( uint64_t * )block )[ 7]; 317 | const uint64_t m8 = ( ( uint64_t * )block )[ 8]; 318 | const uint64_t m9 = ( ( uint64_t * )block )[ 9]; 319 | const uint64_t m10 = ( ( uint64_t * )block )[10]; 320 | const uint64_t m11 = ( ( uint64_t * )block )[11]; 321 | const uint64_t m12 = ( ( uint64_t * )block )[12]; 322 | const uint64_t m13 = ( ( uint64_t * )block )[13]; 323 | const uint64_t m14 = ( ( uint64_t * )block )[14]; 324 | const uint64_t m15 = ( ( uint64_t * )block )[15]; 325 | #endif 326 | row1l = LOADU( &S->h[0] ); 327 | row1h = LOADU( &S->h[2] ); 328 | row2l = LOADU( &S->h[4] ); 329 | row2h = LOADU( &S->h[6] ); 330 | row3l = LOADU( &blake2b_IV[0] ); 331 | row3h = LOADU( &blake2b_IV[2] ); 332 | row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( 
&S->t[0] ) ); 333 | row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); 334 | ROUND( 0 ); 335 | ROUND( 1 ); 336 | ROUND( 2 ); 337 | ROUND( 3 ); 338 | ROUND( 4 ); 339 | ROUND( 5 ); 340 | ROUND( 6 ); 341 | ROUND( 7 ); 342 | ROUND( 8 ); 343 | ROUND( 9 ); 344 | ROUND( 10 ); 345 | ROUND( 11 ); 346 | row1l = _mm_xor_si128( row3l, row1l ); 347 | row1h = _mm_xor_si128( row3h, row1h ); 348 | STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); 349 | STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); 350 | row2l = _mm_xor_si128( row4l, row2l ); 351 | row2h = _mm_xor_si128( row4h, row2h ); 352 | STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); 353 | STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); 354 | return 0; 355 | } 356 | 357 | 358 | int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) 359 | { 360 | while( inlen > 0 ) 361 | { 362 | uint32_t left = S->buflen; 363 | uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; 364 | 365 | if( inlen > fill ) 366 | { 367 | memcpy( S->buf + left, in, fill ); // Fill buffer 368 | S->buflen += fill; 369 | blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 370 | blake2b_compress( S, S->buf ); // Compress 371 | memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 372 | S->buflen -= BLAKE2B_BLOCKBYTES; 373 | in += fill; 374 | inlen -= fill; 375 | } 376 | else // inlen <= fill 377 | { 378 | memcpy( S->buf + left, in, inlen ); 379 | S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress 380 | in += inlen; 381 | inlen -= inlen; 382 | } 383 | } 384 | 385 | return 0; 386 | } 387 | 388 | 389 | int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) 390 | { 391 | if(S->outlen != outlen) return -1; 392 | 393 | if( S->buflen > BLAKE2B_BLOCKBYTES ) 394 | { 395 | blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 396 | blake2b_compress( S, S->buf ); 397 | S->buflen -= BLAKE2B_BLOCKBYTES; 398 | memcpy( S->buf, S->buf + 
BLAKE2B_BLOCKBYTES, S->buflen ); 399 | } 400 | 401 | blake2b_increment_counter( S, S->buflen ); 402 | blake2b_set_lastblock( S ); 403 | memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ 404 | blake2b_compress( S, S->buf ); 405 | memcpy( out, &S->h[0], outlen ); 406 | return 0; 407 | } 408 | 409 | 410 | int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 411 | { 412 | blake2b_state S[1]; 413 | 414 | /* Verify parameters */ 415 | if ( NULL == in && inlen > 0 ) return -1; 416 | 417 | if ( NULL == out ) return -1; 418 | 419 | if( NULL == key && keylen > 0 ) return -1; 420 | 421 | if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 422 | 423 | if( keylen > BLAKE2B_KEYBYTES ) return -1; 424 | 425 | if( keylen ) 426 | { 427 | if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; 428 | } 429 | else 430 | { 431 | if( blake2b_init( S, outlen ) < 0 ) return -1; 432 | } 433 | 434 | if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; 435 | return blake2b_final( S, out, outlen ); 436 | } 437 | 438 | #if defined(SUPERCOP) 439 | int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) 440 | { 441 | return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); 442 | } 443 | #endif 444 | -------------------------------------------------------------------------------- /src/blake2bp-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 
12 | */ 13 | #include 14 | #include 15 | #include "blake2.h" 16 | #include "blake2-kat.h" 17 | 18 | int main( int argc, char **argv ) 19 | { 20 | uint8_t key[BLAKE2B_KEYBYTES]; 21 | uint8_t buf[KAT_LENGTH]; 22 | 23 | for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) 24 | key[i] = ( uint8_t )i; 25 | 26 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 27 | buf[i] = ( uint8_t )i; 28 | 29 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 30 | { 31 | uint8_t hash[BLAKE2B_OUTBYTES]; 32 | 33 | if( blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || 34 | 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) 35 | { 36 | puts( "error" ); 37 | return -1; 38 | } 39 | } 40 | 41 | puts( "ok" ); 42 | return 0; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/blake2bp.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 
12 | */ 13 | 14 | #include <stdio.h> 15 | #include <stdlib.h> 16 | #include <string.h> 17 | #include <stdint.h> 18 | 19 | #if defined(_OPENMP) 20 | #include <omp.h> 21 | #endif 22 | 23 | #include "blake2.h" 24 | #include "blake2-impl.h" 25 | 26 | #define PARALLELISM_DEGREE 4 27 | /* Initialize S as a leaf (node_depth 0, node_offset = offset) of a depth-2 tree with fanout PARALLELISM_DEGREE; the leaf state is set to emit inner_length bytes. Returns -1 if the parameter-block init fails, else 0. */ 28 | static int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) 29 | { 30 | blake2b_param P[1]; 31 | P->digest_length = outlen; 32 | P->key_length = keylen; 33 | P->fanout = PARALLELISM_DEGREE; 34 | P->depth = 2; 35 | store32(&P->leaf_length, 0); 36 | store64(&P->node_offset, offset); 37 | P->node_depth = 0; 38 | P->inner_length = BLAKE2B_OUTBYTES; 39 | memset( P->reserved, 0, sizeof( P->reserved ) ); 40 | memset( P->salt, 0, sizeof( P->salt ) ); 41 | memset( P->personal, 0, sizeof( P->personal ) ); 42 | if( blake2b_init_param( S, P ) < 0 ) return -1; /* callers test this helper with < 0 */ 43 | S->outlen = P->inner_length; 44 | return 0; 45 | } 46 | /* Initialize S as the root (node_depth 1, node_offset 0) of the same tree; the root state emits digest_length bytes. Returns -1 if the parameter-block init fails, else 0. */ 47 | static int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen ) 48 | { 49 | blake2b_param P[1]; 50 | P->digest_length = outlen; 51 | P->key_length = keylen; 52 | P->fanout = PARALLELISM_DEGREE; 53 | P->depth = 2; 54 | store32(&P->leaf_length, 0); 55 | store64(&P->node_offset, 0); 56 | P->node_depth = 1; 57 | P->inner_length = BLAKE2B_OUTBYTES; 58 | memset( P->reserved, 0, sizeof( P->reserved ) ); 59 | memset( P->salt, 0, sizeof( P->salt ) ); 60 | memset( P->personal, 0, sizeof( P->personal ) ); 61 | if( blake2b_init_param( S, P ) < 0 ) return -1; /* callers test this helper with < 0 */ 62 | S->outlen = P->digest_length; 63 | return 0; 64 | } 65 | 66 | 67 | int blake2bp_init( blake2bp_state *S, size_t outlen ) 68 | { 69 | if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 70 | 71 | memset( S->buf, 0, sizeof( S->buf ) ); 72 | S->buflen = 0; 73 | 74 | if( blake2bp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) 75 | return -1; 76 | 77 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 78 | if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; 79 | 80 | S->R->last_node = 1; 81 | S->S[PARALLELISM_DEGREE - 1]->last_node = 1; 82 | 
S->outlen = ( uint8_t ) outlen; 83 | return 0; 84 | } 85 | 86 | int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ) 87 | { 88 | if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 89 | 90 | if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; 91 | 92 | memset( S->buf, 0, sizeof( S->buf ) ); 93 | S->buflen = 0; 94 | 95 | if( blake2bp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) 96 | return -1; 97 | 98 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 99 | if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) 100 | return -1; 101 | 102 | S->R->last_node = 1; 103 | S->S[PARALLELISM_DEGREE - 1]->last_node = 1; 104 | S->outlen = ( uint8_t ) outlen; 105 | { 106 | uint8_t block[BLAKE2B_BLOCKBYTES]; 107 | memset( block, 0, BLAKE2B_BLOCKBYTES ); 108 | memcpy( block, key, keylen ); 109 | 110 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 111 | blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES ); 112 | 113 | secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 114 | } 115 | return 0; 116 | } 117 | 118 | 119 | int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) 120 | { 121 | size_t left = S->buflen; 122 | size_t fill = sizeof( S->buf ) - left; 123 | 124 | if( left && inlen >= fill ) 125 | { 126 | memcpy( S->buf + left, in, fill ); 127 | 128 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 129 | blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); 130 | 131 | in += fill; 132 | inlen -= fill; 133 | left = 0; 134 | } 135 | 136 | #if defined(_OPENMP) 137 | omp_set_num_threads(PARALLELISM_DEGREE); 138 | #pragma omp parallel shared(S) 139 | #else 140 | for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) 141 | #endif 142 | { 143 | #if defined(_OPENMP) 144 | size_t id__ = ( size_t ) omp_get_thread_num(); 145 | #endif 146 | size_t inlen__ = inlen; 147 | const uint8_t *in__ = ( const uint8_t * )in; 148 | 
in__ += id__ * BLAKE2B_BLOCKBYTES; 149 | 150 | while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) 151 | { 152 | blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES ); 153 | in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; 154 | inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; 155 | } 156 | } 157 | 158 | in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ); 159 | inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; 160 | 161 | if( inlen > 0 ) 162 | memcpy( S->buf + left, in, inlen ); 163 | 164 | S->buflen = ( uint32_t ) left + ( uint32_t ) inlen; 165 | return 0; 166 | } 167 | 168 | 169 | 170 | int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ) 171 | { 172 | uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; 173 | 174 | if(S->outlen != outlen) return -1; 175 | 176 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 177 | { 178 | if( S->buflen > i * BLAKE2B_BLOCKBYTES ) 179 | { 180 | size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES; 181 | 182 | if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES; 183 | 184 | blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left ); 185 | } 186 | 187 | blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES ); 188 | } 189 | 190 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 191 | blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES ); 192 | 193 | return blake2b_final( S->R, out, outlen ); 194 | } 195 | 196 | int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 197 | { 198 | uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; 199 | blake2b_state S[PARALLELISM_DEGREE][1]; 200 | blake2b_state FS[1]; 201 | 202 | /* Verify parameters */ 203 | if ( NULL == in && inlen > 0 ) return -1; 204 | 205 | if ( NULL == out ) return -1; 206 | 207 | if ( NULL == key && keylen > 0) return -1; 208 | 209 | if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 210 | 211 | if( keylen > BLAKE2B_KEYBYTES ) return -1; 212 | 213 | for( size_t i = 0; i < 
PARALLELISM_DEGREE; ++i ) 214 | if( blake2bp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) 215 | return -1; 216 | 217 | S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node 218 | 219 | if( keylen > 0 ) 220 | { 221 | uint8_t block[BLAKE2B_BLOCKBYTES]; 222 | memset( block, 0, BLAKE2B_BLOCKBYTES ); 223 | memcpy( block, key, keylen ); 224 | 225 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 226 | blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES ); 227 | 228 | secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 229 | } 230 | 231 | #if defined(_OPENMP) 232 | omp_set_num_threads(PARALLELISM_DEGREE); 233 | #pragma omp parallel shared(S,hash) 234 | #else 235 | for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) 236 | #endif 237 | { 238 | #if defined(_OPENMP) 239 | size_t id__ = ( size_t ) omp_get_thread_num(); 240 | #endif 241 | size_t inlen__ = inlen; 242 | const uint8_t *in__ = ( const uint8_t * )in; 243 | in__ += id__ * BLAKE2B_BLOCKBYTES; 244 | 245 | while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) 246 | { 247 | blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES ); 248 | in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; 249 | inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; 250 | } 251 | 252 | if( inlen__ > id__ * BLAKE2B_BLOCKBYTES ) 253 | { 254 | const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES; 255 | const size_t len = left <= BLAKE2B_BLOCKBYTES ? 
left : BLAKE2B_BLOCKBYTES; 256 | blake2b_update( S[id__], in__, len ); 257 | } 258 | 259 | blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES ); 260 | } 261 | 262 | if( blake2bp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) 263 | return -1; 264 | 265 | FS->last_node = 1; // Mark as last node 266 | 267 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 268 | blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES ); 269 | 270 | return blake2b_final( FS, out, outlen ); 271 | } 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /src/blake2s-load-sse2.h: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 
#pragma once
#ifndef __BLAKE2S_LOAD_SSE2_H__
#define __BLAKE2S_LOAD_SSE2_H__

/*
 * Message-word loaders built on _mm_set_epi32.
 *
 * LOAD_MSG_r_i(buf) assigns to buf a vector made of four of the names
 * m0..m15, which must be in scope where the macro is expanded.  r runs over
 * 0..9 and i over 1..4.  _mm_set_epi32 takes its arguments from the highest
 * lane down to the lowest, so the right-most name lands in lane 0.
 *
 * NOTE(review): for the rounds spot-checked (0, 1 and 9), _1/_2 hold the
 * even/odd positions of the first half of the corresponding blake2s_sigma
 * row and _3/_4 those of the second half -- confirm for the remaining
 * rounds before relying on it.
 */
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)
#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8)
#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9)
#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14)
#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10)
#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1)
#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12)
#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11)
#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8)
#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10)
#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14)
#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7)
#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9)
#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2)
#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6)
#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9)
#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0)
#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14)
#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1)
#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2)
#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12)
#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4)
#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13)
#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12)
#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5)
#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0)
#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7)
#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13)
#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11)
#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5)
#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0)
#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6)
#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15)
#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12)
#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2)
#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10)
#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2)
#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15)
#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11)


#endif
#pragma once
#ifndef __BLAKE2S_LOAD_SSE41_H__
#define __BLAKE2S_LOAD_SSE41_H__

/*
 * Message-word loaders built from shuffles, blends, unpacks and byte shifts.
 *
 * LOAD_MSG_r_i(buf) assigns to buf a vector assembled from m0..m3, for r in
 * 0..9 and i in 1..4.  The expansion site must provide m0..m3, the scratch
 * variables t0, t1 and t2, and the TOI/TOF helpers used by the round-0
 * macros; none of them is defined in this header.
 *
 * Every macro expands to one or more complete statements that already end
 * in ';' and is not wrapped in do { } while(0), so it must be used as a
 * stand-alone statement and never as the unbraced body of an if/else/loop.
 */
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));

#define LOAD_MSG_0_2(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));

#define LOAD_MSG_0_3(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));

#define LOAD_MSG_0_4(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));

#define LOAD_MSG_1_1(buf) \
t0 = _mm_blend_epi16(m1, m2, 0x0C); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));

#define LOAD_MSG_1_2(buf) \
t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
t1 = _mm_blend_epi16(m1,m3,0xC0); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));

#define LOAD_MSG_1_3(buf) \
t0 = _mm_slli_si128(m1, 4); \
t1 = _mm_blend_epi16(m2, t0, 0x30); \
t2 = _mm_blend_epi16(m0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));

#define LOAD_MSG_1_4(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));

#define LOAD_MSG_2_1(buf) \
t0 = _mm_unpackhi_epi32(m2,m3); \
t1 = _mm_blend_epi16(m3,m1,0x0C); \
t2 = _mm_blend_epi16(t0, t1, 0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));

#define LOAD_MSG_2_2(buf) \
t0 = _mm_unpacklo_epi32(m2,m0); \
t1 = _mm_blend_epi16(t0, m0, 0xF0); \
t2 = _mm_slli_si128(m3, 8); \
buf = _mm_blend_epi16(t1, t2, 0xC0);

#define LOAD_MSG_2_3(buf) \
t0 = _mm_blend_epi16(m0, m2, 0x3C); \
t1 = _mm_srli_si128(m1, 12); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));

#define LOAD_MSG_2_4(buf) \
t0 = _mm_slli_si128(m3, 4); \
t1 = _mm_blend_epi16(m0, m1, 0x33); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));

#define LOAD_MSG_3_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(t0, m2); \
t2 = _mm_blend_epi16(t1, m3, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));

#define LOAD_MSG_3_2(buf) \
t0 = _mm_slli_si128(m2, 8); \
t1 = _mm_blend_epi16(m3,m0,0x0C); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));

#define LOAD_MSG_3_3(buf) \
t0 = _mm_blend_epi16(m0,m1,0x0F); \
t1 = _mm_blend_epi16(t0, m3, 0xC0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));

#define LOAD_MSG_3_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m2); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t1,t0);

#define LOAD_MSG_4_1(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,m2); \
t2 = _mm_blend_epi16(t0,t1,0x33); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));

#define LOAD_MSG_4_2(buf) \
t0 = _mm_unpackhi_epi64(m1,m3); \
t1 = _mm_unpacklo_epi64(m0,m1); \
buf = _mm_blend_epi16(t0,t1,0x33);

#define LOAD_MSG_4_3(buf) \
t0 = _mm_unpackhi_epi64(m3,m1); \
t1 = _mm_unpackhi_epi64(m2,m0); \
buf = _mm_blend_epi16(t1,t0,0x33);

#define LOAD_MSG_4_4(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_slli_si128(t0, 8); \
t2 = _mm_blend_epi16(t1,m3,0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));

#define LOAD_MSG_5_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpacklo_epi32(m0,m2); \
buf = _mm_unpacklo_epi64(t0,t1);

#define LOAD_MSG_5_2(buf) \
t0 = _mm_srli_si128(m2, 4); \
t1 = _mm_blend_epi16(m0,m3,0x03); \
buf = _mm_blend_epi16(t1,t0,0x3C);

#define LOAD_MSG_5_3(buf) \
t0 = _mm_blend_epi16(m1,m0,0x0C); \
t1 = _mm_srli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0,t1,0x30); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));

#define LOAD_MSG_5_4(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
buf = _mm_blend_epi16(t0,t1,0x33);

#define LOAD_MSG_6_1(buf) \
t0 = _mm_slli_si128(m1, 12); \
t1 = _mm_blend_epi16(m0,m3,0x33); \
buf = _mm_blend_epi16(t1,t0,0xC0);

#define LOAD_MSG_6_2(buf) \
t0 = _mm_blend_epi16(m3,m2,0x30); \
t1 = _mm_srli_si128(m1, 4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));

#define LOAD_MSG_6_3(buf) \
t0 = _mm_unpacklo_epi64(m0,m2); \
t1 = _mm_srli_si128(m1, 4); \
buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));

#define LOAD_MSG_6_4(buf) \
t0 = _mm_unpackhi_epi32(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,t0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));

#define LOAD_MSG_7_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_blend_epi16(t0,m3,0x0F); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));

#define LOAD_MSG_7_2(buf) \
t0 = _mm_blend_epi16(m2,m3,0x30); \
t1 = _mm_srli_si128(m0,4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));

#define LOAD_MSG_7_3(buf) \
t0 = _mm_unpackhi_epi64(m0,m3); \
t1 = _mm_unpacklo_epi64(m1,m2); \
t2 = _mm_blend_epi16(t0,t1,0x3C); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));

#define LOAD_MSG_7_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t0,t1);

#define LOAD_MSG_8_1(buf) \
t0 = _mm_unpackhi_epi32(m1,m3); \
t1 = _mm_unpacklo_epi64(t0,m0); \
t2 = _mm_blend_epi16(t1,m2,0xC0); \
buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));

#define LOAD_MSG_8_2(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_blend_epi16(m2,t0,0xF0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));

#define LOAD_MSG_8_3(buf) \
t0 = _mm_blend_epi16(m2,m0,0x0C); \
t1 = _mm_slli_si128(t0,4); \
buf = _mm_blend_epi16(t1,m3,0x0F);

#define LOAD_MSG_8_4(buf) \
t0 = _mm_blend_epi16(m1,m0,0x30); \
buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));

#define LOAD_MSG_9_1(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_blend_epi16(m1,m2,0x30); \
t2 = _mm_blend_epi16(t1,t0,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));

#define LOAD_MSG_9_2(buf) \
t0 = _mm_slli_si128(m0,4); \
t1 = _mm_blend_epi16(m1,t0,0xC0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));

#define LOAD_MSG_9_3(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_unpacklo_epi32(m2,m3); \
t2 = _mm_unpackhi_epi64(t0,t1); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));

#define LOAD_MSG_9_4(buf) \
t0 = _mm_blend_epi16(m3,m2,0xC0); \
t1 = _mm_unpacklo_epi32(m0,m3); \
t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));

#endif
#pragma once
#ifndef __BLAKE2S_LOAD_XOP_H__
#define __BLAKE2S_LOAD_XOP_H__

/*
 * Message-word loaders built on the two-source byte permute _mm_perm_epi8.
 *
 * TOB(x) turns a 32-bit word index x into the four consecutive byte
 * selectors 4x, 4x+1, 4x+2, 4x+3 packed into one 32-bit constant:
 * multiplying 4x by 0x01010101 copies it into every byte, and adding
 * 0x03020100 adds the byte offset 0..3.  As the emulation kept below shows,
 * byte selectors below 16 pick from the first source operand and selectors
 * of 16 and above pick from the second, so word indices 0..3 address the
 * first operand and 4..7 the second.
 *
 * LOAD_MSG_r_i(buf) assigns to buf a vector assembled from m0..m3, for r in
 * 0..9 and i in 1..4.  The expansion site must provide m0..m3 and the
 * scratch variables t0 and t1.  Every macro expands to complete statements
 * that already end in ';', so it must be used as a stand-alone statement.
 */
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB

/* Basic VPPERM emulation, for testing purposes */
/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
   const __m128i sixteen = _mm_set1_epi8(16);
   const __m128i t0 = _mm_shuffle_epi8(src1, sel);
   const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
   const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
                                     _mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00
   return _mm_blendv_epi8(t0, s1, mask);
}*/

#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );

#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );

#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );

#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );

#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );

#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );

#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );

#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );

#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );

#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );

#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );

#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );

#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );

#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );

#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );

#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );

#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );

#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );

#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );

#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );

#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );

#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );

#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );

#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );

#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );

#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );

#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );

#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );

#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );

/* No continuation after the last statement: a trailing backslash here would
   splice whatever line follows into this macro. */
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) );

#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );

#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );

#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );

#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );

#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );

#endif
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "blake2.h" 19 | #include "blake2-impl.h" 20 | 21 | static const uint32_t blake2s_IV[8] = 22 | { 23 | 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, 24 | 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL 25 | }; 26 | 27 | static const uint8_t blake2s_sigma[10][16] = 28 | { 29 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 30 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 31 | { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 32 | { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 33 | { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 34 | { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 35 | { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 36 | { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 37 | { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 38 | { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 39 | }; 40 | 41 | static inline int blake2s_set_lastnode( blake2s_state *S ) 42 | { 43 | S->f[1] = ~0U; 44 | return 0; 45 | } 46 | 47 | static inline int blake2s_clear_lastnode( blake2s_state *S ) 48 | { 49 | S->f[1] = 0U; 50 | return 0; 51 | } 52 | 53 | /* Some helper functions, not necessarily useful */ 54 | static inline int blake2s_set_lastblock( blake2s_state *S ) 55 | { 56 | if( S->last_node ) blake2s_set_lastnode( S ); 57 | 58 | S->f[0] = ~0U; 59 | return 0; 60 | } 61 | 62 | static inline int blake2s_clear_lastblock( blake2s_state *S ) 63 | { 64 | if( S->last_node ) blake2s_clear_lastnode( S ); 65 | 66 | S->f[0] = 0U; 67 | return 0; 68 | } 69 | 70 | static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) 71 | { 72 | S->t[0] += inc; 73 | S->t[1] += ( S->t[0] < inc ); 74 | return 0; 75 | } 76 | 77 | // Parameter-related functions 78 | static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) 79 | { 80 | 
P->digest_length = digest_length; 81 | return 0; 82 | } 83 | 84 | static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) 85 | { 86 | P->fanout = fanout; 87 | return 0; 88 | } 89 | 90 | static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) 91 | { 92 | P->depth = depth; 93 | return 0; 94 | } 95 | 96 | static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) 97 | { 98 | store32( &P->leaf_length, leaf_length ); 99 | return 0; 100 | } 101 | 102 | static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) 103 | { 104 | store48( P->node_offset, node_offset ); 105 | return 0; 106 | } 107 | 108 | static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) 109 | { 110 | P->node_depth = node_depth; 111 | return 0; 112 | } 113 | 114 | static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) 115 | { 116 | P->inner_length = inner_length; 117 | return 0; 118 | } 119 | 120 | static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) 121 | { 122 | memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); 123 | return 0; 124 | } 125 | 126 | static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) 127 | { 128 | memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); 129 | return 0; 130 | } 131 | 132 | static inline int blake2s_init0( blake2s_state *S ) 133 | { 134 | memset( S, 0, sizeof( blake2s_state ) ); 135 | 136 | for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; 137 | 138 | return 0; 139 | } 140 | 141 | #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) 142 | #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param) 143 | #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key) 144 | #define blake2s_update BLAKE2_IMPL_NAME(blake2s_update) 145 | #define blake2s_final 
BLAKE2_IMPL_NAME(blake2s_final) 146 | #define blake2s BLAKE2_IMPL_NAME(blake2s) 147 | 148 | #if defined(__cplusplus) 149 | extern "C" { 150 | #endif 151 | int blake2s_init( blake2s_state *S, size_t outlen ); 152 | int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); 153 | int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 154 | int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); 155 | int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); 156 | int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 157 | #if defined(__cplusplus) 158 | } 159 | #endif 160 | 161 | /* init2 xors IV with input parameter block */ 162 | int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) 163 | { 164 | blake2s_init0( S ); 165 | uint32_t *p = ( uint32_t * )( P ); 166 | 167 | /* IV XOR ParamBlock */ 168 | for( size_t i = 0; i < 8; ++i ) 169 | S->h[i] ^= load32( &p[i] ); 170 | 171 | S->outlen = P->digest_length; 172 | return 0; 173 | } 174 | 175 | 176 | // Sequential blake2s initialization 177 | int blake2s_init( blake2s_state *S, size_t outlen ) 178 | { 179 | blake2s_param P[1]; 180 | 181 | /* Move interval verification here? 
*/ 182 | if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; 183 | 184 | P->digest_length = ( uint8_t) outlen; 185 | P->key_length = 0; 186 | P->fanout = 1; 187 | P->depth = 1; 188 | store32( &P->leaf_length, 0 ); 189 | store48( &P->node_offset, 0 ); 190 | P->node_depth = 0; 191 | P->inner_length = 0; 192 | // memset(P->reserved, 0, sizeof(P->reserved) ); 193 | memset( P->salt, 0, sizeof( P->salt ) ); 194 | memset( P->personal, 0, sizeof( P->personal ) ); 195 | return blake2s_init_param( S, P ); 196 | } 197 | 198 | int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) 199 | { 200 | blake2s_param P[1]; 201 | 202 | if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; 203 | 204 | if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; 205 | 206 | P->digest_length = ( uint8_t ) outlen; 207 | P->key_length = ( uint8_t ) keylen; 208 | P->fanout = 1; 209 | P->depth = 1; 210 | store32( &P->leaf_length, 0 ); 211 | store48( &P->node_offset, 0 ); 212 | P->node_depth = 0; 213 | P->inner_length = 0; 214 | // memset(P->reserved, 0, sizeof(P->reserved) ); 215 | memset( P->salt, 0, sizeof( P->salt ) ); 216 | memset( P->personal, 0, sizeof( P->personal ) ); 217 | 218 | if( blake2s_init_param( S, P ) < 0 ) return -1; 219 | 220 | { 221 | uint8_t block[BLAKE2S_BLOCKBYTES]; 222 | memset( block, 0, BLAKE2S_BLOCKBYTES ); 223 | memcpy( block, key, keylen ); 224 | blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); 225 | secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ 226 | } 227 | return 0; 228 | } 229 | 230 | static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) 231 | { 232 | uint32_t m[16]; 233 | uint32_t v[16]; 234 | 235 | for( size_t i = 0; i < 16; ++i ) 236 | m[i] = load32( block + i * sizeof( m[i] ) ); 237 | 238 | for( size_t i = 0; i < 8; ++i ) 239 | v[i] = S->h[i]; 240 | 241 | v[ 8] = blake2s_IV[0]; 242 | v[ 9] = blake2s_IV[1]; 243 | v[10] = blake2s_IV[2]; 
244 | v[11] = blake2s_IV[3]; 245 | v[12] = S->t[0] ^ blake2s_IV[4]; 246 | v[13] = S->t[1] ^ blake2s_IV[5]; 247 | v[14] = S->f[0] ^ blake2s_IV[6]; 248 | v[15] = S->f[1] ^ blake2s_IV[7]; 249 | #define G(r,i,a,b,c,d) \ 250 | do { \ 251 | a = a + b + m[blake2s_sigma[r][2*i+0]]; \ 252 | d = rotr32(d ^ a, 16); \ 253 | c = c + d; \ 254 | b = rotr32(b ^ c, 12); \ 255 | a = a + b + m[blake2s_sigma[r][2*i+1]]; \ 256 | d = rotr32(d ^ a, 8); \ 257 | c = c + d; \ 258 | b = rotr32(b ^ c, 7); \ 259 | } while(0) 260 | #define ROUND(r) \ 261 | do { \ 262 | G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ 263 | G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ 264 | G(r,2,v[ 2],v[ 6],v[10],v[14]); \ 265 | G(r,3,v[ 3],v[ 7],v[11],v[15]); \ 266 | G(r,4,v[ 0],v[ 5],v[10],v[15]); \ 267 | G(r,5,v[ 1],v[ 6],v[11],v[12]); \ 268 | G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ 269 | G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ 270 | } while(0) 271 | ROUND( 0 ); 272 | ROUND( 1 ); 273 | ROUND( 2 ); 274 | ROUND( 3 ); 275 | ROUND( 4 ); 276 | ROUND( 5 ); 277 | ROUND( 6 ); 278 | ROUND( 7 ); 279 | ROUND( 8 ); 280 | ROUND( 9 ); 281 | 282 | for( size_t i = 0; i < 8; ++i ) 283 | S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; 284 | 285 | #undef G 286 | #undef ROUND 287 | return 0; 288 | } 289 | 290 | 291 | int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) 292 | { 293 | while( inlen > 0 ) 294 | { 295 | uint32_t left = S->buflen; 296 | uint32_t fill = 2 * BLAKE2S_BLOCKBYTES - left; 297 | 298 | if( inlen > fill ) 299 | { 300 | memcpy( S->buf + left, in, fill ); // Fill buffer 301 | S->buflen += fill; 302 | blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); 303 | blake2s_compress( S, S->buf ); // Compress 304 | memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left 305 | S->buflen -= BLAKE2S_BLOCKBYTES; 306 | in += fill; 307 | inlen -= fill; 308 | } 309 | else // inlen <= fill 310 | { 311 | memcpy( S->buf + left, in, inlen ); 312 | S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress 313 | in += inlen; 314 
| inlen -= inlen; 315 | } 316 | } 317 | 318 | return 0; 319 | } 320 | 321 | int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) 322 | { 323 | uint8_t buffer[BLAKE2S_OUTBYTES]; 324 | size_t i; 325 | 326 | if(S->outlen != outlen) return -1; 327 | 328 | if( S->buflen > BLAKE2S_BLOCKBYTES ) 329 | { 330 | blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); 331 | blake2s_compress( S, S->buf ); 332 | S->buflen -= BLAKE2S_BLOCKBYTES; 333 | memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); 334 | } 335 | 336 | blake2s_increment_counter( S, ( uint32_t )S->buflen ); 337 | blake2s_set_lastblock( S ); 338 | memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ 339 | blake2s_compress( S, S->buf ); 340 | 341 | for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ 342 | store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); 343 | 344 | memcpy( out, buffer, outlen ); 345 | return 0; 346 | } 347 | 348 | int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 349 | { 350 | blake2s_state S[1]; 351 | 352 | /* Verify parameters */ 353 | if ( NULL == in && inlen > 0 ) return -1; 354 | 355 | if ( NULL == out ) return -1; 356 | 357 | if ( NULL == key && keylen > 0 ) return -1; 358 | 359 | if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; 360 | 361 | if( keylen > BLAKE2S_KEYBYTES ) return -1; 362 | 363 | if( keylen > 0 ) 364 | { 365 | if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; 366 | } 367 | else 368 | { 369 | if( blake2s_init( S, outlen ) < 0 ) return -1; 370 | } 371 | 372 | if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; 373 | return blake2s_final( S, out, outlen ); 374 | } 375 | 376 | -------------------------------------------------------------------------------- /src/blake2s-round.h: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 
| 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 12 | */ 13 | #pragma once 14 | #ifndef __BLAKE2S_ROUND_H__ 15 | #define __BLAKE2S_ROUND_H__ 16 | 17 | #define LOAD(p) _mm_load_si128( (__m128i *)(p) ) 18 | #define STORE(p,r) _mm_store_si128((__m128i *)(p), r) 19 | 20 | #define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) 21 | #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) 22 | 23 | #define TOF(reg) _mm_castsi128_ps((reg)) 24 | #define TOI(reg) _mm_castps_si128((reg)) 25 | 26 | #define LIKELY(x) __builtin_expect((x),1) 27 | 28 | 29 | /* Microarchitecture-specific macros */ 30 | #ifndef HAVE_XOP 31 | #ifdef HAVE_SSSE3 32 | #define _mm_roti_epi32(r, c) ( \ 33 | (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ 34 | : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ 35 | : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) 36 | #else 37 | #define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) 38 | #endif 39 | #else 40 | /* ... 
*/ 41 | #endif 42 | 43 | 44 | #define G1(row1,row2,row3,row4,buf) \ 45 | row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ 46 | row4 = _mm_xor_si128( row4, row1 ); \ 47 | row4 = _mm_roti_epi32(row4, -16); \ 48 | row3 = _mm_add_epi32( row3, row4 ); \ 49 | row2 = _mm_xor_si128( row2, row3 ); \ 50 | row2 = _mm_roti_epi32(row2, -12); 51 | 52 | #define G2(row1,row2,row3,row4,buf) \ 53 | row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ 54 | row4 = _mm_xor_si128( row4, row1 ); \ 55 | row4 = _mm_roti_epi32(row4, -8); \ 56 | row3 = _mm_add_epi32( row3, row4 ); \ 57 | row2 = _mm_xor_si128( row2, row3 ); \ 58 | row2 = _mm_roti_epi32(row2, -7); 59 | 60 | #define DIAGONALIZE(row1,row2,row3,row4) \ 61 | row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ 62 | row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ 63 | row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); 64 | 65 | #define UNDIAGONALIZE(row1,row2,row3,row4) \ 66 | row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \ 67 | row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ 68 | row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); 69 | 70 | #if defined(HAVE_XOP) 71 | #include "blake2s-load-xop.h" 72 | #elif defined(HAVE_SSE4_1) 73 | #include "blake2s-load-sse41.h" 74 | #else 75 | #include "blake2s-load-sse2.h" 76 | #endif 77 | 78 | #define ROUND(r) \ 79 | LOAD_MSG_ ##r ##_1(buf1); \ 80 | G1(row1,row2,row3,row4,buf1); \ 81 | LOAD_MSG_ ##r ##_2(buf2); \ 82 | G2(row1,row2,row3,row4,buf2); \ 83 | DIAGONALIZE(row1,row2,row3,row4); \ 84 | LOAD_MSG_ ##r ##_3(buf3); \ 85 | G1(row1,row2,row3,row4,buf3); \ 86 | LOAD_MSG_ ##r ##_4(buf4); \ 87 | G2(row1,row2,row3,row4,buf4); \ 88 | UNDIAGONALIZE(row1,row2,row3,row4); \ 89 | 90 | #endif 91 | 92 | -------------------------------------------------------------------------------- /src/blake2s-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized 
C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 12 | */ 13 | #include 14 | #include 15 | #include "blake2.h" 16 | #include "blake2-kat.h" 17 | int main( int argc, char **argv ) 18 | { 19 | uint8_t key[BLAKE2S_KEYBYTES]; 20 | uint8_t buf[KAT_LENGTH]; 21 | 22 | for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) 23 | key[i] = ( uint8_t )i; 24 | 25 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 26 | buf[i] = ( uint8_t )i; 27 | 28 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 29 | { 30 | uint8_t hash[BLAKE2S_OUTBYTES]; 31 | 32 | if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || 33 | 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) 34 | { 35 | puts( "error" ); 36 | return -1; 37 | } 38 | } 39 | 40 | puts( "ok" ); 41 | return 0; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/blake2s.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "blake2.h" 19 | #include "blake2-impl.h" 20 | 21 | #include "blake2-config.h" 22 | 23 | #if defined(_MSC_VER) 24 | #include 25 | #endif 26 | 27 | #if defined(HAVE_SSE2) 28 | #include 29 | // MSVC only defines _mm_set_epi64x for x86_64... 30 | #if defined(_MSC_VER) && !defined(_M_X64) && !defined(__clang__) 31 | static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) 32 | { 33 | return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); 34 | } 35 | #endif 36 | #endif 37 | 38 | 39 | #if defined(HAVE_SSSE3) 40 | #include 41 | #endif 42 | #if defined(HAVE_SSE4_1) 43 | #include 44 | #endif 45 | #if defined(HAVE_AVX) 46 | #include 47 | #endif 48 | #if defined(HAVE_XOP) && !defined(_MSC_VER) 49 | #include 50 | #endif 51 | 52 | #include "blake2s-round.h" 53 | 54 | static const uint32_t blake2s_IV[8] = 55 | { 56 | 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, 57 | 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL 58 | }; 59 | 60 | static const uint8_t blake2s_sigma[10][16] = 61 | { 62 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 63 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 64 | { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 65 | { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 66 | { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 67 | { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 68 | { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 69 | { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 70 | { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 71 | { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 72 | }; 73 | 74 | 75 | /* Some helper functions, not necessarily useful */ 76 | static inline int blake2s_set_lastnode( blake2s_state *S ) 77 | { 78 | S->f[1] = ~0U; 79 | return 0; 80 | } 81 | 82 | static inline int blake2s_clear_lastnode( blake2s_state *S ) 83 | { 
84 | S->f[1] = 0U; 85 | return 0; 86 | } 87 | 88 | static inline int blake2s_set_lastblock( blake2s_state *S ) 89 | { 90 | if( S->last_node ) blake2s_set_lastnode( S ); 91 | 92 | S->f[0] = ~0U; 93 | return 0; 94 | } 95 | 96 | static inline int blake2s_clear_lastblock( blake2s_state *S ) 97 | { 98 | if( S->last_node ) blake2s_clear_lastnode( S ); 99 | 100 | S->f[0] = 0U; 101 | return 0; 102 | } 103 | 104 | static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) 105 | { 106 | uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; 107 | t += inc; 108 | S->t[0] = ( uint32_t )( t >> 0 ); 109 | S->t[1] = ( uint32_t )( t >> 32 ); 110 | return 0; 111 | } 112 | 113 | 114 | // Parameter-related functions 115 | static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) 116 | { 117 | P->digest_length = digest_length; 118 | return 0; 119 | } 120 | 121 | static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) 122 | { 123 | P->fanout = fanout; 124 | return 0; 125 | } 126 | 127 | static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) 128 | { 129 | P->depth = depth; 130 | return 0; 131 | } 132 | 133 | static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) 134 | { 135 | P->leaf_length = leaf_length; 136 | return 0; 137 | } 138 | 139 | static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) 140 | { 141 | store48( P->node_offset, node_offset ); 142 | return 0; 143 | } 144 | 145 | static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) 146 | { 147 | P->node_depth = node_depth; 148 | return 0; 149 | } 150 | 151 | static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) 152 | { 153 | P->inner_length = inner_length; 154 | return 0; 155 | } 156 | 157 | static inline int blake2s_param_set_salt( blake2s_param 
*P, const uint8_t salt[BLAKE2S_SALTBYTES] ) 158 | { 159 | memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); 160 | return 0; 161 | } 162 | 163 | static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) 164 | { 165 | memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); 166 | return 0; 167 | } 168 | 169 | static inline int blake2s_init0( blake2s_state *S ) 170 | { 171 | memset( S, 0, sizeof( blake2s_state ) ); 172 | 173 | for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; 174 | 175 | return 0; 176 | } 177 | 178 | #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) 179 | #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param) 180 | #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key) 181 | #define blake2s_update BLAKE2_IMPL_NAME(blake2s_update) 182 | #define blake2s_final BLAKE2_IMPL_NAME(blake2s_final) 183 | #define blake2s BLAKE2_IMPL_NAME(blake2s) 184 | 185 | #if defined(__cplusplus) 186 | extern "C" { 187 | #endif 188 | int blake2s_init( blake2s_state *S, size_t outlen ); 189 | int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); 190 | int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); 191 | int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); 192 | int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); 193 | int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 194 | #if defined(__cplusplus) 195 | } 196 | #endif 197 | 198 | 199 | /* init2 xors IV with input parameter block */ 200 | int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) 201 | { 202 | uint8_t *p, *h, *v; 203 | //blake2s_init0( S ); 204 | v = ( uint8_t * )( blake2s_IV ); 205 | h = ( uint8_t * )( S->h ); 206 | p = ( uint8_t * )( P ); 207 | /* IV XOR ParamBlock */ 208 | memset( S, 0, sizeof( blake2s_state ) ); 209 | 210 | for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; 
211 | 212 | S->outlen = P->digest_length; 213 | return 0; 214 | } 215 | 216 | 217 | /* Some sort of default parameter block initialization, for sequential blake2s */ 218 | int blake2s_init( blake2s_state *S, size_t outlen ) 219 | { 220 | if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; 221 | 222 | const blake2s_param P = 223 | { 224 | outlen, 225 | 0, 226 | 1, 227 | 1, 228 | 0, 229 | {0}, 230 | 0, 231 | 0, 232 | {0}, 233 | {0} 234 | }; 235 | return blake2s_init_param( S, &P ); 236 | } 237 | 238 | 239 | int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) 240 | { 241 | if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; 242 | 243 | if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; 244 | 245 | const blake2s_param P = 246 | { 247 | outlen, 248 | keylen, 249 | 1, 250 | 1, 251 | 0, 252 | {0}, 253 | 0, 254 | 0, 255 | {0}, 256 | {0} 257 | }; 258 | 259 | if( blake2s_init_param( S, &P ) < 0 ) 260 | return -1; 261 | 262 | { 263 | uint8_t block[BLAKE2S_BLOCKBYTES]; 264 | memset( block, 0, BLAKE2S_BLOCKBYTES ); 265 | memcpy( block, key, keylen ); 266 | blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); 267 | secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ 268 | } 269 | return 0; 270 | } 271 | 272 | 273 | static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) 274 | { 275 | __m128i row1, row2, row3, row4; 276 | __m128i buf1, buf2, buf3, buf4; 277 | #if defined(HAVE_SSE4_1) 278 | __m128i t0, t1; 279 | #if !defined(HAVE_XOP) 280 | __m128i t2; 281 | #endif 282 | #endif 283 | __m128i ff0, ff1; 284 | #if defined(HAVE_SSSE3) && !defined(HAVE_XOP) 285 | const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); 286 | const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); 287 | #endif 288 | #if defined(HAVE_SSE4_1) 289 | const __m128i m0 = LOADU( block + 00 ); 290 | const 
__m128i m1 = LOADU( block + 16 ); 291 | const __m128i m2 = LOADU( block + 32 ); 292 | const __m128i m3 = LOADU( block + 48 ); 293 | #else 294 | const uint32_t m0 = ( ( uint32_t * )block )[ 0]; 295 | const uint32_t m1 = ( ( uint32_t * )block )[ 1]; 296 | const uint32_t m2 = ( ( uint32_t * )block )[ 2]; 297 | const uint32_t m3 = ( ( uint32_t * )block )[ 3]; 298 | const uint32_t m4 = ( ( uint32_t * )block )[ 4]; 299 | const uint32_t m5 = ( ( uint32_t * )block )[ 5]; 300 | const uint32_t m6 = ( ( uint32_t * )block )[ 6]; 301 | const uint32_t m7 = ( ( uint32_t * )block )[ 7]; 302 | const uint32_t m8 = ( ( uint32_t * )block )[ 8]; 303 | const uint32_t m9 = ( ( uint32_t * )block )[ 9]; 304 | const uint32_t m10 = ( ( uint32_t * )block )[10]; 305 | const uint32_t m11 = ( ( uint32_t * )block )[11]; 306 | const uint32_t m12 = ( ( uint32_t * )block )[12]; 307 | const uint32_t m13 = ( ( uint32_t * )block )[13]; 308 | const uint32_t m14 = ( ( uint32_t * )block )[14]; 309 | const uint32_t m15 = ( ( uint32_t * )block )[15]; 310 | #endif 311 | row1 = ff0 = LOADU( &S->h[0] ); 312 | row2 = ff1 = LOADU( &S->h[4] ); 313 | row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); 314 | row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); 315 | ROUND( 0 ); 316 | ROUND( 1 ); 317 | ROUND( 2 ); 318 | ROUND( 3 ); 319 | ROUND( 4 ); 320 | ROUND( 5 ); 321 | ROUND( 6 ); 322 | ROUND( 7 ); 323 | ROUND( 8 ); 324 | ROUND( 9 ); 325 | STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); 326 | STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); 327 | return 0; 328 | } 329 | 330 | 331 | int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) 332 | { 333 | while( inlen > 0 ) 334 | { 335 | size_t left = S->buflen; 336 | size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; 337 | 338 | if( inlen > fill ) 339 | { 340 | memcpy( S->buf + left, in, fill ); // Fill buffer 341 | S->buflen += fill; 
342 | blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); 343 | blake2s_compress( S, S->buf ); // Compress 344 | memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left 345 | S->buflen -= BLAKE2S_BLOCKBYTES; 346 | in += fill; 347 | inlen -= fill; 348 | } 349 | else /* inlen <= fill */ 350 | { 351 | memcpy( S->buf + left, in, inlen ); 352 | S->buflen += inlen; // Be lazy, do not compress 353 | in += inlen; 354 | inlen -= inlen; 355 | } 356 | } 357 | 358 | return 0; 359 | } 360 | 361 | 362 | int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) 363 | { 364 | uint8_t buffer[BLAKE2S_OUTBYTES]; 365 | 366 | if(outlen != S->outlen ) return -1; 367 | 368 | if( S->buflen > BLAKE2S_BLOCKBYTES ) 369 | { 370 | blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); 371 | blake2s_compress( S, S->buf ); 372 | S->buflen -= BLAKE2S_BLOCKBYTES; 373 | memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); 374 | } 375 | 376 | blake2s_increment_counter( S, ( uint32_t )S->buflen ); 377 | blake2s_set_lastblock( S ); 378 | memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ 379 | blake2s_compress( S, S->buf ); 380 | 381 | for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ 382 | store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); 383 | 384 | memcpy( out, buffer, outlen ); 385 | return 0; 386 | } 387 | 388 | int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 389 | { 390 | blake2s_state S[1]; 391 | 392 | /* Verify parameters */ 393 | if ( NULL == in && inlen > 0 ) return -1; 394 | 395 | if ( NULL == out ) return -1; 396 | 397 | if ( NULL == key && keylen > 0) return -1; 398 | 399 | if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; 400 | 401 | if( keylen > BLAKE2S_KEYBYTES ) return -1; 402 | 403 | if( keylen > 0 ) 404 | { 405 | if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; 406 | } 407 | else 408 | { 409 | if( blake2s_init( S, 
#if defined(SUPERCOP)
/*
 * Hash entry point compiled only when SUPERCOP is defined: unkeyed BLAKE2s
 * producing a BLAKE2S_OUTBYTES digest.  Returns whatever blake2s() returns.
 */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  const size_t message_bytes = ( size_t )inlen;

  return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, message_bytes, 0 );
}
#endif
12 | */ 13 | #include 14 | #include 15 | #include "blake2.h" 16 | #include "blake2-kat.h" 17 | 18 | int main( int argc, char **argv ) 19 | { 20 | uint8_t key[BLAKE2S_KEYBYTES]; 21 | uint8_t buf[KAT_LENGTH]; 22 | 23 | for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) 24 | key[i] = ( uint8_t )i; 25 | 26 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 27 | buf[i] = ( uint8_t )i; 28 | 29 | for( size_t i = 0; i < KAT_LENGTH; ++i ) 30 | { 31 | uint8_t hash[BLAKE2S_OUTBYTES]; 32 | if( blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || 33 | 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) 34 | { 35 | puts( "error" ); 36 | return -1; 37 | } 38 | } 39 | 40 | puts( "ok" ); 41 | return 0; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/blake2sp.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - optimized C implementations 3 | 4 | Written in 2012 by Samuel Neves 5 | 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | 10 | You should have received a copy of the CC0 Public Domain Dedication along with 11 | this software. If not, see . 
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #if defined(_OPENMP) 19 | #include 20 | #endif 21 | 22 | #include "blake2.h" 23 | #include "blake2-impl.h" 24 | 25 | #define PARALLELISM_DEGREE 8 26 | 27 | static int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) 28 | { 29 | blake2s_param P[1]; 30 | P->digest_length = outlen; 31 | P->key_length = keylen; 32 | P->fanout = PARALLELISM_DEGREE; 33 | P->depth = 2; 34 | P->leaf_length = 0; 35 | store48( P->node_offset, offset ); 36 | P->node_depth = 0; 37 | P->inner_length = BLAKE2S_OUTBYTES; 38 | memset( P->salt, 0, sizeof( P->salt ) ); 39 | memset( P->personal, 0, sizeof( P->personal ) ); 40 | blake2s_init_param( S, P ); 41 | S->outlen = P->inner_length; 42 | return 0; 43 | } 44 | 45 | static int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen ) 46 | { 47 | blake2s_param P[1]; 48 | P->digest_length = outlen; 49 | P->key_length = keylen; 50 | P->fanout = PARALLELISM_DEGREE; 51 | P->depth = 2; 52 | P->leaf_length = 0; 53 | store48( P->node_offset, 0ULL ); 54 | P->node_depth = 1; 55 | P->inner_length = BLAKE2S_OUTBYTES; 56 | memset( P->salt, 0, sizeof( P->salt ) ); 57 | memset( P->personal, 0, sizeof( P->personal ) ); 58 | blake2s_init_param( S, P ); 59 | S->outlen = P->digest_length; 60 | return 0; 61 | } 62 | 63 | 64 | int blake2sp_init( blake2sp_state *S, size_t outlen ) 65 | { 66 | if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; 67 | 68 | memset( S->buf, 0, sizeof( S->buf ) ); 69 | S->buflen = 0; 70 | 71 | if( blake2sp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) 72 | return -1; 73 | 74 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 75 | if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; 76 | 77 | S->R->last_node = 1; 78 | S->S[PARALLELISM_DEGREE - 1]->last_node = 1; 79 | S->outlen = ( uint8_t ) outlen; 80 | return 0; 81 | } 82 | 83 | int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void 
*key, size_t keylen ) 84 | { 85 | if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; 86 | 87 | if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; 88 | 89 | memset( S->buf, 0, sizeof( S->buf ) ); 90 | S->buflen = 0; 91 | 92 | if( blake2sp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) 93 | return -1; 94 | 95 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 96 | if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) 97 | return -1; 98 | 99 | S->R->last_node = 1; 100 | S->S[PARALLELISM_DEGREE - 1]->last_node = 1; 101 | S->outlen = ( uint8_t ) outlen; 102 | { 103 | uint8_t block[BLAKE2S_BLOCKBYTES]; 104 | memset( block, 0, BLAKE2S_BLOCKBYTES ); 105 | memcpy( block, key, keylen ); 106 | 107 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 108 | blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); 109 | 110 | secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ 111 | } 112 | return 0; 113 | } 114 | 115 | 116 | int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) 117 | { 118 | size_t left = S->buflen; 119 | size_t fill = sizeof( S->buf ) - left; 120 | 121 | if( left && inlen >= fill ) 122 | { 123 | memcpy( S->buf + left, in, fill ); 124 | 125 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 126 | blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); 127 | 128 | in += fill; 129 | inlen -= fill; 130 | left = 0; 131 | } 132 | 133 | #if defined(_OPENMP) 134 | omp_set_num_threads(PARALLELISM_DEGREE); 135 | #pragma omp parallel shared(S) 136 | #else 137 | for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) 138 | #endif 139 | { 140 | #if defined(_OPENMP) 141 | size_t id__ = ( size_t ) omp_get_thread_num(); 142 | #endif 143 | size_t inlen__ = inlen; 144 | const uint8_t *in__ = ( const uint8_t * )in; 145 | in__ += id__ * BLAKE2S_BLOCKBYTES; 146 | 147 | while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) 148 | { 149 | 
blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES ); 150 | in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; 151 | inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; 152 | } 153 | } 154 | 155 | in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ); 156 | inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; 157 | 158 | if( inlen > 0 ) 159 | memcpy( S->buf + left, in, inlen ); 160 | 161 | S->buflen = ( uint32_t ) left + ( uint32_t ) inlen; 162 | return 0; 163 | } 164 | 165 | 166 | int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ) 167 | { 168 | uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; 169 | 170 | if(S->outlen != outlen) return -1; 171 | 172 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 173 | { 174 | if( S->buflen > i * BLAKE2S_BLOCKBYTES ) 175 | { 176 | size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES; 177 | 178 | if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES; 179 | 180 | blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left ); 181 | } 182 | 183 | blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ); 184 | } 185 | 186 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 187 | blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ); 188 | 189 | blake2s_final( S->R, out, outlen ); 190 | return 0; 191 | } 192 | 193 | 194 | int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 195 | { 196 | uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; 197 | blake2s_state S[PARALLELISM_DEGREE][1]; 198 | blake2s_state FS[1]; 199 | 200 | /* Verify parameters */ 201 | if ( NULL == in && inlen > 0 ) return -1; 202 | 203 | if ( NULL == out ) return -1; 204 | 205 | if ( NULL == key && keylen > 0 ) return -1; 206 | 207 | if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; 208 | 209 | if( keylen > BLAKE2S_KEYBYTES ) return -1; 210 | 211 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 212 | if( blake2sp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) 213 | 
return -1; 214 | 215 | S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node 216 | 217 | if( keylen > 0 ) 218 | { 219 | uint8_t block[BLAKE2S_BLOCKBYTES]; 220 | memset( block, 0, BLAKE2S_BLOCKBYTES ); 221 | memcpy( block, key, keylen ); 222 | 223 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 224 | blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); 225 | 226 | secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ 227 | } 228 | 229 | #if defined(_OPENMP) 230 | omp_set_num_threads(PARALLELISM_DEGREE); 231 | #pragma omp parallel shared(S,hash) 232 | #else 233 | 234 | for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) 235 | #endif 236 | { 237 | #if defined(_OPENMP) 238 | size_t id__ = ( size_t ) omp_get_thread_num(); 239 | #endif 240 | size_t inlen__ = inlen; 241 | const uint8_t *in__ = ( const uint8_t * )in; 242 | in__ += id__ * BLAKE2S_BLOCKBYTES; 243 | 244 | while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) 245 | { 246 | blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); 247 | in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; 248 | inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; 249 | } 250 | 251 | if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) 252 | { 253 | const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; 254 | const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES; 255 | blake2s_update( S[id__], in__, len ); 256 | } 257 | 258 | blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); 259 | } 260 | 261 | if( blake2sp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) 262 | return -1; 263 | 264 | FS->last_node = 1; 265 | 266 | for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) 267 | blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); 268 | 269 | return blake2s_final( FS, out, outlen ); 270 | } 271 | 272 | 273 | 274 | 275 | --------------------------------------------------------------------------------