├── .gitignore ├── src ├── config │ ├── config_vars.ml.default │ ├── dune │ └── flags.ml ├── poly1305-donna.h ├── keccak.h ├── d3des.h ├── chacha20.h ├── blowfish.h ├── pclmul.h ├── arcfour.h ├── sha1.h ├── ghash.h ├── poly1305-donna.c ├── dune ├── ripemd160.h ├── siphash.h ├── sha256.h ├── cryptokitBignumOld.mli ├── sha512.h ├── stubs-sha1.c ├── cryptokitBignum.mli ├── stubs-ripemd160.c ├── stubs-poly1305.c ├── stubs-des.c ├── aesni.h ├── stubs-siphash.c ├── blake2.h ├── arcfour.c ├── stubs-misc.c ├── stubs-arcfour.c ├── rijndael-alg-fst.h ├── stubs-sha256.c ├── stubs-md5.c ├── blake3.h ├── stubs-ghash.c ├── stubs-chacha20.c ├── stubs-blowfish.c ├── stubs-blake2.c ├── stubs-blake3.c ├── stubs-sha512.c ├── stubs-sha3.c ├── stubs-aes.c ├── pclmul.c ├── cryptokitBignum.ml ├── ghash.c ├── siphash.c ├── sha1.c ├── chacha20.c ├── stubs-rng.c ├── keccak.c ├── poly1305-donna-64.h ├── blake3_portable.c ├── poly1305-donna-32.h ├── sha256.c ├── stubs-zlib.c ├── blake3_dispatch.c ├── sha512.c └── blake3_impl.h ├── test ├── dune ├── prngtest.ml └── speedtest.ml ├── .github └── workflows │ └── build.yml ├── dune-project ├── cryptokit.opam ├── configure ├── Changes └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | setup.data 3 | setup.log 4 | *.native 5 | *~ 6 | .merlin 7 | # This file is generated by the configure script 8 | src/config/config_vars.ml 9 | -------------------------------------------------------------------------------- /src/config/config_vars.ml.default: -------------------------------------------------------------------------------- 1 | type 'a value = 2 | | This of 'a 3 | | Auto 4 | 5 | let enable_zlib = Auto 6 | 7 | let enable_hardware_support = Auto 8 | -------------------------------------------------------------------------------- /src/config/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name flags) 3 | (libraries 
dune-configurator)) 4 | 5 | (rule 6 | (mode fallback) 7 | (action (copy config_vars.ml.default config_vars.ml))) 8 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test) 3 | (modules test) 4 | (libraries cryptokit) 5 | (flags :standard -w -35 -alert -crypto)) 6 | 7 | (executable 8 | (name prngtest) 9 | (modules prngtest) 10 | (libraries cryptokit)) 11 | 12 | (executable 13 | (name speedtest) 14 | (modules speedtest) 15 | (libraries cryptokit) 16 | (flags :standard -w -35 -alert -crypto)) 17 | -------------------------------------------------------------------------------- /src/poly1305-donna.h: -------------------------------------------------------------------------------- 1 | /* Poly1305 implementation written by Andrew Moon, 2 | https://github.com/floodyberry/poly1305-donna 3 | License: MIT or public domain. 4 | Minor adaptations for Cryptokit by Xavier Leroy. 
*/ 5 | 6 | #ifndef POLY1305_DONNA_H 7 | #define POLY1305_DONNA_H 8 | 9 | #include <stddef.h> 10 | 11 | typedef struct poly1305_context { 12 | size_t aligner; 13 | unsigned char opaque[136]; 14 | } poly1305_context; 15 | 16 | EXPORT void poly1305_init(poly1305_context *ctx, const unsigned char key[32]); 17 | EXPORT void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes); 18 | EXPORT void poly1305_finish(poly1305_context *ctx, unsigned char mac[16]); 19 | 20 | #endif /* POLY1305_DONNA_H */ 21 | 22 | -------------------------------------------------------------------------------- /src/keccak.h: -------------------------------------------------------------------------------- 1 | /* SHA-3 (Keccak) cryptographic hash function */ 2 | 3 | typedef unsigned long long u64; 4 | 5 | struct SHA3Context { 6 | u64 state[25]; 7 | unsigned char buffer[144]; 8 | int numbytes; /* number of bytes in buffer */ 9 | int rsiz; /* number of message bytes processed by permutation */ 10 | int hsiz; /* size of hash in bytes */ 11 | }; 12 | 13 | EXPORT void SHA3_init(struct SHA3Context * ctx, int hsiz); 14 | 15 | EXPORT void SHA3_absorb(struct SHA3Context * ctx, 16 | unsigned char * data, 17 | unsigned long len); 18 | 19 | EXPORT void SHA3_extract(unsigned char padding, 20 | struct SHA3Context * ctx, 21 | unsigned char * output); 22 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: 18 | - ubuntu-latest 19 | - macos-latest 20 | - windows-latest 21 | 22 | runs-on: ${{ matrix.os }} 23 | 24 | steps: 25 | - name: Checkout tree 26 | uses: actions/checkout@v4 27 | 28 | - name: Set-up OCaml 29 | uses: 
ocaml/setup-ocaml@v3 30 | with: 31 | ocaml-compiler: 5 32 | 33 | - run: opam install . --deps-only --with-test 34 | 35 | - run: opam exec -- dune build 36 | 37 | - run: opam exec -- dune runtest 38 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.5) 2 | (generate_opam_files true) 3 | 4 | (name cryptokit) 5 | (source (github xavierleroy/cryptokit)) 6 | (authors "Xavier Leroy") 7 | (maintainers "Xavier Leroy ") 8 | 9 | (package 10 | (name cryptokit) 11 | (synopsis "A library of cryptographic primitives") 12 | (version 1.20) 13 | (license "LGPL-2.0-or-later WITH OCaml-LGPL-linking-exception") 14 | (description "Cryptokit includes authenticated encryption (AES-GCM, Chacha20-Poly1305), block ciphers (AES, DES, 3DES), stream ciphers (Chacha20, ARCfour), public-key cryptography (RSA, DH), hashes (SHA-256, SHA-512, SHA-3, Blake2, Blake3), MACs, compression, random number generation -- all presented with a compositional, extensible interface.") 15 | 16 | (depends 17 | (ocaml (>= 4.08.0)) 18 | (dune (>= 2.5)) 19 | dune-configurator 20 | (zarith (>= 1.4)) 21 | conf-zlib 22 | conf-gmp-powm-sec)) 23 | -------------------------------------------------------------------------------- /src/d3des.h: -------------------------------------------------------------------------------- 1 | /* d3des.h - 2 | * 3 | * Headers and defines for d3des.c 4 | * Graven Imagery, 1992. 5 | * 6 | * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge 7 | * (GEnie : OUTER; CIS : [71755,204]) 8 | * 9 | * Modified and adapted by Xavier Leroy, 2002. 
10 | */ 11 | 12 | #define EN0 0 /* MODE == encrypt */ 13 | #define DE1 1 /* MODE == decrypt */ 14 | 15 | typedef unsigned char u8; 16 | typedef unsigned int u32; 17 | 18 | EXPORT void d3des_cook_key(u8 key[8], int mode, u32 res[32]); 19 | /* Sets the key register [res] according to the hexadecimal 20 | * key contained in the 8 bytes of [key], according to the DES, 21 | * for encryption or decryption according to [mode]. 22 | */ 23 | 24 | EXPORT void d3des_transform(u32 key[32], u8 from[8], u8 to[8]); 25 | /* Encrypts/Decrypts (according to the key [key]) 26 | * one block of eight bytes at address 'from' 27 | * into the block at address 'to'. They can be the same. 28 | */ 29 | -------------------------------------------------------------------------------- /src/chacha20.h: -------------------------------------------------------------------------------- 1 | /* Based on D. J. Bernstein's chacha-regs.c version 20080118, 2 | https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c 3 | The initial code is in the public domain */ 4 | 5 | #include <stddef.h> 6 | #include <stdint.h> 7 | 8 | typedef struct { 9 | uint32_t input[16]; /* The current state */ 10 | uint8_t output[64]; /* Output data for the current state */ 11 | int next; /* Index of next unused byte in output */ 12 | int iv_length; /* 8 or 12 */ 13 | } chacha20_ctx; 14 | 15 | EXPORT void chacha20_init(chacha20_ctx * ctx, 16 | const uint8_t * key, size_t key_length, 17 | const uint8_t * iv, size_t iv_length, 18 | uint64_t ctr); 19 | 20 | EXPORT void chacha20_extract(chacha20_ctx * ctx, 21 | uint8_t * out, size_t len); 22 | 23 | EXPORT void chacha20_transform(chacha20_ctx * ctx, 24 | const uint8_t * in, uint8_t * out, size_t len); 25 | -------------------------------------------------------------------------------- /src/blowfish.h: -------------------------------------------------------------------------------- 1 | /* 2 | blowfish.h: Header file for blowfish.c 3 | 4 | Copyright (C) 1997 by Paul 
Kocher 5 | 6 | This library is free software; you can redistribute it and/or 7 | modify it under the terms of the GNU Lesser General Public 8 | License as published by the Free Software Foundation; either 9 | version 2.1 of the License, or (at your option) any later version. 10 | This library is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | Lesser General Public License for more details. 14 | You should have received a copy of the GNU Lesser General Public 15 | License along with this library; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | 18 | 19 | See blowfish.c for more information about this file. 20 | */ 21 | 22 | typedef unsigned int u32; 23 | 24 | typedef struct { 25 | u32 P[16 + 2]; 26 | u32 S[4][256]; 27 | } BLOWFISH_CTX; 28 | 29 | EXPORT void Blowfish_Init(BLOWFISH_CTX *ctx, unsigned char *key, int keyLen); 30 | EXPORT void Blowfish_Encrypt(BLOWFISH_CTX *ctx, u32 *xl, u32 *xr); 31 | EXPORT void Blowfish_Decrypt(BLOWFISH_CTX *ctx, u32 *xl, u32 *xr); 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /cryptokit.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "1.20" 4 | synopsis: "A library of cryptographic primitives" 5 | description: 6 | "Cryptokit includes authenticated encryption (AES-GCM, Chacha20-Poly1305), block ciphers (AES, DES, 3DES), stream ciphers (Chacha20, ARCfour), public-key cryptography (RSA, DH), hashes (SHA-256, SHA-512, SHA-3, Blake2, Blake3), MACs, compression, random number generation -- all presented with a compositional, extensible interface." 
7 | maintainer: ["Xavier Leroy "] 8 | authors: ["Xavier Leroy"] 9 | license: "LGPL-2.0-or-later WITH OCaml-LGPL-linking-exception" 10 | x-maintenance-intent: ["(latest)"] 11 | homepage: "https://github.com/xavierleroy/cryptokit" 12 | bug-reports: "https://github.com/xavierleroy/cryptokit/issues" 13 | depends: [ 14 | "ocaml" {>= "4.08.0"} 15 | "dune" {>= "2.5"} 16 | "dune-configurator" 17 | "zarith" {>= "1.4"} 18 | "conf-zlib" 19 | "conf-gmp-powm-sec" 20 | ] 21 | build: [ 22 | ["dune" "subst"] {pinned} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "@install" 31 | "@runtest" {with-test} 32 | "@doc" {with-doc} 33 | ] 34 | ] 35 | dev-repo: "git+https://github.com/xavierleroy/cryptokit.git" 36 | -------------------------------------------------------------------------------- /src/pclmul.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Hardware-accelerated implementation of GHASH multiplication */ 15 | 16 | EXPORT int pclmul_available; 17 | /* -1: unknown, call pclmul_check_available() to determine 18 | 0: not available 19 | 1: available 20 | */ 21 | 22 | EXPORT int pclmul_check_available(void); 23 | 24 | EXPORT void pclmul_mult(uint8_t res[16], 25 | const uint8_t arg1[16], const uint8_t arg2[16]); 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/arcfour.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* $Id$ */ 15 | 16 | struct arcfour_key 17 | { 18 | unsigned char state[256]; 19 | unsigned char x, y; 20 | }; 21 | 22 | EXPORT void arcfour_cook_key(struct arcfour_key * key, 23 | unsigned char * key_data, 24 | int key_data_len); 25 | 26 | EXPORT void arcfour_encrypt(struct arcfour_key * key, 27 | char * src, char * dst, long len); 28 | 29 | -------------------------------------------------------------------------------- /src/sha1.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* $Id$ */ 15 | 16 | /* SHA-1 hashing */ 17 | 18 | typedef unsigned int u32; 19 | 20 | struct SHA1Context { 21 | u32 state[5]; 22 | u32 length[2]; 23 | int numbytes; 24 | unsigned char buffer[64]; 25 | }; 26 | 27 | EXPORT void SHA1_init(struct SHA1Context * ctx); 28 | EXPORT void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data, 29 | unsigned long len); 30 | EXPORT void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]); 31 | -------------------------------------------------------------------------------- /src/ghash.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Software implementation of GHASH multiplication */ 15 | 16 | struct ghash_context { 17 | uint64_t HL[16]; // precalculated lo-half HTable 18 | uint64_t HH[16]; // precalculated hi-half HTable 19 | }; 20 | 21 | EXPORT void ghash_init(struct ghash_context * ctx, 22 | const uint8_t h[16]); 23 | 24 | EXPORT void ghash_mult(const struct ghash_context * ctx, 25 | const uint8_t input[16], 26 | uint8_t output[16]); 27 | -------------------------------------------------------------------------------- /src/poly1305-donna.c: -------------------------------------------------------------------------------- 1 | /* Poly1305 implementation written by Andrew Moon, 2 | https://github.com/floodyberry/poly1305-donna 3 | License: MIT or public domain. 4 | Minor adaptations for Cryptokit by Xavier Leroy. */ 5 | 6 | #include "poly1305-donna.h" 7 | 8 | /* auto detect between 32bit / 64bit */ 9 | #if defined(__SIZEOF_INT128__) && defined(__LP64__) 10 | #include "poly1305-donna-64.h" 11 | #else 12 | #include "poly1305-donna-32.h" 13 | #endif 14 | 15 | /* Absorb `bytes` bytes starting at `m` into the MAC state `ctx`: first top up a partially filled block buffered by an earlier call, then process all whole blocks straight from the input, and finally buffer the remaining tail for the next call. EXPORT is placed first to match the prototype in poly1305-donna.h. */ 16 | EXPORT void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) { 17 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 18 | size_t i; 19 | 20 | /* handle leftover */ 21 | if (st->leftover) { 22 | size_t want = (poly1305_block_size - st->leftover); 23 | if (want > bytes) 24 | want = bytes; 25 | for (i = 0; i < want; i++) 26 | st->buffer[st->leftover + i] = m[i]; 27 | bytes -= want; 28 | m += want; 29 | st->leftover += want; 30 | if (st->leftover < poly1305_block_size) 31 | return; 32 | poly1305_blocks(st, st->buffer, poly1305_block_size); 33 | st->leftover = 0; 34 | } 35 | 36 | /* process full blocks */ 37 | if (bytes >= poly1305_block_size) { 38 | size_t want = (bytes & ~(poly1305_block_size - 1)); 39 | poly1305_blocks(st, m, want); 40 | m += want; 41 | bytes -= want; 42 | } 43 | 44 | /* 
store leftover */ 45 | if (bytes) { 46 | for (i = 0; i < bytes; i++) 47 | st->buffer[st->leftover + i] = m[i]; 48 | st->leftover += bytes; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library (name cryptokit) (public_name cryptokit) 2 | (libraries unix zarith) 3 | (wrapped false) 4 | (modules CryptokitBignum Cryptokit) 5 | (foreign_stubs 6 | (language c) 7 | (flags -DCAML_NAME_SPACE -DEXPORT=static 8 | -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 9 | -DBLAKE3_USE_NEON=0 10 | (:include flags.sexp)) 11 | (names stubs-arcfour 12 | stubs-blowfish 13 | stubs-des 14 | stubs-ripemd160 15 | stubs-sha1 16 | stubs-sha256 17 | stubs-sha512 18 | stubs-aes 19 | stubs-md5 20 | stubs-misc 21 | stubs-rng 22 | stubs-zlib 23 | stubs-sha3 24 | stubs-chacha20 25 | stubs-blake2 26 | stubs-ghash 27 | stubs-poly1305 28 | stubs-siphash 29 | stubs-blake3) 30 | (extra_deps 31 | aesni.c 32 | arcfour.c 33 | blowfish.c 34 | d3des.c 35 | rijndael-alg-fst.c 36 | ripemd160.c 37 | sha1.c 38 | sha256.c 39 | sha512.c 40 | keccak.c 41 | chacha20.c 42 | blake2.c 43 | ghash.c 44 | pclmul.c 45 | poly1305-donna.c 46 | siphash.c 47 | blake3.c 48 | blake3_dispatch.c 49 | blake3_portable.c)) 50 | (c_library_flags (:include library_flags.sexp)) 51 | (flags :standard -safe-string -w -7 -w -27 -w -37)) 52 | 53 | ; compute flags 54 | (rule 55 | (alias configure) 56 | (targets flags.sexp library_flags.sexp) 57 | (action (run config/flags.exe))) 58 | -------------------------------------------------------------------------------- /src/ripemd160.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 
2005 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* $Id$ */ 15 | 16 | /* RIPEMD160 hashing */ 17 | 18 | typedef unsigned int u32; 19 | 20 | struct RIPEMD160Context { 21 | u32 state[5]; 22 | u32 length[2]; 23 | int numbytes; 24 | unsigned char buffer[64]; 25 | }; 26 | 27 | EXPORT void RIPEMD160_init(struct RIPEMD160Context * ctx); 28 | EXPORT void RIPEMD160_add_data(struct RIPEMD160Context * ctx, 29 | unsigned char * data, 30 | unsigned long len); 31 | EXPORT void RIPEMD160_finish(struct RIPEMD160Context * ctx, 32 | unsigned char output[20]); 33 | -------------------------------------------------------------------------------- /src/siphash.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #define SIPHASH_BUFLEN 8 15 | 16 | struct siphash { 17 | uint64_t v0, v1, v2, v3; 18 | unsigned char buffer[SIPHASH_BUFLEN]; 19 | int used; /* number of valid bytes in buffer */ 20 | uint8_t len8; /* 8 low bits of total data length */ 21 | }; 22 | 23 | EXPORT void siphash_init(struct siphash * st, 24 | const unsigned char * key, int outlen); 25 | EXPORT void siphash_add(struct siphash * st, 26 | const unsigned char * p, size_t len); 27 | EXPORT void siphash_final(struct siphash * st, 28 | int outlen, unsigned char * out); 29 | 30 | -------------------------------------------------------------------------------- /src/sha256.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* $Id$ */ 15 | 16 | /* SHA-256 hashing */ 17 | 18 | #ifndef _MSC_VER 19 | #include <stdint.h> 20 | typedef uint32_t u32; 21 | #else 22 | typedef unsigned int u32; 23 | #endif 24 | 25 | struct SHA256Context { 26 | u32 state[8]; 27 | u32 length[2]; 28 | int numbytes; 29 | unsigned char buffer[64]; 30 | }; 31 | 32 | EXPORT void SHA256_init(struct SHA256Context * ctx, int bitsize); 33 | EXPORT void SHA256_add_data(struct SHA256Context * ctx, unsigned char * data, 34 | unsigned long len); 35 | EXPORT void SHA256_finish(struct SHA256Context * ctx, 36 | int bitsize, 37 | unsigned char * output); 38 | -------------------------------------------------------------------------------- /src/cryptokitBignumOld.mli: -------------------------------------------------------------------------------- 1 | (***********************************************************************) 2 | (* *) 3 | (* The Cryptokit library *) 4 | (* *) 5 | (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) 6 | (* *) 7 | (* Copyright 2002 Institut National de Recherche en Informatique et *) 8 | (* en Automatique. All rights reserved. This file is distributed *) 9 | (* under the terms of the GNU Library General Public License, with *) 10 | (* the special exception on linking described in file LICENSE. 
*) 11 | (* *) 12 | (***********************************************************************) 13 | 14 | (* Arithmetic on big integers *) 15 | 16 | type t 17 | 18 | val zero : t 19 | val one : t 20 | val of_int : int -> t 21 | 22 | val compare : t -> t -> int 23 | 24 | val add : t -> t -> t 25 | val sub : t -> t -> t 26 | val mult : t -> t -> t 27 | val mod_ : t -> t -> t 28 | 29 | val relative_prime : t -> t -> bool 30 | val mod_power : t -> t -> t -> t 31 | val mod_power_CRT : t -> t -> t -> t -> t -> t -> t 32 | val mod_inv : t -> t -> t 33 | 34 | val of_bytes : string -> t 35 | val to_bytes : ?numbits:int -> t -> bytes 36 | 37 | val random : rng:(bytes -> int -> int -> unit) -> ?lowbits:int -> int -> t 38 | val random_prime : rng:(bytes -> int -> int -> unit) -> int -> t 39 | 40 | val wipe : t -> unit 41 | 42 | -------------------------------------------------------------------------------- /src/sha512.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2015 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* $Id: sha256.h 53 2010-08-30 10:53:00Z gildor-admin $ */ 15 | 16 | /* SHA-512 hashing */ 17 | 18 | #ifndef _MSC_VER 19 | #include <stdint.h> 20 | typedef uint64_t u64; 21 | #else 22 | typedef unsigned __int64 u64; 23 | #define UINT64_C(x) x##ui64 24 | #endif 25 | 26 | struct SHA512Context { 27 | u64 state[8]; 28 | u64 length[2]; 29 | int numbytes; 30 | unsigned char buffer[128]; 31 | }; 32 | 33 | EXPORT void SHA512_init(struct SHA512Context * ctx, int bitsize); 34 | EXPORT void SHA512_add_data(struct SHA512Context * ctx, unsigned char * data, 35 | unsigned long len); 36 | EXPORT void SHA512_finish(struct SHA512Context * ctx, int bitsize, 37 | unsigned char * output); 38 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | (* -*- tuareg -*- *) 3 | 4 | type 'a value = 5 | | This of 'a 6 | | Auto 7 | 8 | let string_of_value to_string = function 9 | | This a -> "This (" ^ to_string a ^ ")" 10 | | Auto -> "Auto" 11 | 12 | let () = 13 | let declare_flag arg description = 14 | let reference = ref Auto in 15 | let args = 16 | [ "--enable-" ^ arg, Arg.Unit (fun () -> reference := This true), 17 | " Enable " ^ description 18 | ; "--disable-" ^ arg, Arg.Unit (fun () -> reference := This false), 19 | " Disable " ^ description 20 | ] 21 | in args, reference 22 | in 23 | let args_zlib, ref_zlib = declare_flag "zlib" "ZLib" in 24 | let args_hardware_support, ref_hardware_support = 25 | declare_flag "hardwaresupport" 26 | "hardware support for AES and GCM (needs GCC or Clang)" in 27 | Arg.parse 28 | (Arg.align (args_zlib @ args_hardware_support)) 29 | (fun s -> raise (Arg.Bad (Printf.sprintf "don't know what to do with %S" s))) 30 | "Usage: ./configure [OPTIONS]"; 31 | let oc = open_out_bin "src/config/config_vars.ml" in 32 | 
Printf.fprintf oc {| 33 | type 'a value = 34 | | This of 'a 35 | | Auto 36 | 37 | let enable_zlib = %s 38 | let enable_hardware_support = %s 39 | |} 40 | (string_of_value string_of_bool !ref_zlib) 41 | (string_of_value string_of_bool !ref_hardware_support); 42 | close_out oc; 43 | (* Below is a temporary workaround to make sure the configuration happens 44 | every time this script is run. *) 45 | (try 46 | Sys.remove "_build/default/src/flags.sexp"; 47 | with _ -> ()); 48 | exit (Sys.command "dune build @configure --release") 49 | -------------------------------------------------------------------------------- /src/stubs-sha1.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "sha1.c" 15 | #include <caml/mlvalues.h> 16 | #include <caml/alloc.h> 17 | #include <caml/memory.h> 18 | 19 | #define Context_val(v) ((struct SHA1Context *) String_val(v)) 20 | /* Allocate an OCaml string large enough for a struct SHA1Context, initialise it with SHA1_init and return it as the hash context. */ 21 | CAMLprim value caml_sha1_init(value unit) 22 | { 23 | value ctx = caml_alloc_string(sizeof(struct SHA1Context)); 24 | SHA1_init(Context_val(ctx)); 25 | return ctx; 26 | } 27 | /* Feed [len] bytes of [src], starting at byte offset [ofs], into the context [ctx]. No bounds check is done here; presumably the OCaml caller validates [ofs] and [len] -- TODO confirm. */ 28 | CAMLprim value caml_sha1_update(value ctx, value src, value ofs, value len) 29 | { 30 | SHA1_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 31 | return Val_unit; 32 | } 33 | /* Finish the hash and return the 20-byte digest as a fresh OCaml string. [ctx] is registered with CAMLparam1 because it is still used after the caml_alloc_string call. */ 34 | CAMLprim value caml_sha1_final(value ctx) 35 | { 36 | CAMLparam1(ctx); 37 | CAMLlocal1(res); 38 | 39 | res = caml_alloc_string(20); 40 | SHA1_finish(Context_val(ctx), &Byte_u(res, 0)); 41 | CAMLreturn(res); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/cryptokitBignum.mli: -------------------------------------------------------------------------------- 1 | (***********************************************************************) 2 | (* *) 3 | (* The Cryptokit library *) 4 | (* *) 5 | (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) 6 | (* *) 7 | (* Copyright 2002 Institut National de Recherche en Informatique et *) 8 | (* en Automatique. All rights reserved. This file is distributed *) 9 | (* under the terms of the GNU Library General Public License, with *) 10 | (* the special exception on linking described in file LICENSE. *) 11 | (* *) 12 | (***********************************************************************) 13 | 14 | (** Operations on big integers, used for the implementation of module 15 | {!Cryptokit}. 
*) 16 | 17 | type t 18 | 19 | val zero : t 20 | val one : t 21 | val of_int : int -> t 22 | 23 | val compare : t -> t -> int 24 | 25 | val add : t -> t -> t 26 | val sub : t -> t -> t 27 | val mult : t -> t -> t 28 | val div : t -> t -> t 29 | val lcm : t -> t -> t 30 | val mod_ : t -> t -> t 31 | 32 | val relative_prime : t -> t -> bool 33 | val mod_power : t -> t -> t -> t 34 | val mod_power_CRT : t -> t -> t -> t -> t -> t -> t 35 | val mod_inv : t -> t -> t 36 | 37 | val of_bytes : string -> t 38 | val to_bytes : ?numbits:int -> t -> string 39 | 40 | val random : rng:(bytes -> int -> int -> unit) -> ?odd:bool -> int -> t 41 | val random_prime : rng:(bytes -> int -> int -> unit) -> int -> t 42 | 43 | val wipe : t -> unit 44 | 45 | -------------------------------------------------------------------------------- /src/stubs-ripemd160.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2005 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "ripemd160.c" 15 | #include <caml/mlvalues.h> 16 | #include <caml/alloc.h> 17 | #include <caml/memory.h> 18 | 19 | #define Context_val(v) ((struct RIPEMD160Context *) String_val(v)) 20 | /* Allocate an OCaml string large enough for a struct RIPEMD160Context, initialise it with RIPEMD160_init and return it as the hash context. */ 21 | CAMLprim value caml_ripemd160_init(value unit) 22 | { 23 | value ctx = caml_alloc_string(sizeof(struct RIPEMD160Context)); 24 | RIPEMD160_init(Context_val(ctx)); 25 | return ctx; 26 | } 27 | /* Feed [len] bytes of [src], starting at byte offset [ofs], into the context [ctx]. No bounds check is done here; presumably the OCaml caller validates [ofs] and [len] -- TODO confirm. */ 28 | CAMLprim value caml_ripemd160_update(value ctx, value src, value ofs, value len) 29 | { 30 | RIPEMD160_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 31 | return Val_unit; 32 | } 33 | /* Finish the hash and return the 20-byte digest as a fresh OCaml string. [ctx] is registered with CAMLparam1 because it is still used after the caml_alloc_string call. */ 34 | CAMLprim value caml_ripemd160_final(value ctx) 35 | { 36 | CAMLparam1(ctx); 37 | CAMLlocal1(res); 38 | 39 | res = caml_alloc_string(20); 40 | RIPEMD160_finish(Context_val(ctx), &Byte_u(res, 0)); 41 | CAMLreturn(res); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/stubs-poly1305.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "poly1305-donna.c" 15 | #include 16 | #include 17 | #include 18 | 19 | #define Context_val(v) ((struct poly1305_context *) String_val(v)) 20 | 21 | CAMLprim value caml_poly1305_init(value key) 22 | { 23 | CAMLparam1(key); 24 | value ctx = caml_alloc_string(sizeof(struct poly1305_context)); 25 | poly1305_init(Context_val(ctx), &Byte_u(key, 0)); 26 | CAMLreturn(ctx); 27 | } 28 | 29 | CAMLprim value caml_poly1305_update(value ctx, value src, value ofs, value len) 30 | { 31 | poly1305_update(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 32 | return Val_unit; 33 | } 34 | 35 | CAMLprim value caml_poly1305_final(value ctx) 36 | { 37 | CAMLparam1(ctx); 38 | CAMLlocal1(res); 39 | res = caml_alloc_string(16); 40 | poly1305_finish(Context_val(ctx), &Byte_u(res, 0)); 41 | CAMLreturn(res); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/stubs-des.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for DES */ 15 | 16 | #include "d3des.c" 17 | #include 18 | #include 19 | #include 20 | 21 | #define Cooked_key_size (32 * sizeof(u32)) 22 | 23 | CAMLprim value caml_des_cook_key(value key, value ofs, value direction) 24 | { 25 | CAMLparam2(key,direction); 26 | value ckey = caml_alloc_string(Cooked_key_size); 27 | d3des_cook_key((u8 *) &Byte(key, Long_val(ofs)), 28 | Int_val(direction), 29 | (u32 *) String_val(ckey)); 30 | CAMLreturn(ckey); 31 | } 32 | 33 | CAMLprim value caml_des_transform(value ckey, value src, value src_ofs, 34 | value dst, value dst_ofs) 35 | { 36 | d3des_transform((u32 *) String_val(ckey), 37 | (u8 *) &Byte(src, Long_val(src_ofs)), 38 | (u8 *) &Byte(dst, Long_val(dst_ofs))); 39 | return Val_unit; 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/aesni.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Gallium, INRIA Paris */ 6 | /* */ 7 | /* Copyright 2016 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Hardware-accelerated implementation of AES */ 15 | 16 | EXPORT int aesni_available; 17 | /* -1: unknown, call aesni_check_available() to determine 18 | 0: not available 19 | 1: available 20 | */ 21 | 22 | EXPORT int aesni_check_available(void); 23 | 24 | EXPORT int aesniKeySetupEnc(unsigned char * ckey, 25 | const unsigned char * key, 26 | int keylength); 27 | 28 | EXPORT int aesniKeySetupDec(unsigned char * ckey, 29 | const unsigned char * key, 30 | int keylength); 31 | 32 | EXPORT void aesniEncrypt(const unsigned char * key, int nrounds, 33 | const unsigned char * in, 34 | unsigned char * out); 35 | 36 | EXPORT void aesniDecrypt(const unsigned char * key, int nrounds, 37 | const unsigned char * in, 38 | unsigned char * out); 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/stubs-siphash.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | #include "siphash.c" 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #define siphash_val(v) ((struct siphash *) String_val(v)) 23 | 24 | CAMLprim value caml_siphash_init(value key, value hashlen) 25 | { 26 | value ctx = caml_alloc_string(sizeof(struct siphash)); 27 | siphash_init(siphash_val(ctx), &Byte_u(key, 0), Int_val(hashlen)); 28 | return ctx; 29 | } 30 | 31 | CAMLprim value caml_siphash_update(value ctx, value src, value ofs, value len) 32 | { 33 | siphash_add(siphash_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 34 | return Val_unit; 35 | } 36 | 37 | CAMLprim value caml_siphash_final(value ctx, value hashlen) 38 | { 39 | CAMLparam1(ctx); 40 | CAMLlocal1(res); 41 | int len = Int_val(hashlen); 42 | res = caml_alloc_string(len); 43 | siphash_final(siphash_val(ctx), len, &Byte_u(res, 0)); 44 | CAMLreturn(res); 45 | } 46 | -------------------------------------------------------------------------------- /test/prngtest.ml: -------------------------------------------------------------------------------- 1 | (***********************************************************************) 2 | (* *) 3 | (* The Cryptokit library *) 4 | (* *) 5 | (* Xavier Leroy, projet Gallium, INRIA Paris *) 6 | (* *) 7 | (* Copyright 2017 Institut National de Recherche en Informatique et *) 8 | (* en Automatique. All rights reserved. This file is distributed *) 9 | (* under the terms of the GNU Library General Public License, with *) 10 | (* the special exception on linking described in file LICENSE. 
*)
(* *)
(***********************************************************************)

(* Generate pseudorandom data on stdout, for testing with "dieharder" *)

open Cryptokit

(* Write 64-byte blocks drawn from [rng] to stdout, forever. *)
let output_pr_data rng =
  let block_len = 64 in
  let block = Bytes.create block_len in
  while true do
    rng#random_bytes block 0 block_len;
    output stdout block 0 block_len
  done

(* Print the command-line help on stderr and exit with status 2.
   NOTE(review): the indentation inside the message was lost when this
   dump was flattened; two leading spaces are assumed -- confirm. *)
let usage() =
  prerr_string {|Usage:
  ./prngtest.native aes-ctr | dieharder -a -g 200
  ./prngtest.native chacha20 | dieharder -a -g 200
  ./prngtest.native hardware | dieharder -a -g 200
Warning: each dieharder run takes a long time.
|};
  exit 2

(* Entry point: argv.(1) selects the generator, the optional argv.(2)
   replaces the default seed. *)
let () =
  let argc = Array.length Sys.argv in
  let seed =
    if argc > 2
    then Sys.argv.(2)
    else "Supercalifragilistusexpialidolcius" in
  let rng =
    if argc <= 1 then usage()
    else
      match Sys.argv.(1) with
      | "aes-ctr" -> Random.pseudo_rng_aes_ctr seed
      | "chacha20" -> Random.pseudo_rng seed
      | "hardware" -> Random.hardware_rng ()
      | _ -> usage() in
  output_pr_data rng


--------------------------------------------------------------------------------
/src/blake2.h:
--------------------------------------------------------------------------------
/***********************************************************************/
/* */
/* The Cryptokit library */
/* */
/* Xavier Leroy, Collège de France and Inria */
/* */
/* Copyright 2020 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License, with */
/* the special exception on linking described in file LICENSE.
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* BLAKE2b hashing */ 15 | 16 | #define BLAKE2b_BLOCKSIZE 128 17 | 18 | struct blake2b { 19 | uint64_t h[8]; 20 | uint64_t len[2]; 21 | int numbytes; 22 | unsigned char buffer[BLAKE2b_BLOCKSIZE]; 23 | }; 24 | 25 | EXPORT void blake2b_init(struct blake2b * s, 26 | int hashlen, int keylen, unsigned char * key); 27 | EXPORT void blake2b_add_data(struct blake2b * s, 28 | unsigned char * data, size_t len); 29 | EXPORT void blake2b_final(struct blake2b * s, 30 | int hashlen, unsigned char * hash); 31 | 32 | #define BLAKE2s_BLOCKSIZE 64 33 | 34 | struct blake2s { 35 | uint32_t h[8]; 36 | uint32_t len[2]; 37 | int numbytes; 38 | unsigned char buffer[BLAKE2s_BLOCKSIZE]; 39 | }; 40 | 41 | EXPORT void blake2s_init(struct blake2s * s, 42 | int hashlen, int keylen, unsigned char * key); 43 | EXPORT void blake2s_add_data(struct blake2s * s, 44 | unsigned char * data, size_t len); 45 | EXPORT void blake2s_final(struct blake2s * s, 46 | int hashlen, unsigned char * hash); 47 | 48 | -------------------------------------------------------------------------------- /src/arcfour.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "arcfour.h" 15 | 16 | EXPORT void arcfour_cook_key(struct arcfour_key * key, 17 | unsigned char * key_data, 18 | int key_data_len) 19 | { 20 | unsigned char * s; 21 | int i; 22 | unsigned char t, index1, index2; 23 | 24 | s = &key->state[0]; 25 | for (i = 0; i < 256; i++) s[i] = i; 26 | key->x = 0; 27 | key->y = 0; 28 | index1 = 0; 29 | index2 = 0; 30 | for (i = 0; i < 256; i++) { 31 | index2 = key_data[index1] + s[i] + index2; 32 | t = s[i]; s[i] = s[index2]; s[index2] = t; 33 | index1++; 34 | if (index1 >= key_data_len) index1 = 0; 35 | } 36 | } 37 | 38 | EXPORT void arcfour_encrypt(struct arcfour_key * key, 39 | char * src, char * dst, long len) 40 | { 41 | int x, y, kx, ky; 42 | 43 | x = key->x; 44 | y = key->y; 45 | for (/*nothing*/; len > 0; len--) { 46 | x = (x + 1) & 0xFF; 47 | kx = key->state[x]; 48 | y = (kx + y) & 0xFF; 49 | ky = key->state[y]; 50 | key->state[x] = ky; key->state[y] = kx; 51 | *dst++ = *src++ ^ key->state[(kx + ky) & 0xFF]; 52 | } 53 | key->x = x; 54 | key->y = y; 55 | } 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/stubs-misc.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | 17 | #define ALIGNMENT_OF(x) ((uintnat)(x) & (sizeof(uintnat) - 1)) 18 | 19 | CAMLprim value caml_xor_string(value src, value src_ofs, 20 | value dst, value dst_ofs, 21 | value len) 22 | { 23 | char * s = &Byte(src, Long_val(src_ofs)); 24 | char * d = &Byte(dst, Long_val(dst_ofs)); 25 | long l = Long_val(len); 26 | 27 | if (l >= 64 && ALIGNMENT_OF(s) == ALIGNMENT_OF(d)) { 28 | while (ALIGNMENT_OF(s) != 0 && l > 0) { 29 | *d ^= *s; 30 | s += 1; 31 | d += 1; 32 | l -= 1; 33 | } 34 | while (l >= sizeof(uintnat)) { 35 | *((uintnat *) d) ^= *((uintnat *) s); 36 | s += sizeof(uintnat); 37 | d += sizeof(uintnat); 38 | l -= sizeof(uintnat); 39 | } 40 | } 41 | while (l > 0) { 42 | *d ^= *s; 43 | s += 1; 44 | d += 1; 45 | l -= 1; 46 | } 47 | return Val_unit; 48 | } 49 | 50 | CAMLprim value caml_wipe_z(value v) 51 | { 52 | if (Is_block(v) && Tag_val(v) == Custom_tag) { 53 | memset(Data_custom_val(v), 0, (Wosize_val(v) - 1) * sizeof(value)); 54 | } 55 | return Val_unit; 56 | } 57 | -------------------------------------------------------------------------------- /src/stubs-arcfour.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for ARC4 */ 15 | 16 | #include "arcfour.c" 17 | #include 18 | #include 19 | #include 20 | 21 | #define Cooked_key_size (sizeof(struct arcfour_key)) 22 | #define Key_val(v) ((struct arcfour_key *) String_val(v)) 23 | 24 | CAMLprim value caml_arcfour_cook_key(value key) 25 | { 26 | CAMLparam1(key); 27 | value ckey = caml_alloc_string(Cooked_key_size); 28 | arcfour_cook_key(Key_val(ckey), 29 | (unsigned char *) String_val(key), 30 | caml_string_length(key)); 31 | CAMLreturn(ckey); 32 | } 33 | 34 | CAMLprim value caml_arcfour_transform(value ckey, value src, value src_ofs, 35 | value dst, value dst_ofs, value len) 36 | { 37 | arcfour_encrypt(Key_val(ckey), 38 | &Byte(src, Long_val(src_ofs)), 39 | &Byte(dst, Long_val(dst_ofs)), 40 | Long_val(len)); 41 | return Val_unit; 42 | } 43 | 44 | CAMLprim value caml_arcfour_transform_bytecode(value * argv, int argc) 45 | { 46 | return caml_arcfour_transform(argv[0], argv[1], argv[2], 47 | argv[3], argv[4], argv[5]); 48 | } 49 | -------------------------------------------------------------------------------- /src/rijndael-alg-fst.h: -------------------------------------------------------------------------------- 1 | /** 2 | * rijndael-alg-fst.h 3 | * 4 | * @version 3.0 (December 2000) 5 | * 6 | * Optimised ANSI C code for the Rijndael cipher (now AES) 7 | * 8 | * @author Vincent Rijmen 9 | * @author Antoon Bosselaers 10 | * @author Paulo Barreto 11 | * 12 | * This code is hereby placed in the public domain. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 15 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 23 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | #ifndef __RIJNDAEL_ALG_FST_H 27 | #define __RIJNDAEL_ALG_FST_H 28 | 29 | #define MAXKC (256/32) 30 | #define MAXKB (256/8) 31 | #define MAXNR 14 32 | 33 | typedef unsigned char u8; 34 | typedef unsigned short u16; 35 | typedef unsigned int u32; 36 | 37 | EXPORT int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); 38 | EXPORT int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); 39 | EXPORT void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]); 40 | EXPORT void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]); 41 | 42 | #ifdef INTERMEDIATE_VALUE_KAT 43 | EXPORT void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); 44 | EXPORT void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); 45 | #endif /* INTERMEDIATE_VALUE_KAT */ 46 | 47 | #endif /* __RIJNDAEL_ALG_FST_H */ 48 | -------------------------------------------------------------------------------- /src/stubs-sha256.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2004 Institut National de Recherche en Informatique et 
*/ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "sha256.c" 15 | #include 16 | #include 17 | #include 18 | 19 | #define Context_val(v) ((struct SHA256Context *) String_val(v)) 20 | 21 | CAMLprim value caml_sha256_init(value unit) 22 | { 23 | value ctx = caml_alloc_string(sizeof(struct SHA256Context)); 24 | SHA256_init(Context_val(ctx), 256); 25 | return ctx; 26 | } 27 | 28 | CAMLprim value caml_sha224_init(value unit) 29 | { 30 | value ctx = caml_alloc_string(sizeof(struct SHA256Context)); 31 | SHA256_init(Context_val(ctx), 224); 32 | return ctx; 33 | } 34 | 35 | CAMLprim value caml_sha256_update(value ctx, value src, value ofs, value len) 36 | { 37 | SHA256_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 38 | return Val_unit; 39 | } 40 | 41 | CAMLprim value caml_sha256_final(value ctx) 42 | { 43 | CAMLparam1(ctx); 44 | CAMLlocal1(res); 45 | 46 | res = caml_alloc_string(32); 47 | SHA256_finish(Context_val(ctx), 256, &Byte_u(res, 0)); 48 | CAMLreturn(res); 49 | } 50 | 51 | CAMLprim value caml_sha224_final(value ctx) 52 | { 53 | CAMLparam1(ctx); 54 | CAMLlocal1(res); 55 | 56 | res = caml_alloc_string(28); 57 | SHA256_finish(Context_val(ctx), 224, &Byte_u(res, 0)); 58 | CAMLreturn(res); 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/stubs-md5.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en 
Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #ifndef _MSC_VER 19 | #include 20 | typedef uint32_t u32; 21 | #else 22 | typedef unsigned int u32; 23 | #endif 24 | 25 | struct MD5Context { 26 | u32 buf[4]; 27 | u32 bits[2]; 28 | unsigned char in[64]; 29 | }; 30 | 31 | CAMLextern void caml_MD5Init (struct MD5Context *context); 32 | CAMLextern void caml_MD5Update (struct MD5Context *context, 33 | unsigned char *buf, unsigned len); 34 | CAMLextern void caml_MD5Final (unsigned char *digest, struct MD5Context *ctx); 35 | 36 | #define Context_val(v) ((struct MD5Context *) String_val(v)) 37 | 38 | CAMLprim value caml_md5_init(value unit) 39 | { 40 | value ctx = caml_alloc_string(sizeof(struct MD5Context)); 41 | caml_MD5Init(Context_val(ctx)); 42 | return ctx; 43 | } 44 | 45 | CAMLprim value caml_md5_update(value ctx, value src, value ofs, value len) 46 | { 47 | caml_MD5Update(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 48 | return Val_unit; 49 | } 50 | 51 | CAMLprim value caml_md5_final(value ctx) 52 | { 53 | CAMLparam1(ctx); 54 | CAMLlocal1(res); 55 | 56 | res = caml_alloc_string(16); 57 | caml_MD5Final(&Byte_u(res, 0), Context_val(ctx)); 58 | CAMLreturn(res); 59 | } 60 | 61 | -------------------------------------------------------------------------------- /src/blake3.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAKE3_H 2 | #define BLAKE3_H 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __cplusplus 8 | EXPORT "C" { 9 | #endif 10 | 11 | #define BLAKE3_VERSION_STRING "1.3.1" 12 | #define BLAKE3_KEY_LEN 32 13 | #define BLAKE3_OUT_LEN 32 14 | #define BLAKE3_BLOCK_LEN 64 15 | #define BLAKE3_CHUNK_LEN 1024 
16 | #define BLAKE3_MAX_DEPTH 54 17 | 18 | // This struct is a private implementation detail. It has to be here because 19 | // it's part of blake3_hasher below. 20 | typedef struct { 21 | uint32_t cv[8]; 22 | uint64_t chunk_counter; 23 | uint8_t buf[BLAKE3_BLOCK_LEN]; 24 | uint8_t buf_len; 25 | uint8_t blocks_compressed; 26 | uint8_t flags; 27 | } blake3_chunk_state; 28 | 29 | typedef struct { 30 | uint32_t key[8]; 31 | blake3_chunk_state chunk; 32 | uint8_t cv_stack_len; 33 | // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example, 34 | // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk 35 | // requires a 4th entry, rather than merging everything down to 1, because we 36 | // don't know whether more input is coming. This is different from how the 37 | // reference implementation does things. 38 | uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; 39 | } blake3_hasher; 40 | 41 | EXPORT const char *blake3_version(void); 42 | EXPORT void blake3_hasher_init(blake3_hasher *self); 43 | EXPORT void blake3_hasher_init_keyed(blake3_hasher *self, 44 | const uint8_t key[BLAKE3_KEY_LEN]); 45 | EXPORT void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context); 46 | EXPORT void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, 47 | size_t context_len); 48 | EXPORT void blake3_hasher_update(blake3_hasher *self, const void *input, 49 | size_t input_len); 50 | EXPORT void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, 51 | size_t out_len); 52 | EXPORT void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, 53 | uint8_t *out, size_t out_len); 54 | EXPORT void blake3_hasher_reset(blake3_hasher *self); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif /* BLAKE3_H */ 61 | -------------------------------------------------------------------------------- /src/stubs-ghash.c: 
-------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | #include "ghash.c" 17 | #include "pclmul.c" 18 | #include 19 | #include 20 | #include 21 | 22 | #define Context_val(v) (*((struct ghash_context **) Data_custom_val(v))) 23 | 24 | static void caml_ghash_finalize(value ctx) 25 | { 26 | if (Context_val(ctx) != NULL) { 27 | caml_stat_free(Context_val(ctx)); 28 | Context_val(ctx) = NULL; 29 | } 30 | } 31 | 32 | static struct custom_operations ghash_context_ops = { 33 | "fr.inria.caml.cryptokit.GHASH_context", 34 | caml_ghash_finalize, 35 | custom_compare_default, 36 | custom_hash_default, 37 | custom_deserialize_default, 38 | custom_compare_ext_default 39 | }; 40 | 41 | CAMLprim value caml_ghash_init(value key) 42 | { 43 | if (pclmul_available == -1) pclmul_check_available(); 44 | if (pclmul_available == 1) { 45 | return key; 46 | } else { 47 | struct ghash_context * ctx = caml_stat_alloc(sizeof(struct ghash_context)); 48 | value res = 49 | caml_alloc_custom(&ghash_context_ops, 50 | sizeof(struct ghash_context *), 51 | 0, 1); 52 | ghash_init(ctx, &Byte_u(key, 0)); 53 | Context_val(res) = ctx; 54 | return res; 55 | } 56 | } 57 | 58 | CAMLprim value caml_ghash_mult(value ctx, value x) 59 | { 60 | if (pclmul_available == 1) { 61 | pclmul_mult(&Byte_u(x, 0), &Byte_u(ctx, 0), &Byte_u(x, 0)); 62 | } else { 63 | ghash_mult(Context_val(ctx), 
&Byte_u(x, 0), &Byte_u(x, 0)); 64 | } 65 | return Val_unit; 66 | } 67 | -------------------------------------------------------------------------------- /src/stubs-chacha20.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for Chacha20 */ 15 | 16 | #include "chacha20.c" 17 | #include 18 | #include 19 | #include 20 | 21 | #define Cooked_key_size (sizeof(chacha20_ctx)) 22 | #define Key_val(v) ((chacha20_ctx *) String_val(v)) 23 | 24 | CAMLprim value caml_chacha20_cook_key(value key, value iv, value counter) 25 | { 26 | CAMLparam3(key, iv, counter); 27 | value ckey = caml_alloc_string(Cooked_key_size); 28 | chacha20_init(Key_val(ckey), 29 | (unsigned char *) String_val(key), caml_string_length(key), 30 | (unsigned char *) String_val(iv), caml_string_length(iv), 31 | Int64_val(counter)); 32 | CAMLreturn(ckey); 33 | } 34 | 35 | CAMLprim value caml_chacha20_transform(value ckey, value src, value src_ofs, 36 | value dst, value dst_ofs, value len) 37 | { 38 | chacha20_transform(Key_val(ckey), 39 | &Byte_u(src, Long_val(src_ofs)), 40 | &Byte_u(dst, Long_val(dst_ofs)), 41 | Long_val(len)); 42 | return Val_unit; 43 | } 44 | 45 | CAMLprim value caml_chacha20_transform_bytecode(value * argv, int argc) 46 | { 47 | return caml_chacha20_transform(argv[0], argv[1], argv[2], 48 | argv[3], argv[4], argv[5]); 49 | } 50 | 51 | CAMLprim value 
caml_chacha20_extract(value ckey, 52 | value dst, value dst_ofs, value len) 53 | { 54 | chacha20_extract(Key_val(ckey), 55 | &Byte_u(dst, Long_val(dst_ofs)), 56 | Long_val(len)); 57 | return Val_unit; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/config/flags.ml: -------------------------------------------------------------------------------- 1 | (* Compute compilation and linking flags *) 2 | 3 | open Printf 4 | open Config_vars 5 | 6 | module Configurator = Configurator.V1 7 | 8 | (* Compile and link a dummy C program with the given flags. *) 9 | let test ~cfg ~c_flags ~link_flags = 10 | let test_program = "int main() { return 0; }" in 11 | Configurator.c_test cfg test_program ~c_flags ~link_flags 12 | 13 | (* Check that a list of header files declare a list of identifiers. *) 14 | let provides ~cfg ~c_flags ~link_flags ~headers ~functions = 15 | let test_program = 16 | List.map (fun h -> sprintf "#include <%s>\n" h) headers 17 | @ ["int main() {\n"] 18 | @ List.map (fun f -> sprintf " void * ptr_%s = &%s;\n" f f) functions 19 | @ ["}\n"] in 20 | Configurator.c_test cfg (String.concat "" test_program) ~c_flags ~link_flags 21 | 22 | let () = Configurator.main ~name:"cryptokit" @@ fun cfg -> 23 | let os_type = Configurator.ocaml_config_var_exn cfg "os_type" in 24 | let system = Configurator.ocaml_config_var_exn cfg "system" in 25 | let architecture = Configurator.ocaml_config_var_exn cfg "architecture" in 26 | let zlib = match enable_zlib with 27 | | This bool -> bool 28 | | Auto -> os_type <> "Win32" 29 | in 30 | let hardware_support = match enable_hardware_support with 31 | | This bool -> bool 32 | | Auto -> (architecture = "amd64" || architecture = "i386") 33 | && test ~cfg ~c_flags:[ "-maes"; "-mpclmul" ] ~link_flags:[] 34 | in 35 | let has_getentropy = 36 | provides ~cfg ~c_flags:[] ~link_flags:[] 37 | ~headers:["unistd.h"] ~functions:["getentropy"] 38 | in 39 | let append_if c y x = if c then x @ [ y ] 
else x in 40 | let flags = 41 | [] 42 | |> append_if has_getentropy "-DHAVE_GETENTROPY" 43 | |> append_if zlib "-DHAVE_ZLIB" 44 | |> append_if hardware_support "-maes" 45 | |> append_if hardware_support "-mpclmul" 46 | in 47 | let library_flags = 48 | [] 49 | |> append_if (zlib && (system = "win32" || system = "win64")) "zlib.lib" 50 | |> append_if (zlib && system <> "win32" && system <> "win64") "-lz" 51 | |> append_if (system = "win32" || system = "win64") "advapi32.lib" 52 | |> append_if (system = "mingw" || system = "mingw64") "-ladvapi32" 53 | in 54 | Configurator.Flags.write_sexp "flags.sexp" flags; 55 | Configurator.Flags.write_sexp "library_flags.sexp" library_flags; 56 | let describe_bool = function 57 | | true -> "enabled" 58 | | false -> "disabled" 59 | in 60 | printf "ZLib: ............................... %s\n" (describe_bool zlib); 61 | printf "Hardware support for AES and GCM: ... %s\n" (describe_bool hardware_support); 62 | printf "getentropy():........................ %s\n" (describe_bool has_getentropy) 63 | 64 | -------------------------------------------------------------------------------- /src/stubs-blowfish.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Gallium, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2006 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for Blowfish */ 15 | 16 | #include "blowfish.c" 17 | #include 18 | #include 19 | #include 20 | 21 | CAMLprim value caml_blowfish_cook_key(value key) 22 | { 23 | CAMLparam1(key); 24 | value ckey = caml_alloc_string(sizeof(BLOWFISH_CTX)); 25 | Blowfish_Init((BLOWFISH_CTX *) String_val(ckey), 26 | &Byte_u(key, 0), 27 | caml_string_length(key)); 28 | CAMLreturn(ckey); 29 | } 30 | 31 | #ifdef ARCH_BIG_ENDIAN 32 | #define COPY4BYTES(dst,src) \ 33 | (dst)[0] = (src)[0], \ 34 | (dst)[1] = (src)[1], \ 35 | (dst)[2] = (src)[2], \ 36 | (dst)[3] = (src)[3] 37 | #else 38 | #define COPY4BYTES(dst,src) \ 39 | (dst)[0] = (src)[3], \ 40 | (dst)[1] = (src)[2], \ 41 | (dst)[2] = (src)[1], \ 42 | (dst)[3] = (src)[0] 43 | #endif 44 | 45 | CAMLprim value caml_blowfish_encrypt(value ckey, value src, value src_ofs, 46 | value dst, value dst_ofs) 47 | { 48 | u32 xl, xr; 49 | unsigned char * p; 50 | 51 | p = &Byte_u(src, Long_val(src_ofs)); 52 | COPY4BYTES((unsigned char *) &xl, p); 53 | COPY4BYTES((unsigned char *) &xr, p + 4); 54 | Blowfish_Encrypt((BLOWFISH_CTX *) String_val(ckey), &xl, &xr); 55 | p = &Byte_u(dst, Long_val(dst_ofs)); 56 | COPY4BYTES(p, (unsigned char *) &xl); 57 | COPY4BYTES(p + 4, (unsigned char *) &xr); 58 | return Val_unit; 59 | } 60 | 61 | CAMLprim value caml_blowfish_decrypt(value ckey, value src, value src_ofs, 62 | value dst, value dst_ofs) 63 | { 64 | u32 xl, xr; 65 | unsigned char * p; 66 | 67 | p = &Byte_u(src, Long_val(src_ofs)); 68 | COPY4BYTES((unsigned char *) &xl, p); 69 | COPY4BYTES((unsigned char *) &xr, p + 4); 70 | Blowfish_Decrypt((BLOWFISH_CTX *) String_val(ckey), &xl, &xr); 71 | p = &Byte_u(dst, Long_val(dst_ofs)); 72 | COPY4BYTES(p, (unsigned char *) &xl); 73 | COPY4BYTES(p + 4, (unsigned char *) &xr); 74 | return Val_unit; 75 | } 76 | 77 | -------------------------------------------------------------------------------- 
/src/stubs-blake2.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2020 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | #include "blake2.c" 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #define blake2b_val(v) ((struct blake2b *) String_val(v)) 23 | 24 | CAMLprim value caml_blake2b_init(value hashlen, value key) 25 | { 26 | CAMLparam1(key); 27 | value ctx = caml_alloc_string(sizeof(struct blake2b)); 28 | blake2b_init(blake2b_val(ctx), 29 | Int_val(hashlen), 30 | caml_string_length(key), &Byte_u(key, 0)); 31 | CAMLreturn(ctx); 32 | } 33 | 34 | CAMLprim value caml_blake2b_update(value ctx, value src, value ofs, value len) 35 | { 36 | blake2b_add_data(blake2b_val(ctx), 37 | &Byte_u(src, Long_val(ofs)), Long_val(len)); 38 | return Val_unit; 39 | } 40 | 41 | CAMLprim value caml_blake2b_final(value ctx, value hashlen) 42 | { 43 | CAMLparam1(ctx); 44 | CAMLlocal1(res); 45 | int len = Int_val(hashlen); 46 | res = caml_alloc_string(len); 47 | blake2b_final(blake2b_val(ctx), len, &Byte_u(res, 0)); 48 | CAMLreturn(res); 49 | } 50 | 51 | #define blake2s_val(v) ((struct blake2s *) String_val(v)) 52 | 53 | CAMLprim value caml_blake2s_init(value hashlen, value key) 54 | { 55 | CAMLparam1(key); 56 | value ctx = caml_alloc_string(sizeof(struct blake2s)); 57 | blake2s_init(blake2s_val(ctx), 58 | Int_val(hashlen), 59 | caml_string_length(key), &Byte_u(key, 0)); 60 | CAMLreturn(ctx); 61 | } 
62 | 63 | CAMLprim value caml_blake2s_update(value ctx, value src, value ofs, value len) 64 | { 65 | blake2s_add_data(blake2s_val(ctx), 66 | &Byte_u(src, Long_val(ofs)), Long_val(len)); 67 | return Val_unit; 68 | } 69 | 70 | CAMLprim value caml_blake2s_final(value ctx, value hashlen) 71 | { 72 | CAMLparam1(ctx); 73 | CAMLlocal1(res); 74 | int len = Int_val(hashlen); 75 | res = caml_alloc_string(len); 76 | blake2s_final(blake2s_val(ctx), len, &Byte_u(res, 0)); 77 | CAMLreturn(res); 78 | } 79 | -------------------------------------------------------------------------------- /src/stubs-blake3.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include 16 | #include "blake3.c" 17 | #include "blake3_portable.c" 18 | #include "blake3_dispatch.c" 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #define Context_val(v) (*((blake3_hasher **) Data_custom_val(v))) 26 | 27 | static void caml_blake3_finalize(value ctx) 28 | { 29 | if (Context_val(ctx) != NULL) { 30 | caml_stat_free(Context_val(ctx)); 31 | Context_val(ctx) = NULL; 32 | } 33 | } 34 | 35 | static struct custom_operations blake3_context_ops = { 36 | "fr.inria.caml.cryptokit.blake3_context", 37 | caml_blake3_finalize, 38 | custom_compare_default, 39 | custom_hash_default, 40 | custom_deserialize_default, 41 | custom_compare_ext_default 42 | }; 43 | 44 | CAMLprim value caml_blake3_init(value optkey) 45 | { 46 | CAMLparam1(optkey); 47 | blake3_hasher * ctx = caml_stat_alloc(sizeof(blake3_hasher)); 48 | value res = 49 | caml_alloc_custom(&blake3_context_ops, 50 | sizeof(blake3_hasher *), 51 | 0, 1); 52 | if (caml_string_length(optkey) == BLAKE3_KEY_LEN) { 53 | blake3_hasher_init_keyed(ctx, &Byte_u(optkey, 0)); 54 | } else { 55 | blake3_hasher_init(ctx); 56 | } 57 | Context_val(res) = ctx; 58 | CAMLreturn(res); 59 | } 60 | 61 | CAMLprim value caml_blake3_update(value ctx, 62 | value src, value ofs, value len) 63 | { 64 | blake3_hasher_update(Context_val(ctx), 65 | &Byte_u(src, Long_val(ofs)), Long_val(len)); 66 | return Val_unit; 67 | } 68 | 69 | 70 | CAMLprim value caml_blake3_extract(value ctx, value vlen) 71 | { 72 | CAMLparam2(ctx, vlen); 73 | CAMLlocal1(res); 74 | size_t len = Long_val(vlen); 75 | res = caml_alloc_string(len); 76 | blake3_hasher_finalize(Context_val(ctx), &Byte_u(res, 0), len); 77 | CAMLreturn(res); 78 | } 79 | 80 | CAMLprim value caml_blake3_wipe(value ctx) 81 | { 82 | if (Context_val(ctx) != NULL) 83 | memset(Context_val(ctx), 0, sizeof(blake3_hasher)); 84 | caml_blake3_finalize(ctx); 85 | return 
Val_unit; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /src/stubs-sha512.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2015 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. */ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include "sha512.c" 15 | #include 16 | #include 17 | #include 18 | 19 | #define Context_val(v) ((struct SHA512Context *) String_val(v)) 20 | 21 | CAMLprim value caml_sha512_init(value unit) 22 | { 23 | value ctx = caml_alloc_string(sizeof(struct SHA512Context)); 24 | SHA512_init(Context_val(ctx), 512); 25 | return ctx; 26 | } 27 | 28 | CAMLprim value caml_sha384_init(value unit) 29 | { 30 | value ctx = caml_alloc_string(sizeof(struct SHA512Context)); 31 | SHA512_init(Context_val(ctx), 384); 32 | return ctx; 33 | } 34 | 35 | CAMLprim value caml_sha512_256_init(value unit) 36 | { 37 | value ctx = caml_alloc_string(sizeof(struct SHA512Context)); 38 | SHA512_init(Context_val(ctx), 256); 39 | return ctx; 40 | } 41 | 42 | CAMLprim value caml_sha512_224_init(value unit) 43 | { 44 | value ctx = caml_alloc_string(sizeof(struct SHA512Context)); 45 | SHA512_init(Context_val(ctx), 224); 46 | return ctx; 47 | } 48 | 49 | CAMLprim value caml_sha512_update(value ctx, value src, value ofs, value len) 50 | { 51 | SHA512_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 52 | return Val_unit; 53 | } 54 | 55 | CAMLprim value caml_sha512_final(value ctx) 56 | { 57 | 
CAMLparam1(ctx); 58 | CAMLlocal1(res); 59 | 60 | res = caml_alloc_string(64); 61 | SHA512_finish(Context_val(ctx), 512, &Byte_u(res, 0)); 62 | CAMLreturn(res); 63 | } 64 | 65 | CAMLprim value caml_sha384_final(value ctx) 66 | { 67 | CAMLparam1(ctx); 68 | CAMLlocal1(res); 69 | 70 | res = caml_alloc_string(48); 71 | SHA512_finish(Context_val(ctx), 384, &Byte_u(res, 0)); 72 | CAMLreturn(res); 73 | } 74 | 75 | CAMLprim value caml_sha512_256_final(value ctx) 76 | { 77 | CAMLparam1(ctx); 78 | CAMLlocal1(res); 79 | 80 | res = caml_alloc_string(32); 81 | SHA512_finish(Context_val(ctx), 256, &Byte_u(res, 0)); 82 | CAMLreturn(res); 83 | } 84 | 85 | CAMLprim value caml_sha512_224_final(value ctx) 86 | { 87 | CAMLparam1(ctx); 88 | CAMLlocal1(res); 89 | 90 | res = caml_alloc_string(28); 91 | SHA512_finish(Context_val(ctx), 224, &Byte_u(res, 0)); 92 | CAMLreturn(res); 93 | } 94 | -------------------------------------------------------------------------------- /src/stubs-sha3.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Gallium, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2013 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | #include 15 | #include "keccak.c" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define Context_val(v) (*((struct SHA3Context **) Data_custom_val(v))) 22 | 23 | static void caml_sha3_finalize(value ctx) 24 | { 25 | if (Context_val(ctx) != NULL) { 26 | caml_stat_free(Context_val(ctx)); 27 | Context_val(ctx) = NULL; 28 | } 29 | } 30 | 31 | static struct custom_operations SHA3_context_ops = { 32 | "fr.inria.caml.cryptokit.SHA3_context", 33 | caml_sha3_finalize, 34 | custom_compare_default, 35 | custom_hash_default, 36 | custom_deserialize_default, 37 | custom_compare_ext_default 38 | }; 39 | 40 | CAMLprim value caml_sha3_init(value vsize) 41 | { 42 | struct SHA3Context * ctx = caml_stat_alloc(sizeof(struct SHA3Context)); 43 | value res = 44 | caml_alloc_custom(&SHA3_context_ops, 45 | sizeof(struct SHA3Context *), 46 | 0, 1); 47 | SHA3_init(ctx, Int_val(vsize)); 48 | Context_val(res) = ctx; 49 | return res; 50 | } 51 | 52 | CAMLprim value caml_sha3_absorb(value ctx, 53 | value src, value ofs, value len) 54 | { 55 | SHA3_absorb(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); 56 | return Val_unit; 57 | } 58 | 59 | 60 | /* On page 9 of Keccak Implementation Overview (Version 3.2) 61 | http://keccak.noekeon.org/Keccak-implementation-3.2.pdf, 62 | there is a figure `0x01` as the padding byte. */ 63 | static const unsigned keccak_padding = 0x01; 64 | 65 | /* In a similar, updated description at http://keccak.noekeon.org/specs_summary.html, 66 | on Table 3, `0x06` is shown as the relevant padding byte. */ 67 | static const unsigned sha3_padding = 0x06; 68 | 69 | CAMLprim value caml_sha3_extract(value official, value ctx) 70 | { 71 | CAMLparam2(official, ctx); 72 | CAMLlocal1(res); 73 | 74 | res = caml_alloc_string(Context_val(ctx)->hsiz); 75 | SHA3_extract(Bool_val(official) ? 
sha3_padding : keccak_padding, Context_val(ctx), &Byte_u(res, 0)); 76 | CAMLreturn(res); 77 | } 78 | 79 | CAMLprim value caml_sha3_wipe(value ctx) 80 | { 81 | if (Context_val(ctx) != NULL) { 82 | memset(Context_val(ctx), 0, sizeof(struct SHA3Context)); 83 | caml_stat_free(Context_val(ctx)); 84 | Context_val(ctx) = NULL; 85 | } 86 | return Val_unit; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /src/stubs-aes.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for AES */ 15 | 16 | #include "rijndael-alg-fst.c" 17 | #include "aesni.c" 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #define Cooked_key_NR_offset ((4 * (MAXNR + 1)) * sizeof(u32)) 24 | #define Cooked_key_size (Cooked_key_NR_offset + 1) 25 | 26 | CAMLprim value caml_aes_cook_encrypt_key(value key) 27 | { 28 | CAMLparam1(key); 29 | value ckey = caml_alloc_string(Cooked_key_size); 30 | int nr; 31 | 32 | if (aesni_available == -1) aesni_check_available(); 33 | if (aesni_available == 1) 34 | nr = aesniKeySetupEnc((u8 *) String_val(ckey), 35 | (const u8 *) String_val(key), 36 | 8 * caml_string_length(key)); 37 | else 38 | nr = rijndaelKeySetupEnc((u32 *) String_val(ckey), 39 | (const u8 *) String_val(key), 40 | 8 * caml_string_length(key)); 41 | Byte(ckey, Cooked_key_NR_offset) = nr; 42 | CAMLreturn(ckey); 43 | } 44 | 45 | CAMLprim value caml_aes_cook_decrypt_key(value key) 46 | { 47 | CAMLparam1(key); 48 | value ckey = caml_alloc_string(Cooked_key_size); 49 | int nr; 50 | 51 | if (aesni_available == -1) aesni_check_available(); 52 | if (aesni_available == 1) 53 | nr = aesniKeySetupDec((u8 *) String_val(ckey), 54 | (const u8 *) String_val(key), 55 | 8 * caml_string_length(key)); 56 | else 57 | nr = rijndaelKeySetupDec((u32 *) String_val(ckey), 58 | (const u8 *) String_val(key), 59 | 8 * caml_string_length(key)); 60 | Byte(ckey, Cooked_key_NR_offset) = nr; 61 | CAMLreturn(ckey); 62 | } 63 | 64 | CAMLprim value caml_aes_encrypt(value ckey, value src, value src_ofs, 65 | value dst, value dst_ofs) 66 | { 67 | if (aesni_available == 1) 68 | aesniEncrypt((const u8 *) String_val(ckey), 69 | Byte(ckey, Cooked_key_NR_offset), 70 | (const u8 *) &Byte(src, Long_val(src_ofs)), 71 | (u8 *) &Byte(dst, Long_val(dst_ofs))); 72 | else 73 | rijndaelEncrypt((const u32 *) String_val(ckey), 74 | Byte(ckey, Cooked_key_NR_offset), 75 | (const u8 *) &Byte(src, 
Long_val(src_ofs)), 76 | (u8 *) &Byte(dst, Long_val(dst_ofs))); 77 | return Val_unit; 78 | } 79 | 80 | CAMLprim value caml_aes_decrypt(value ckey, value src, value src_ofs, 81 | value dst, value dst_ofs) 82 | { 83 | if (aesni_available == 1) 84 | aesniDecrypt((const u8 *) String_val(ckey), 85 | Byte(ckey, Cooked_key_NR_offset), 86 | (const u8 *) &Byte(src, Long_val(src_ofs)), 87 | (u8 *) &Byte(dst, Long_val(dst_ofs))); 88 | else 89 | rijndaelDecrypt((const u32 *) String_val(ckey), 90 | Byte(ckey, Cooked_key_NR_offset), 91 | (const u8 *) &Byte(src, Long_val(src_ofs)), 92 | (u8 *) &Byte(dst, Long_val(dst_ofs))); 93 | return Val_unit; 94 | } 95 | 96 | -------------------------------------------------------------------------------- /src/pclmul.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, Collège de France and Inria */ 6 | /* */ 7 | /* Copyright 2022 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
/* Copy 16 bytes from src to dst in reversed byte order:
   dst[i] receives src[15 - i] for i = 0 .. 15, in increasing order of i. */
static void copy_reverse_16(void * dst, const void * src)
{
  uint8_t * out = (uint8_t *) dst;
  const uint8_t * in = (const uint8_t *) src;
  int i;

  for (i = 0; i < 16; i++) {
    out[i] = in[15 - i];
  }
}
/* Fallback definition compiled when __PCLMUL__ is not defined.
   In this configuration pclmul_available is statically 0 and
   pclmul_check_available() returns 0, so callers are presumably expected
   never to reach this function; if it is called anyway the process is
   terminated with abort() rather than returning a wrong product. */
EXPORT void pclmul_mult(uint8_t res[16],
                        const uint8_t arg1[16], const uint8_t arg2[16])
{ abort(); }
(* [sub_mod a b p] computes [a - b] and, when that difference is negative,
   adds [p] once to it.  A non-negative difference is returned unchanged. *)
let sub_mod a b p =
  match Z.sub a b with
  | d when Z.sign d < 0 -> Z.add d p
  | d -> d
(* [to_bytes ?numbits n] converts [n] to a big-endian byte string.

   - Without [numbits], the result has the same length as [Z.to_bits n].
   - With [~numbits:nb], the result is exactly [(nb + 7) / 8] bytes long,
     left-padded with zero bytes; an assertion checks that [n] fits in
     [nb] bits.

   [Z.to_bits] returns the little-endian representation and its result can
   be longer than [(nb + 7) / 8] because it may carry trailing zero bytes.
   Copying every byte of it would then index [t] at a negative position and
   raise [Invalid_argument], so only the first [min l (String.length s)]
   bytes are copied.  When the assertion holds, the bytes left out are all
   zero.

   The intermediate string returned by [Z.to_bits] is wiped before
   returning. *)
let to_bytes ?numbits n =
  let s = Z.to_bits n in
  let l =
    match numbits with
    | None -> String.length s
    | Some nb -> assert (Z.numbits n <= nb); (nb + 7) / 8 in
  let t = Bytes.make l '\000' in
  (* Reverse the byte order while copying: s is little-endian, t big-endian. *)
  for i = 0 to min l (String.length s) - 1 do
    Bytes.set t (l - 1 - i) s.[i]
  done;
  wipe_bytes (Bytes.unsafe_of_string s);
  Bytes.unsafe_to_string t
/* Read the 8 bytes b[i] .. b[i + 7] as a big-endian 64-bit unsigned
   integer: b[i] is the most significant byte, b[i + 7] the least. */
static inline uint64_t get_uint64_be(const uint8_t * b, int i)
{
  uint64_t acc = 0;
  int k;

  /* Shift in one byte at a time, most significant first. */
  for (k = 0; k < 8; k++) {
    acc = (acc << 8) | (uint64_t) b[i + k];
  }
  return acc;
}
zh, zl; 60 | 61 | lo = (uint8_t)( input[15] & 0x0f ); 62 | hi = (uint8_t)( input[15] >> 4 ); 63 | zh = ctx->HH[lo]; 64 | zl = ctx->HL[lo]; 65 | 66 | for( i = 15; i >= 0; i-- ) { 67 | lo = (uint8_t) ( input[i] & 0x0f ); 68 | hi = (uint8_t) ( input[i] >> 4 ); 69 | 70 | if( i != 15 ) { 71 | rem = (uint8_t) ( zl & 0x0f ); 72 | zl = ( zh << 60 ) | ( zl >> 4 ); 73 | zh = ( zh >> 4 ); 74 | zh ^= (uint64_t) last4[rem] << 48; 75 | zh ^= ctx->HH[lo]; 76 | zl ^= ctx->HL[lo]; 77 | } 78 | rem = (uint8_t) ( zl & 0x0f ); 79 | zl = ( zh << 60 ) | ( zl >> 4 ); 80 | zh = ( zh >> 4 ); 81 | zh ^= (uint64_t) last4[rem] << 48; 82 | zh ^= ctx->HH[hi]; 83 | zl ^= ctx->HL[hi]; 84 | } 85 | put_uint64_be(zh, output, 0 ); 86 | put_uint64_be(zl, output, 8 ); 87 | } 88 | 89 | EXPORT void ghash_init(struct ghash_context * ctx, 90 | const uint8_t h[16]) 91 | { 92 | int ret, i, j; 93 | uint64_t hi, lo; 94 | uint64_t vl, vh; 95 | 96 | memset(ctx, 0, sizeof(struct ghash_context)); 97 | 98 | vh = get_uint64_be(h, 0); 99 | vl = get_uint64_be(h, 8); 100 | 101 | ctx->HL[8] = vl; // 8 = 1000 corresponds to 1 in GF(2^128) 102 | ctx->HH[8] = vh; 103 | ctx->HH[0] = 0; // 0 corresponds to 0 in GF(2^128) 104 | ctx->HL[0] = 0; 105 | 106 | for( i = 4; i > 0; i >>= 1 ) { 107 | uint32_t T = (uint32_t) ( vl & 1 ) * 0xe1000000U; 108 | vl = ( vh << 63 ) | ( vl >> 1 ); 109 | vh = ( vh >> 1 ) ^ ( (uint64_t) T << 32); 110 | ctx->HL[i] = vl; 111 | ctx->HH[i] = vh; 112 | } 113 | for (i = 2; i < 16; i <<= 1 ) { 114 | uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i; 115 | vh = *HiH; 116 | vl = *HiL; 117 | for( j = 1; j < i; j++ ) { 118 | HiH[j] = vh ^ ctx->HH[j]; 119 | HiL[j] = vl ^ ctx->HL[j]; 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/siphash.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* 
/* Based on the SipHash reference C implementation by Aumasson and Bernstein
   https://github.com/veorq/SipHash
   and lightly adapted by Leroy.
   The original implementation is distributed under the CC0 Public Domain
   Dedication. */
st->v2 ^= k0; 62 | st->v1 ^= k1; 63 | st->v0 ^= k0; 64 | if (outlen == 16) st->v1 ^= 0xEE; 65 | st->used = 0; 66 | st->len8 = 0; 67 | } 68 | 69 | static inline void siphash_round(struct siphash * st) 70 | { 71 | st->v0 += st->v1; 72 | st->v1 = ROTL64(st->v1, 13); 73 | st->v1 ^= st->v0; 74 | st->v0 = ROTL64(st->v0, 32); 75 | st->v2 += st->v3; 76 | st->v3 = ROTL64(st->v3, 16); 77 | st->v3 ^= st->v2; 78 | st->v0 += st->v3; 79 | st->v3 = ROTL64(st->v3, 21); 80 | st->v3 ^= st->v0; 81 | st->v2 += st->v1; 82 | st->v1 = ROTL64(st->v1, 17); 83 | st->v1 ^= st->v2; 84 | st->v2 = ROTL64(st->v2, 32); 85 | } 86 | 87 | static void siphash_mix(struct siphash * st, uint64_t x) 88 | { 89 | st->v3 ^= x; 90 | siphash_round(st); 91 | siphash_round(st); 92 | st->v0 ^= x; 93 | } 94 | 95 | EXPORT void siphash_add(struct siphash * st, const unsigned char * p, size_t len) 96 | { 97 | int used = st->used; 98 | int free = SIPHASH_BUFLEN - used; 99 | 100 | st->len8 += len; 101 | if (len < free) { 102 | memcpy(st->buffer + used, p, len); 103 | st->used = used + len; 104 | return; 105 | } 106 | if (used > 0) { 107 | memcpy(st->buffer + used, p, free); 108 | siphash_mix(st, U8TO64_LE(st->buffer)); 109 | p += free; 110 | len -= free; 111 | } 112 | while (len >= SIPHASH_BUFLEN) { 113 | siphash_mix(st, U8TO64_LE(p)); 114 | p += SIPHASH_BUFLEN; 115 | len -= SIPHASH_BUFLEN; 116 | } 117 | if (len > 0) memcpy(st->buffer, p, len); 118 | st->used = len; 119 | } 120 | 121 | static uint64_t siphash_final_rounds(struct siphash * st) 122 | { 123 | /* Four rounds at the end */ 124 | for (int i = 0; i < 4; i++) siphash_round(st); 125 | /* Fold state down to 64 bits */ 126 | return st->v0 ^ st->v1 ^ st->v2 ^ st->v3; 127 | } 128 | 129 | EXPORT void siphash_final(struct siphash * st, int outlen, unsigned char * out) 130 | { 131 | uint64_t w; 132 | /* Finish with the remaining bytes (up to 7 bytes). 133 | Also use the low 8 bits of the length. 
*/ 134 | w = (uint64_t) st->len8 << 56; 135 | switch (st->len8 & 7) { 136 | case 7: w |= (uint64_t) st->buffer[6] << 48; /* fallthrough */ 137 | case 6: w |= (uint64_t) st->buffer[5] << 40; /* fallthrough */ 138 | case 5: w |= (uint64_t) st->buffer[4] << 32; /* fallthrough */ 139 | case 4: w |= (uint64_t) st->buffer[3] << 24; /* fallthrough */ 140 | case 3: w |= (uint64_t) st->buffer[2] << 16; /* fallthrough */ 141 | case 2: w |= (uint64_t) st->buffer[1] << 8; /* fallthrough */ 142 | case 1: w |= (uint64_t) st->buffer[0]; /* fallthrough */ 143 | case 0: /*skip*/; 144 | } 145 | siphash_mix(st, w); 146 | /* First 64 bit of hash */ 147 | st->v2 ^= (outlen == 16 ? 0xEE : 0xFF); 148 | U64TO8_LE(out, siphash_final_rounds(st)); 149 | /* Next 64 bits of hash, if requested */ 150 | if (outlen == 16) { 151 | st->v1 ^= 0xDD; 152 | U64TO8_LE(out + 8, siphash_final_rounds(st)); 153 | } 154 | } 155 | 156 | -------------------------------------------------------------------------------- /src/sha1.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
/* Copy numwords 32-bit words from src to dst, converting between host
   byte order and big-endian.
   When ARCH_BIG_ENDIAN is defined this is a plain memcpy; otherwise the
   four bytes of every word are reversed.  The first two source bytes of a
   word are saved before any store, so src and dst may be the same buffer. */
static void SHA1_copy_and_swap(void * src, void * dst, int numwords)
{
#ifdef ARCH_BIG_ENDIAN
  memcpy(dst, src, numwords * sizeof(u32));
#else
  unsigned char * in = src;
  unsigned char * out = dst;
  int remaining;

  for (remaining = numwords; remaining > 0; remaining--) {
    const unsigned char first = in[0];
    const unsigned char second = in[1];
    out[0] = in[3];
    out[1] = in[2];
    out[2] = second;
    out[3] = first;
    in += 4;
    out += 4;
  }
#endif
}
60; i++) { 85 | t = G(b, c, d) + Y3 + rol5(a) + e + data[i]; 86 | e = d; d = c; c = rol30(b); b = a; a = t; 87 | } 88 | for (/*nothing*/; i < 80; i++) { 89 | t = H(b, c, d) + Y4 + rol5(a) + e + data[i]; 90 | e = d; d = c; c = rol30(b); b = a; a = t; 91 | } 92 | 93 | /* Update chaining values */ 94 | ctx->state[0] += a; 95 | ctx->state[1] += b; 96 | ctx->state[2] += c; 97 | ctx->state[3] += d; 98 | ctx->state[4] += e; 99 | } 100 | 101 | EXPORT void SHA1_init(struct SHA1Context * ctx) 102 | { 103 | ctx->state[0] = 0x67452301U; 104 | ctx->state[1] = 0xEFCDAB89U; 105 | ctx->state[2] = 0x98BADCFEU; 106 | ctx->state[3] = 0x10325476U; 107 | ctx->state[4] = 0xC3D2E1F0U; 108 | ctx->numbytes = 0; 109 | ctx->length[0] = 0; 110 | ctx->length[1] = 0; 111 | } 112 | 113 | EXPORT void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data, 114 | unsigned long len) 115 | { 116 | u32 t; 117 | 118 | /* Update length */ 119 | t = ctx->length[1]; 120 | if ((ctx->length[1] = t + (u32) (len << 3)) < t) 121 | ctx->length[0]++; /* carry from low 32 bits to high 32 bits */ 122 | ctx->length[0] += (u32) (len >> 29); 123 | 124 | /* If data was left in buffer, pad it with fresh data and munge block */ 125 | if (ctx->numbytes != 0) { 126 | t = 64 - ctx->numbytes; 127 | if (len < t) { 128 | memcpy(ctx->buffer + ctx->numbytes, data, len); 129 | ctx->numbytes += len; 130 | return; 131 | } 132 | memcpy(ctx->buffer + ctx->numbytes, data, t); 133 | SHA1_transform(ctx); 134 | data += t; 135 | len -= t; 136 | } 137 | /* Munge data in 64-byte chunks */ 138 | while (len >= 64) { 139 | memcpy(ctx->buffer, data, 64); 140 | SHA1_transform(ctx); 141 | data += 64; 142 | len -= 64; 143 | } 144 | /* Save remaining data */ 145 | memcpy(ctx->buffer, data, len); 146 | ctx->numbytes = len; 147 | } 148 | 149 | EXPORT void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]) 150 | { 151 | int i = ctx->numbytes; 152 | 153 | /* Set first char of padding to 0x80. There is always room. 
*/ 154 | ctx->buffer[i++] = 0x80; 155 | /* If we do not have room for the length (8 bytes), pad to 64 bytes 156 | with zeroes and munge the data block */ 157 | if (i > 56) { 158 | memset(ctx->buffer + i, 0, 64 - i); 159 | SHA1_transform(ctx); 160 | i = 0; 161 | } 162 | /* Pad to byte 56 with zeroes */ 163 | memset(ctx->buffer + i, 0, 56 - i); 164 | /* Add length in big-endian */ 165 | SHA1_copy_and_swap(ctx->length, ctx->buffer + 56, 2); 166 | /* Munge the final block */ 167 | SHA1_transform(ctx); 168 | /* Final hash value is in ctx->state modulo big-endian conversion */ 169 | SHA1_copy_and_swap(ctx->state, output, 5); 170 | } 171 | -------------------------------------------------------------------------------- /src/chacha20.c: -------------------------------------------------------------------------------- 1 | /* Based on D. J. Bernstein's chacha-regs.c version 200801118, 2 | https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c 3 | The initial code is in the public domain */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "chacha20.h" 11 | 12 | static inline void U32TO8_LITTLE(uint8_t * dst, uint32_t val) 13 | { 14 | #ifdef ARCH_BIG_ENDIAN 15 | dst[0] = val; 16 | dst[1] = val >> 8; 17 | dst[2] = val >> 16; 18 | dst[3] = val >> 24; 19 | #else 20 | *((uint32_t *) dst) = val; 21 | #endif 22 | } 23 | 24 | static inline uint32_t U8TO32_LITTLE(const uint8_t * src) 25 | { 26 | return (uint32_t) src[0] 27 | + ((uint32_t) src[1] << 8) 28 | + ((uint32_t) src[2] << 16) 29 | + ((uint32_t) src[3] << 24); 30 | } 31 | 32 | #define ROTATE(v,c) ((v) << (c) | (v) >> (32 - (c))) 33 | #define XOR(v,w) ((v) ^ (w)) 34 | #define PLUS(v,w) ((v) + (w)) 35 | #define PLUSONE(v) ((v) + 1) 36 | 37 | #define QUARTERROUND(a,b,c,d) \ 38 | a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 39 | c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 40 | a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 41 | c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 42 
| 43 | static void chacha20_block(chacha20_ctx * ctx) 44 | { 45 | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 46 | int i; 47 | 48 | x0 = ctx->input[0]; 49 | x1 = ctx->input[1]; 50 | x2 = ctx->input[2]; 51 | x3 = ctx->input[3]; 52 | x4 = ctx->input[4]; 53 | x5 = ctx->input[5]; 54 | x6 = ctx->input[6]; 55 | x7 = ctx->input[7]; 56 | x8 = ctx->input[8]; 57 | x9 = ctx->input[9]; 58 | x10 = ctx->input[10]; 59 | x11 = ctx->input[11]; 60 | x12 = ctx->input[12]; 61 | x13 = ctx->input[13]; 62 | x14 = ctx->input[14]; 63 | x15 = ctx->input[15]; 64 | for (i = 10; i > 0; i --) { 65 | QUARTERROUND( x0, x4, x8,x12) 66 | QUARTERROUND( x1, x5, x9,x13) 67 | QUARTERROUND( x2, x6,x10,x14) 68 | QUARTERROUND( x3, x7,x11,x15) 69 | QUARTERROUND( x0, x5,x10,x15) 70 | QUARTERROUND( x1, x6,x11,x12) 71 | QUARTERROUND( x2, x7, x8,x13) 72 | QUARTERROUND( x3, x4, x9,x14) 73 | } 74 | x0 = PLUS(x0,ctx->input[0]); 75 | x1 = PLUS(x1,ctx->input[1]); 76 | x2 = PLUS(x2,ctx->input[2]); 77 | x3 = PLUS(x3,ctx->input[3]); 78 | x4 = PLUS(x4,ctx->input[4]); 79 | x5 = PLUS(x5,ctx->input[5]); 80 | x6 = PLUS(x6,ctx->input[6]); 81 | x7 = PLUS(x7,ctx->input[7]); 82 | x8 = PLUS(x8,ctx->input[8]); 83 | x9 = PLUS(x9,ctx->input[9]); 84 | x10 = PLUS(x10,ctx->input[10]); 85 | x11 = PLUS(x11,ctx->input[11]); 86 | x12 = PLUS(x12,ctx->input[12]); 87 | x13 = PLUS(x13,ctx->input[13]); 88 | x14 = PLUS(x14,ctx->input[14]); 89 | x15 = PLUS(x15,ctx->input[15]); 90 | U32TO8_LITTLE(ctx->output + 0,x0); 91 | U32TO8_LITTLE(ctx->output + 4,x1); 92 | U32TO8_LITTLE(ctx->output + 8,x2); 93 | U32TO8_LITTLE(ctx->output + 12,x3); 94 | U32TO8_LITTLE(ctx->output + 16,x4); 95 | U32TO8_LITTLE(ctx->output + 20,x5); 96 | U32TO8_LITTLE(ctx->output + 24,x6); 97 | U32TO8_LITTLE(ctx->output + 28,x7); 98 | U32TO8_LITTLE(ctx->output + 32,x8); 99 | U32TO8_LITTLE(ctx->output + 36,x9); 100 | U32TO8_LITTLE(ctx->output + 40,x10); 101 | U32TO8_LITTLE(ctx->output + 44,x11); 102 | U32TO8_LITTLE(ctx->output + 48,x12); 103 | 
U32TO8_LITTLE(ctx->output + 52,x13); 104 | U32TO8_LITTLE(ctx->output + 56,x14); 105 | U32TO8_LITTLE(ctx->output + 60,x15); 106 | /* Increment the 32- or 64-bit counter */ 107 | if (++ ctx->input[12] == 0) { 108 | if (ctx->iv_length == 8) ++ ctx->input[13]; 109 | } 110 | } 111 | 112 | EXPORT void chacha20_transform(chacha20_ctx * ctx, 113 | const uint8_t * in, uint8_t * out, size_t len) 114 | { 115 | int n = ctx->next; 116 | for (/*nothing*/; len > 0; len--) { 117 | if (n >= 64) { chacha20_block(ctx); n = 0; } 118 | *out++ = *in++ ^ ctx->output[n++]; 119 | } 120 | ctx->next = n; 121 | } 122 | 123 | EXPORT void chacha20_extract(chacha20_ctx * ctx, 124 | uint8_t * out, size_t len) 125 | { 126 | int n = ctx->next; 127 | for (/*nothing*/; len > 0; len--) { 128 | if (n >= 64) { chacha20_block(ctx); n = 0; } 129 | *out++ = ctx->output[n++]; 130 | } 131 | ctx->next = n; 132 | } 133 | 134 | EXPORT void chacha20_init(chacha20_ctx * ctx, 135 | const uint8_t * key, size_t key_length, 136 | const uint8_t * iv, size_t iv_length, 137 | uint64_t counter) 138 | { 139 | const uint8_t *constants = 140 | (uint8_t *) (key_length == 32 ? 
"expand 32-byte k" : "expand 16-byte k"); 141 | assert (key_length == 16 || key_length == 32); 142 | assert (iv_length == 8 || iv_length == 12); 143 | ctx->input[0] = U8TO32_LITTLE(constants + 0); 144 | ctx->input[1] = U8TO32_LITTLE(constants + 4); 145 | ctx->input[2] = U8TO32_LITTLE(constants + 8); 146 | ctx->input[3] = U8TO32_LITTLE(constants + 12); 147 | ctx->input[4] = U8TO32_LITTLE(key + 0); 148 | ctx->input[5] = U8TO32_LITTLE(key + 4); 149 | ctx->input[6] = U8TO32_LITTLE(key + 8); 150 | ctx->input[7] = U8TO32_LITTLE(key + 12); 151 | if (key_length == 32) key += 16; 152 | ctx->input[8] = U8TO32_LITTLE(key + 0); 153 | ctx->input[9] = U8TO32_LITTLE(key + 4); 154 | ctx->input[10] = U8TO32_LITTLE(key + 8); 155 | ctx->input[11] = U8TO32_LITTLE(key + 12); 156 | ctx->input[12] = (uint32_t) counter; 157 | if (iv_length == 8) { 158 | ctx->input[13] = (uint32_t) (counter >> 32); 159 | ctx->input[14] = U8TO32_LITTLE(iv + 0); 160 | ctx->input[15] = U8TO32_LITTLE(iv + 4); 161 | } else { 162 | ctx->input[13] = U8TO32_LITTLE(iv + 0); 163 | ctx->input[14] = U8TO32_LITTLE(iv + 4); 164 | ctx->input[15] = U8TO32_LITTLE(iv + 8); 165 | } 166 | ctx->iv_length = iv_length; 167 | ctx->next = 64; 168 | } 169 | -------------------------------------------------------------------------------- /src/stubs-rng.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2003 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code for the system-provided RNG and for hardware RNG */ 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #if defined(HAVE_GETENTROPY) || defined(__APPLE__) 22 | 23 | /* getentropy() system RNG */ 24 | 25 | #include 26 | #ifdef __APPLE__ 27 | #include 28 | #endif 29 | 30 | CAMLprim value caml_get_system_rng(value unit) 31 | { 32 | return Val_unit; 33 | } 34 | 35 | CAMLprim value caml_close_system_rng(value vhc) 36 | { 37 | return Val_unit; 38 | } 39 | 40 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str, 41 | value ofs, value len) 42 | { 43 | unsigned char * p = &Byte_u(str, Long_val(ofs)); 44 | intnat l = Long_val(len); 45 | while (l > 0) { 46 | int n = l < 256 ? l : 256; 47 | if (getentropy(p, n) == -1) return Val_false; 48 | p += n; l -= n; 49 | } 50 | return Val_true; 51 | } 52 | 53 | #elif defined(_WIN32) 54 | 55 | /* Win32 system RNG */ 56 | 57 | /* Inspired by Mike Lin's port of Cryptokit 1.0 */ 58 | 59 | #define _WIN32_WINNT 0x0400 60 | #define WIN32_LEAN_AND_MEAN 61 | #include 62 | #include 63 | #ifndef CRYPT_SILENT 64 | #define CRYPT_SILENT 0 65 | #endif 66 | 67 | #define HCRYPTPROV_val(v) (*((HCRYPTPROV *) &Field(v, 0))) 68 | 69 | CAMLprim value caml_get_system_rng(value unit) 70 | { 71 | HCRYPTPROV prov; 72 | value res; 73 | 74 | if (! 
CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, 75 | CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) 76 | caml_raise_not_found(); 77 | res = caml_alloc((sizeof(HCRYPTPROV) + sizeof(value) - 1) / sizeof(value), 78 | Abstract_tag); 79 | HCRYPTPROV_val(res) = prov; 80 | return res; 81 | } 82 | 83 | CAMLprim value caml_close_system_rng(value vhc) 84 | { 85 | CryptReleaseContext(HCRYPTPROV_val(vhc), 0); 86 | return Val_unit; 87 | } 88 | 89 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str, 90 | value ofs, value len) 91 | { 92 | return Val_bool(CryptGenRandom(HCRYPTPROV_val(vhc), 93 | Long_val(len), 94 | &Byte(str, Long_val(ofs)))); 95 | } 96 | 97 | #else 98 | 99 | CAMLprim value caml_get_system_rng(value unit) 100 | { 101 | caml_raise_not_found(); 102 | return Val_unit; /* not reached */ 103 | } 104 | 105 | CAMLprim value caml_close_system_rng(value vhc) 106 | { 107 | return Val_unit; 108 | } 109 | 110 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str, 111 | value ofs, value len) 112 | { 113 | return Val_false; 114 | } 115 | 116 | #endif 117 | 118 | /* Intel RDRAND instruction */ 119 | 120 | #if defined(__GNUC__) && defined(__x86_64) 121 | 122 | #include 123 | #include 124 | 125 | static inline int rdrand64(uint64_t * res) 126 | { 127 | uint64_t n; 128 | unsigned char ok; 129 | int retries; 130 | 131 | for (retries = 0; retries < 20; retries++) { 132 | __asm__ __volatile__ ("rdrand %0; setc %1" : "=r" (n), "=qm" (ok)); 133 | if (ok) { *res = n; return 1; } 134 | } 135 | return 0; 136 | } 137 | 138 | CAMLprim value caml_hardware_rng_available(value unit) 139 | { 140 | uint32_t ax, bx, cx, dx; 141 | uint64_t n; 142 | int retries; 143 | __asm__ __volatile__ ("cpuid" 144 | : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) 145 | : "a" (1)); 146 | if ((cx & (1U << 30)) == 0) return Val_false; 147 | /* Early AMD Ryzen 3000 processors have a most annoying bug: 148 | the rdrand instruction always returns 0xFF....FF. 149 | We check for this condition here. 
*/ 150 | for (retries = 0; retries < 8; retries++) { 151 | if (rdrand64(&n) && n != (uint64_t) (-1)) return Val_true; 152 | } 153 | /* If we reach here, either rdrand64 failed 8*20=160 times in a row, 154 | or it returned 8*64=512 "1" bits in a row. In either case, 155 | it's unusable. */ 156 | return Val_false; 157 | } 158 | 159 | CAMLprim value caml_hardware_rng_random_bytes(value str, value ofs, value len) 160 | { 161 | unsigned char * dst = &Byte_u(str, Long_val(ofs)); 162 | intnat nbytes = Long_val(len); 163 | uint64_t r, rr; 164 | 165 | while (nbytes >= 8) { 166 | if (! rdrand64(&r)) return Val_false; 167 | *((uint64_t *) dst) = r; 168 | dst += 8; 169 | nbytes -= 8; 170 | } 171 | if (nbytes > 0) { 172 | if (! rdrand64(&rr)) return Val_false; 173 | memcpy(dst, &rr, nbytes); 174 | } 175 | return Val_true; 176 | } 177 | 178 | #else 179 | 180 | CAMLprim value caml_hardware_rng_available(value unit) 181 | { return Val_false; } 182 | 183 | CAMLprim value caml_hardware_rng_random_bytes(value str, value ofs, value len) 184 | { return Val_false; } 185 | 186 | #endif 187 | -------------------------------------------------------------------------------- /src/keccak.c: -------------------------------------------------------------------------------- 1 | /* SHA-3 (Keccak) cryptographic hash function */ 2 | /* Code adapted from the "readable" implementation written by 3 | Markku-Juhani O. 
Saarinen */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "keccak.h" 9 | 10 | #define KECCAK_ROUNDS 24 11 | 12 | #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) 13 | 14 | static const u64 keccakf_rndc[24] = 15 | { 16 | 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 17 | 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, 18 | 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, 19 | 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, 20 | 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 21 | 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 22 | 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, 23 | 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 24 | }; 25 | 26 | #if 0 27 | /* Inlined */ 28 | static const int keccakf_rotc[24] = 29 | { 30 | 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 31 | 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 32 | }; 33 | 34 | static const int keccakf_piln[24] = 35 | { 36 | 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 37 | 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 38 | }; 39 | #endif 40 | 41 | /* Update the state with KECCAK_ROUND rounds */ 42 | 43 | static void KeccakPermutation(u64 st[25]) 44 | { 45 | int round, j; 46 | u64 t, bc[5]; 47 | 48 | for (round = 0; round < KECCAK_ROUNDS; round++) { 49 | 50 | // Theta 51 | #define THETA1(i) \ 52 | bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20] 53 | 54 | THETA1(0); THETA1(1); THETA1(2); THETA1(3); THETA1(4); 55 | 56 | #define THETA2(i) \ 57 | t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \ 58 | st[0 + i] ^= t; \ 59 | st[5 + i] ^= t; \ 60 | st[10 + i] ^= t; \ 61 | st[15 + i] ^= t; \ 62 | st[20 + i] ^= t 63 | 64 | THETA2(0); THETA2(1); THETA2(2); THETA2(3); THETA2(4); 65 | 66 | 67 | // Rho Pi 68 | 69 | #define RHOPI(i, rotc, piln) \ 70 | bc[0] = st[piln]; \ 71 | st[piln] = ROTL64(t, rotc); \ 72 | t = bc[0] 73 | 74 | t = st[1]; 75 | RHOPI(0, 1, 10); RHOPI(1, 3, 7); RHOPI(2, 6, 11); 
RHOPI(3, 10, 17); 76 | RHOPI(4, 15, 18); RHOPI(5, 21, 3); RHOPI(6, 28, 5); RHOPI(7, 36, 16); 77 | RHOPI(8, 45, 8); RHOPI(9, 55, 21); RHOPI(10, 2, 24); RHOPI(11, 14, 4); 78 | RHOPI(12, 27, 15); RHOPI(13, 41, 23); RHOPI(14, 56, 19); RHOPI(15, 8, 13); 79 | RHOPI(16, 25, 12); RHOPI(17, 43, 2); RHOPI(18, 62, 20); RHOPI(19, 18, 14); 80 | RHOPI(20, 39, 22); RHOPI(21, 61, 9); RHOPI(22, 20, 6); RHOPI(23, 44, 1); 81 | 82 | // Chi 83 | 84 | #define CHI1(i,j) \ 85 | bc[i] = st[j + i] 86 | #define CHI2(i,j) \ 87 | st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5] 88 | 89 | for (j = 0; j < 25; j += 5) { 90 | CHI1(0,j); CHI1(1,j); CHI1(2,j); CHI1(3,j); CHI1(4,j); 91 | CHI2(0,j); CHI2(1,j); CHI2(2,j); CHI2(3,j); CHI2(4,j); 92 | } 93 | 94 | // Iota 95 | st[0] ^= keccakf_rndc[round]; 96 | } 97 | } 98 | 99 | /* Absorb the given data and permute */ 100 | 101 | static void KeccakAbsorb(u64 st[25], unsigned char * p, int rsiz) 102 | { 103 | int i; 104 | rsiz = rsiz / 8; 105 | for (i = 0; i < rsiz; i += 1, p += 8) { 106 | // fixme: use direct access for little-endian platforms without 107 | // alignment constraints? 
108 | unsigned int l = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); 109 | unsigned int h = p[4] | (p[5] << 8) | (p[6] << 16) | (p[7] << 24); 110 | st[i] ^= l | ((unsigned long long) h << 32); 111 | } 112 | KeccakPermutation(st); 113 | } 114 | 115 | /* Exported interface */ 116 | 117 | EXPORT void SHA3_init(struct SHA3Context * ctx, int hsiz) 118 | { 119 | assert (hsiz == 224 || hsiz == 256 || hsiz == 384 || hsiz == 512); 120 | ctx->hsiz = hsiz / 8; 121 | ctx->rsiz = 200 - 2 * ctx->hsiz; 122 | ctx->numbytes = 0; 123 | memset(ctx->state, 0, sizeof(ctx->state)); 124 | } 125 | 126 | EXPORT void SHA3_absorb(struct SHA3Context * ctx, 127 | unsigned char * data, 128 | unsigned long len) 129 | { 130 | int n; 131 | 132 | /* If data was left in buffer, fill with fresh data and absorb */ 133 | if (ctx->numbytes != 0) { 134 | n = ctx->rsiz - ctx->numbytes; 135 | if (len < n) { 136 | memcpy(ctx->buffer + ctx->numbytes, data, len); 137 | ctx->numbytes += len; 138 | return; 139 | } 140 | memcpy(ctx->buffer + ctx->numbytes, data, n); 141 | KeccakAbsorb(ctx->state, ctx->buffer, ctx->rsiz); 142 | data += n; 143 | len -= n; 144 | } 145 | /* Absorb data in blocks of [rsiz] bytes */ 146 | while (len >= ctx->rsiz) { 147 | KeccakAbsorb(ctx->state, data, ctx->rsiz); 148 | data += ctx->rsiz; 149 | len -= ctx->rsiz; 150 | } 151 | /* Save remaining data */ 152 | if (len > 0) memcpy(ctx->buffer, data, len); 153 | ctx->numbytes = len; 154 | } 155 | 156 | EXPORT void SHA3_extract(unsigned char padding, 157 | struct SHA3Context * ctx, 158 | unsigned char * output) 159 | { 160 | int i, j, n; 161 | 162 | /* Apply final padding */ 163 | n = ctx->numbytes; 164 | ctx->buffer[n] = padding; 165 | n++; 166 | memset(ctx->buffer + n, 0, ctx->rsiz - n); 167 | ctx->buffer[ctx->rsiz - 1] |= 0x80; 168 | 169 | /* Absorb remaining data + padding */ 170 | KeccakAbsorb(ctx->state, ctx->buffer, ctx->rsiz); 171 | 172 | /* Extract hash as low bits of state */ 173 | for (i = 0, j = 0; j < ctx->hsiz; i += 1, j += 
8) { 174 | u64 st = ctx->state[i]; 175 | output[j] = st; 176 | output[j + 1] = st >> 8; 177 | output[j + 2] = st >> 16; 178 | output[j + 3] = st >> 24; 179 | if (j + 4 >= ctx->hsiz) break; 180 | output[j + 4] = st >> 32; 181 | output[j + 5] = st >> 40; 182 | output[j + 6] = st >> 48; 183 | output[j + 7] = st >> 56; 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | - Change `Cryptokit.RSA` to use two distinct types for public keys and for 2 | private keys. (Breaking change.) (#41) 3 | - Add `Cryptokit.Paillier`: Paillier's homomorphic, public-key encryption. 4 | (Contributed by Atish Pranav.) (#39) 5 | 6 | Release 1.20: 7 | - Name space depollution: make C implementations of ciphers local to the 8 | OCaml/C stub code, so that they do not conflict with other C libraries 9 | implementing crypto functions with the same names (#35, #36) 10 | 11 | Release 1.19: 12 | - Fix missing root registration in some Chacha20, Blake2, and 13 | Blake3 functions (#34) 14 | 15 | Release 1.18: 16 | - Add BLAKE3 hash and MAC functions. 17 | - Fix compile-time error "SSE4.1 instruction set not enabled" (#32, #33). 18 | 19 | Release 1.17: 20 | - Add interfaces for authenticated encryption (AEAD) and two implementations: 21 | AES-GCM and Chacha20-Poly1305. 22 | - Use `getentropy()` for `system_rng` when available (Linux, macOS, BSD). 23 | - Removed support for EGD (the Entropy Gathering Daemon). 24 | - Added compile-time alerts on uses of broken or weak ciphers and hashes. 25 | (Can be silenced with "-alert -crypto".) 26 | - Add the hmac_sha384 MAC (#8). 27 | - Add the SipHash MAC. 28 | - Set file descriptor to close-on-exec in `device_rng` (#27). 29 | - Improve compatibility with OCaml 5.0 (#28). 30 | - Make sure CryptokitBignum is installed like before the switch to Dune (#31). 
31 | 32 | Release 1.16.1: 33 | - Make the tests faster and more robust 34 | - Update dependencies and documentation. 35 | 36 | Release 1.16: 37 | - Use dune as the build system (contributed by Andrey Mokhov, PR #24) 38 | - Add BLAKE2b and BLAKE2s hash and MAC functions. 39 | 40 | Release 1.15: 41 | - Added constant-time `string_equal` and `bytes_equal` comparison functions 42 | (execution time depends on the lengths of the strings but not on their 43 | contents) (issue #13, PR #14) 44 | - Caml FFI: use caml_ long names and CAML_NAME_SPACE; get rid of Begin_roots 45 | - OASIS files regenerated in dynamic mode for OCaml 4.09 compatibility. 46 | For this reason, OASIS is now a build dependency. 47 | 48 | Release 1.14: 49 | - Ensure compatibility with OCaml 4.09 and up. 50 | - Detect early AMD Ryzen 3000 bug where the RDRAND instruction always 51 | generates 0xFF...FF, and, in this case, report the hardware RNG as 52 | unavailable. 53 | - Fix formatting of documentation comments (issue #3, PR #5) 54 | - Optional argument to control whether the zlib transform expects a 55 | zlib header (PR #12). 56 | - Fix issue with zlib >= 1.2.9 where internal sanity check is affected 57 | by the stream data block being moved by OCaml's GC (issue #7, PR #17). 58 | - DH.new_parameters: update documentation to suggest at least 2048 59 | bits (PR #18). 60 | - DH.derive_key: use SHA256 instead of SHA1 (PR #19). 61 | 62 | Release 1.13: 63 | - Add the Chacha20 stream cipher. 64 | - Add the AES-CMAC (a.k.a. AES-OMAC1) message authentication code. 65 | - Pseudo-random number generator: replace the old AES-CBC-Fibonacci generator 66 | with a faster, simpler generator based on Chacha20. 67 | - Add an alternate pseudo-random number generator based on AES in CTR mode. 68 | - Documentation: warn about known cryptographic weaknesses in Triple DES, 69 | Blowfish, and ARCfour. 70 | - Documentation: warn about problems with variable-length messages in 71 | MACs based on block ciphers in CBC mode. 
72 | 73 | Release 1.12: 74 | - Fix x86-32 compilation error and improve detection of AES-NI for x86 75 | processors (Jeremie Dimino, Etienne Millon) 76 | (Closes: #1646) 77 | - AES-NI: align key_schedule on a 16 byte boundary (Etienne Millon) 78 | (Closes: #1709) 79 | - Add original Keccak submission to SHA-3 (Yoichi Hirai) 80 | 81 | Release 1.11: 82 | - Adapt to "safe string" mode (OCaml 4.02 and later required). 83 | The API should remain backward-compatible for clients compiled 84 | in "unsafe string" mode. 85 | - Update SHA-3 to the official NIST standard (different padding than 86 | in the Keccak submission). (Closes: #1528) 87 | - Fixed bounds checking in "add_substring" methods of hash functions 88 | and other functions that operate on a substring of a string. 89 | (Closes: #1480) 90 | - Use hardware implementation of AES when available on x86 processors. 91 | (Faster than the software implementation and less sensitive to 92 | side channel attacks.) 93 | - Use the Zarith library to implement RSA. 94 | (Faster than the previous implementation and less sensitive to 95 | side channel attacks.) 96 | - Support the hardware random number generator present in recent 97 | x86 processors. 98 | - Rebuilt generated files with Oasis 0.4.6 for OCaml 4.03 compatibility. 99 | 100 | Release 1.10: 101 | - Add all SHA-2 hash functions: SHA-224, SHA-384 and SHA-512 102 | in addition to the existing SHA-256. (Closes: #1223) 103 | - Add support for CTR (Counter) chaining mode. 104 | - Fix compilation error with OCaml 4.03+dev. 105 | - Avoid using some obsolete OCaml stdlib functions. 106 | 107 | Release 1.9: 108 | - More fixes to build in Windows with zlib (mingw and msvc). 109 | 110 | Release 1.8: 111 | - Build .cmxs with C bindings (Closes: #1303) 112 | - Use advapi32 on Windows (Close: #1055) 113 | - Allow to define --zlib-include and --zlib-libdir if zlib is not installed in 114 | the standard location. 115 | 116 | Release 1.7: 117 | - Added SHA-3 hash function. 
118 | 119 | Release 1.6: 120 | - Regenerate setup.ml with oasis 0.3.0~rc6 version 121 | 122 | Release 1.5: 123 | - Fix bug check in buffered_output#ensure_capacity (Closes: #879) 124 | - Allow to have padding in Base64 (Closes: #897) 125 | 126 | Release 1.4: 127 | - Added Blowfish block cipher. 128 | - Added MAC functions based on HMAC construction applied to 129 | SHA-256 and RIPEMD-160. 130 | - Added OASIS and findlib support (Closes: #589) 131 | 132 | Release 1.3: 133 | - Added hash functions SHA-256 and RIPEMD-160. 134 | - Added "flush" method to transforms. 135 | - Fixed infinite loop in decompression of incorrect data. 136 | 137 | Release 1.2: 138 | - MS Windows port 139 | 140 | Release 1.1: 141 | - Added Diffie-Hellman key agreement 142 | - Exported raw modular arithmetic operations (mod_power, mod_mult) 143 | 144 | Release 1.0: 145 | - First public release 146 | -------------------------------------------------------------------------------- /src/poly1305-donna-64.h: -------------------------------------------------------------------------------- 1 | /* Poly1305 implementation written by Andrew Moon, 2 | https://github.com/floodyberry/poly1305-donna 3 | License: MIT or public domain. 4 | Minor adaptations for Cryptokit by Xavier Leroy. 
*/ 5 | 6 | /* 7 | poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition 8 | */ 9 | 10 | #include 11 | 12 | #if defined(__GNUC__) 13 | typedef unsigned __int128 uint128; 14 | #define MUL(out, x, y) out = ((uint128)x * y) 15 | #define ADD(out, in) out += in 16 | #define ADDLO(out, in) out += in 17 | #define SHR(in, shift) (uint64_t)(in >> (shift)) 18 | #define LO(in) (uint64_t)(in) 19 | 20 | #define POLY1305_NOINLINE __attribute__((noinline)) 21 | #endif 22 | 23 | #define poly1305_block_size 16 24 | 25 | /* 17 + sizeof(size_t) + 8*sizeof(uint64_t) */ 26 | typedef struct poly1305_state_internal_t { 27 | uint64_t r[3]; 28 | uint64_t h[3]; 29 | uint64_t pad[2]; 30 | size_t leftover; 31 | unsigned char buffer[poly1305_block_size]; 32 | unsigned char final; 33 | } poly1305_state_internal_t; 34 | 35 | /* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in little endian */ 36 | static inline uint64_t 37 | U8TO64(const unsigned char *p) { 38 | return 39 | (((uint64_t)(p[0] & 0xff) ) | 40 | ((uint64_t)(p[1] & 0xff) << 8) | 41 | ((uint64_t)(p[2] & 0xff) << 16) | 42 | ((uint64_t)(p[3] & 0xff) << 24) | 43 | ((uint64_t)(p[4] & 0xff) << 32) | 44 | ((uint64_t)(p[5] & 0xff) << 40) | 45 | ((uint64_t)(p[6] & 0xff) << 48) | 46 | ((uint64_t)(p[7] & 0xff) << 56)); 47 | } 48 | 49 | /* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little endian */ 50 | static inline void 51 | U64TO8(unsigned char *p, uint64_t v) { 52 | p[0] = (v ) & 0xff; 53 | p[1] = (v >> 8) & 0xff; 54 | p[2] = (v >> 16) & 0xff; 55 | p[3] = (v >> 24) & 0xff; 56 | p[4] = (v >> 32) & 0xff; 57 | p[5] = (v >> 40) & 0xff; 58 | p[6] = (v >> 48) & 0xff; 59 | p[7] = (v >> 56) & 0xff; 60 | } 61 | 62 | void 63 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { 64 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 65 | uint64_t t0,t1; 66 | 67 | /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 68 | t0 = U8TO64(&key[0]); 69 
| t1 = U8TO64(&key[8]); 70 | 71 | st->r[0] = ( t0 ) & 0xffc0fffffff; 72 | st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; 73 | st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; 74 | 75 | /* h = 0 */ 76 | st->h[0] = 0; 77 | st->h[1] = 0; 78 | st->h[2] = 0; 79 | 80 | /* save pad for later */ 81 | st->pad[0] = U8TO64(&key[16]); 82 | st->pad[1] = U8TO64(&key[24]); 83 | 84 | st->leftover = 0; 85 | st->final = 0; 86 | } 87 | 88 | static void 89 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) { 90 | const uint64_t hibit = (st->final) ? 0 : ((uint64_t)1 << 40); /* 1 << 128 */ 91 | uint64_t r0,r1,r2; 92 | uint64_t s1,s2; 93 | uint64_t h0,h1,h2; 94 | uint64_t c; 95 | uint128 d0,d1,d2,d; 96 | 97 | r0 = st->r[0]; 98 | r1 = st->r[1]; 99 | r2 = st->r[2]; 100 | 101 | h0 = st->h[0]; 102 | h1 = st->h[1]; 103 | h2 = st->h[2]; 104 | 105 | s1 = r1 * (5 << 2); 106 | s2 = r2 * (5 << 2); 107 | 108 | while (bytes >= poly1305_block_size) { 109 | uint64_t t0,t1; 110 | 111 | /* h += m[i] */ 112 | t0 = U8TO64(&m[0]); 113 | t1 = U8TO64(&m[8]); 114 | 115 | h0 += (( t0 ) & 0xfffffffffff); 116 | h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); 117 | h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; 118 | 119 | /* h *= r */ 120 | MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d); 121 | MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d); 122 | MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d); 123 | 124 | /* (partial) h %= p */ 125 | c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff; 126 | ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff; 127 | ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff; 128 | h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; 129 | h1 += c; 130 | 131 | m += poly1305_block_size; 132 | bytes -= poly1305_block_size; 133 | } 134 | 135 | st->h[0] = h0; 136 | st->h[1] = h1; 137 | st->h[2] = h2; 138 | } 139 | 140 | 141 | void 142 | poly1305_finish(poly1305_context 
*ctx, unsigned char mac[16]) { 143 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 144 | uint64_t h0,h1,h2,c; 145 | uint64_t g0,g1,g2; 146 | uint64_t t0,t1; 147 | 148 | /* process the remaining block */ 149 | if (st->leftover) { 150 | size_t i = st->leftover; 151 | st->buffer[i] = 1; 152 | for (i = i + 1; i < poly1305_block_size; i++) 153 | st->buffer[i] = 0; 154 | st->final = 1; 155 | poly1305_blocks(st, st->buffer, poly1305_block_size); 156 | } 157 | 158 | /* fully carry h */ 159 | h0 = st->h[0]; 160 | h1 = st->h[1]; 161 | h2 = st->h[2]; 162 | 163 | c = (h1 >> 44); h1 &= 0xfffffffffff; 164 | h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; 165 | h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; 166 | h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; 167 | h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; 168 | h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; 169 | h1 += c; 170 | 171 | /* compute h + -p */ 172 | g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; 173 | g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; 174 | g2 = h2 + c - ((uint64_t)1 << 42); 175 | 176 | /* select h if h < p, or h + -p if h >= p */ 177 | c = (g2 >> ((sizeof(uint64_t) * 8) - 1)) - 1; 178 | g0 &= c; 179 | g1 &= c; 180 | g2 &= c; 181 | c = ~c; 182 | h0 = (h0 & c) | g0; 183 | h1 = (h1 & c) | g1; 184 | h2 = (h2 & c) | g2; 185 | 186 | /* h = (h + pad) */ 187 | t0 = st->pad[0]; 188 | t1 = st->pad[1]; 189 | 190 | h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; 191 | h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; 192 | h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff; 193 | 194 | /* mac = h % (2^128) */ 195 | h0 = ((h0 ) | (h1 << 44)); 196 | h1 = ((h1 >> 20) | (h2 << 24)); 197 | 198 | U64TO8(&mac[0], h0); 199 | U64TO8(&mac[8], h1); 200 | 201 | /* zero out the state */ 202 | st->h[0] = 0; 203 | st->h[1] = 0; 204 | st->h[2] = 0; 205 | st->r[0] = 0; 206 | st->r[1] = 0; 207 | st->r[2] = 0; 208 | st->pad[0] 
= 0; 209 | st->pad[1] = 0; 210 | } 211 | 212 | -------------------------------------------------------------------------------- /src/blake3_portable.c: -------------------------------------------------------------------------------- 1 | #include "blake3_impl.h" 2 | #include <string.h> /* memcpy, used by hash_one_portable */ 3 | 4 | /* Rotate the 32-bit word w right by c bits. */ INLINE uint32_t rotr32(uint32_t w, uint32_t c) { 5 | return (w >> c) | (w << (32 - c)); 6 | } 7 | 8 | /* Mixing function: folds message words x and y into state words a, b, c, d using additions, XORs and right-rotations by 16, 12, 8 and 7 bits. */ INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, 9 | uint32_t x, uint32_t y) { 10 | state[a] = state[a] + state[b] + x; 11 | state[d] = rotr32(state[d] ^ state[a], 16); 12 | state[c] = state[c] + state[d]; 13 | state[b] = rotr32(state[b] ^ state[c], 12); 14 | state[a] = state[a] + state[b] + y; 15 | state[d] = rotr32(state[d] ^ state[a], 8); 16 | state[c] = state[c] + state[d]; 17 | state[b] = rotr32(state[b] ^ state[c], 7); 18 | } 19 | 20 | INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) { 21 | // Select the message schedule based on the round. 22 | const uint8_t *schedule = MSG_SCHEDULE[round]; 23 | 24 | // Mix the columns. 25 | g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); 26 | g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); 27 | g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); 28 | g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); 29 | 30 | // Mix the rows.
31 | g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); 32 | g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); 33 | g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); 34 | g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); 35 | } 36 | 37 | INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8], 38 | const uint8_t block[BLAKE3_BLOCK_LEN], 39 | uint8_t block_len, uint64_t counter, uint8_t flags) { 40 | uint32_t block_words[16]; 41 | block_words[0] = load32(block + 4 * 0); 42 | block_words[1] = load32(block + 4 * 1); 43 | block_words[2] = load32(block + 4 * 2); 44 | block_words[3] = load32(block + 4 * 3); 45 | block_words[4] = load32(block + 4 * 4); 46 | block_words[5] = load32(block + 4 * 5); 47 | block_words[6] = load32(block + 4 * 6); 48 | block_words[7] = load32(block + 4 * 7); 49 | block_words[8] = load32(block + 4 * 8); 50 | block_words[9] = load32(block + 4 * 9); 51 | block_words[10] = load32(block + 4 * 10); 52 | block_words[11] = load32(block + 4 * 11); 53 | block_words[12] = load32(block + 4 * 12); 54 | block_words[13] = load32(block + 4 * 13); 55 | block_words[14] = load32(block + 4 * 14); 56 | block_words[15] = load32(block + 4 * 15); 57 | 58 | state[0] = cv[0]; 59 | state[1] = cv[1]; 60 | state[2] = cv[2]; 61 | state[3] = cv[3]; 62 | state[4] = cv[4]; 63 | state[5] = cv[5]; 64 | state[6] = cv[6]; 65 | state[7] = cv[7]; 66 | state[8] = IV[0]; 67 | state[9] = IV[1]; 68 | state[10] = IV[2]; 69 | state[11] = IV[3]; 70 | state[12] = counter_low(counter); 71 | state[13] = counter_high(counter); 72 | state[14] = (uint32_t)block_len; 73 | state[15] = (uint32_t)flags; 74 | 75 | round_fn(state, &block_words[0], 0); 76 | round_fn(state, &block_words[0], 1); 77 | round_fn(state, &block_words[0], 2); 78 | round_fn(state, &block_words[0], 3); 79 | round_fn(state, &block_words[0], 4); 80 | round_fn(state, &block_words[0], 5); 81 | round_fn(state, &block_words[0], 6); 82 | } 83 | 84 | EXPORT void 
blake3_compress_in_place_portable(uint32_t cv[8], 85 | const uint8_t block[BLAKE3_BLOCK_LEN], 86 | uint8_t block_len, uint64_t counter, 87 | uint8_t flags) { 88 | uint32_t state[16]; 89 | compress_pre(state, cv, block, block_len, counter, flags); 90 | cv[0] = state[0] ^ state[8]; 91 | cv[1] = state[1] ^ state[9]; 92 | cv[2] = state[2] ^ state[10]; 93 | cv[3] = state[3] ^ state[11]; 94 | cv[4] = state[4] ^ state[12]; 95 | cv[5] = state[5] ^ state[13]; 96 | cv[6] = state[6] ^ state[14]; 97 | cv[7] = state[7] ^ state[15]; 98 | } 99 | 100 | EXPORT void blake3_compress_xof_portable(const uint32_t cv[8], 101 | const uint8_t block[BLAKE3_BLOCK_LEN], 102 | uint8_t block_len, uint64_t counter, 103 | uint8_t flags, uint8_t out[64]) { 104 | uint32_t state[16]; 105 | compress_pre(state, cv, block, block_len, counter, flags); 106 | 107 | store32(&out[0 * 4], state[0] ^ state[8]); 108 | store32(&out[1 * 4], state[1] ^ state[9]); 109 | store32(&out[2 * 4], state[2] ^ state[10]); 110 | store32(&out[3 * 4], state[3] ^ state[11]); 111 | store32(&out[4 * 4], state[4] ^ state[12]); 112 | store32(&out[5 * 4], state[5] ^ state[13]); 113 | store32(&out[6 * 4], state[6] ^ state[14]); 114 | store32(&out[7 * 4], state[7] ^ state[15]); 115 | store32(&out[8 * 4], state[8] ^ cv[0]); 116 | store32(&out[9 * 4], state[9] ^ cv[1]); 117 | store32(&out[10 * 4], state[10] ^ cv[2]); 118 | store32(&out[11 * 4], state[11] ^ cv[3]); 119 | store32(&out[12 * 4], state[12] ^ cv[4]); 120 | store32(&out[13 * 4], state[13] ^ cv[5]); 121 | store32(&out[14 * 4], state[14] ^ cv[6]); 122 | store32(&out[15 * 4], state[15] ^ cv[7]); 123 | } 124 | 125 | INLINE void hash_one_portable(const uint8_t *input, size_t blocks, 126 | const uint32_t key[8], uint64_t counter, 127 | uint8_t flags, uint8_t flags_start, 128 | uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { 129 | uint32_t cv[8]; 130 | memcpy(cv, key, BLAKE3_KEY_LEN); 131 | uint8_t block_flags = flags | flags_start; 132 | while (blocks > 0) { 133 | if (blocks == 
1) { 134 | block_flags |= flags_end; 135 | } 136 | blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter, 137 | block_flags); 138 | input = &input[BLAKE3_BLOCK_LEN]; 139 | blocks -= 1; 140 | block_flags = flags; 141 | } 142 | store_cv_words(out, cv); 143 | } 144 | 145 | EXPORT void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, 146 | size_t blocks, const uint32_t key[8], 147 | uint64_t counter, bool increment_counter, 148 | uint8_t flags, uint8_t flags_start, 149 | uint8_t flags_end, uint8_t *out) { 150 | while (num_inputs > 0) { 151 | hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start, 152 | flags_end, out); 153 | if (increment_counter) { 154 | counter += 1; 155 | } 156 | inputs += 1; 157 | num_inputs -= 1; 158 | out = &out[BLAKE3_OUT_LEN]; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/poly1305-donna-32.h: -------------------------------------------------------------------------------- 1 | /* Poly1305 implementation written by Andrew Moon, 2 | https://github.com/floodyberry/poly1305-donna 3 | License: MIT or public domain. 4 | Minor adaptations for Cryptokit by Xavier Leroy. 
*/ 5 | 6 | /* 7 | poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition 8 | */ 9 | 10 | #include <stdint.h> /* uint32_t, uint64_t */ 11 | 12 | #define poly1305_block_size 16 13 | 14 | /* 17 + sizeof(size_t) + 14*sizeof(uint32_t) */ 15 | typedef struct poly1305_state_internal_t { 16 | uint32_t r[5]; 17 | uint32_t h[5]; 18 | uint32_t pad[4]; 19 | size_t leftover; 20 | unsigned char buffer[poly1305_block_size]; 21 | unsigned char final; 22 | } poly1305_state_internal_t; 23 | 24 | /* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ 25 | static inline uint32_t 26 | U8TO32(const unsigned char *p) { 27 | return 28 | (((uint32_t)(p[0] & 0xff) ) | 29 | ((uint32_t)(p[1] & 0xff) << 8) | 30 | ((uint32_t)(p[2] & 0xff) << 16) | 31 | ((uint32_t)(p[3] & 0xff) << 24)); 32 | } 33 | 34 | /* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ 35 | static inline void 36 | U32TO8(unsigned char *p, uint32_t v) { 37 | p[0] = (v ) & 0xff; 38 | p[1] = (v >> 8) & 0xff; 39 | p[2] = (v >> 16) & 0xff; 40 | p[3] = (v >> 24) & 0xff; 41 | } 42 | 43 | /* Initialise the state from a 32-byte key: key[0..15] is masked and split into the five limbs of r (each at most 26 bits), key[16..31] is saved as the pad, and the accumulator h, leftover count and final flag are cleared. */ void 44 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { 45 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 46 | 47 | /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 48 | st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff; 49 | st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03; 50 | st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff; 51 | st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff; 52 | st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; 53 | 54 | /* h = 0 */ 55 | st->h[0] = 0; 56 | st->h[1] = 0; 57 | st->h[2] = 0; 58 | st->h[3] = 0; 59 | st->h[4] = 0; 60 | 61 | /* save pad for later */ 62 | st->pad[0] = U8TO32(&key[16]); 63 | st->pad[1] = U8TO32(&key[20]); 64 | st->pad[2] = U8TO32(&key[24]); 65 | st->pad[3] = U8TO32(&key[28]); 66 | 67 | st->leftover = 0; 68 | st->final = 0; 69 | } 70 | 71 | static void 72 | poly1305_blocks(poly1305_state_internal_t *st,
const unsigned char *m, size_t bytes) { 73 | const uint32_t hibit = (st->final) ? 0 : (1UL << 24); /* 1 << 128 */ 74 | uint32_t r0,r1,r2,r3,r4; 75 | uint32_t s1,s2,s3,s4; 76 | uint32_t h0,h1,h2,h3,h4; 77 | uint64_t d0,d1,d2,d3,d4; 78 | uint32_t c; 79 | 80 | r0 = st->r[0]; 81 | r1 = st->r[1]; 82 | r2 = st->r[2]; 83 | r3 = st->r[3]; 84 | r4 = st->r[4]; 85 | 86 | s1 = r1 * 5; 87 | s2 = r2 * 5; 88 | s3 = r3 * 5; 89 | s4 = r4 * 5; 90 | 91 | h0 = st->h[0]; 92 | h1 = st->h[1]; 93 | h2 = st->h[2]; 94 | h3 = st->h[3]; 95 | h4 = st->h[4]; 96 | 97 | while (bytes >= poly1305_block_size) { 98 | /* h += m[i] */ 99 | h0 += (U8TO32(m+ 0) ) & 0x3ffffff; 100 | h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff; 101 | h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff; 102 | h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff; 103 | h4 += (U8TO32(m+12) >> 8) | hibit; 104 | 105 | /* h *= r */ 106 | d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1); 107 | d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2); 108 | d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3); 109 | d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4); 110 | d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0); 111 | 112 | /* (partial) h %= p */ 113 | c = (uint32_t)(d0 >> 26); h0 = (uint32_t)d0 & 0x3ffffff; 114 | d1 += c; c = (uint32_t)(d1 >> 26); h1 = (uint32_t)d1 & 0x3ffffff; 115 | d2 += c; c = (uint32_t)(d2 >> 26); h2 = (uint32_t)d2 & 0x3ffffff; 116 | d3 += c; c = (uint32_t)(d3 >> 26); h3 = (uint32_t)d3 & 0x3ffffff; 117 | d4 += c; c = (uint32_t)(d4 >> 26); h4 = (uint32_t)d4 & 0x3ffffff; 118 | h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; 119 | h1 += c; 120 | 121 | m += poly1305_block_size; 122 | bytes -= poly1305_block_size; 123 | } 124 | 125 | 
st->h[0] = h0; 126 | st->h[1] = h1; 127 | st->h[2] = h2; 128 | st->h[3] = h3; 129 | st->h[4] = h4; 130 | } 131 | 132 | void 133 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { 134 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 135 | uint32_t h0,h1,h2,h3,h4,c; 136 | uint32_t g0,g1,g2,g3,g4; 137 | uint64_t f; 138 | uint32_t mask; 139 | 140 | /* process the remaining block */ 141 | if (st->leftover) { 142 | size_t i = st->leftover; 143 | st->buffer[i++] = 1; 144 | for (; i < poly1305_block_size; i++) 145 | st->buffer[i] = 0; 146 | st->final = 1; 147 | poly1305_blocks(st, st->buffer, poly1305_block_size); 148 | } 149 | 150 | /* fully carry h */ 151 | h0 = st->h[0]; 152 | h1 = st->h[1]; 153 | h2 = st->h[2]; 154 | h3 = st->h[3]; 155 | h4 = st->h[4]; 156 | 157 | c = h1 >> 26; h1 = h1 & 0x3ffffff; 158 | h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; 159 | h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; 160 | h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; 161 | h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; 162 | h1 += c; 163 | 164 | /* compute h + -p */ 165 | g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; 166 | g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; 167 | g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; 168 | g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; 169 | g4 = h4 + c - (1UL << 26); 170 | 171 | /* select h if h < p, or h + -p if h >= p */ 172 | mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1; 173 | g0 &= mask; 174 | g1 &= mask; 175 | g2 &= mask; 176 | g3 &= mask; 177 | g4 &= mask; 178 | mask = ~mask; 179 | h0 = (h0 & mask) | g0; 180 | h1 = (h1 & mask) | g1; 181 | h2 = (h2 & mask) | g2; 182 | h3 = (h3 & mask) | g3; 183 | h4 = (h4 & mask) | g4; 184 | 185 | /* h = h % (2^128) */ 186 | h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; 187 | h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; 188 | h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; 189 | h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; 190 | 191 | /* mac = (h + pad) % (2^128) */ 192 | f = (uint64_t)h0 + 
st->pad[0] ; h0 = (uint32_t)f; 193 | f = (uint64_t)h1 + st->pad[1] + (f >> 32); h1 = (uint32_t)f; 194 | f = (uint64_t)h2 + st->pad[2] + (f >> 32); h2 = (uint32_t)f; 195 | f = (uint64_t)h3 + st->pad[3] + (f >> 32); h3 = (uint32_t)f; 196 | 197 | U32TO8(mac + 0, h0); 198 | U32TO8(mac + 4, h1); 199 | U32TO8(mac + 8, h2); 200 | U32TO8(mac + 12, h3); 201 | 202 | /* zero out the state */ 203 | st->h[0] = 0; 204 | st->h[1] = 0; 205 | st->h[2] = 0; 206 | st->h[3] = 0; 207 | st->h[4] = 0; 208 | st->r[0] = 0; 209 | st->r[1] = 0; 210 | st->r[2] = 0; 211 | st->r[3] = 0; 212 | st->r[4] = 0; 213 | st->pad[0] = 0; 214 | st->pad[1] = 0; 215 | st->pad[2] = 0; 216 | st->pad[3] = 0; 217 | } 218 | 219 | -------------------------------------------------------------------------------- /src/sha256.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2004 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* SHA-256 hashing */ 15 | 16 | #include 17 | #include 18 | #include "sha256.h" 19 | 20 | /* Ref: FIPS publication 180-2 */ 21 | 22 | #define ROTR(x,n) ((x) >> (n) | (x) << (32 - (n))) 23 | 24 | #define CH(x,y,z) (z ^ (x & (y ^ z))) 25 | #define MAJ(x,y,z) ((x & y) | (z & (x | y))) 26 | #define SIGMA0(x) (ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22)) 27 | #define SIGMA1(x) (ROTR(x,6) ^ ROTR(x,11) ^ ROTR(x,25)) 28 | #define sigma0(x) (ROTR(x,7) ^ ROTR(x,18) ^ (x >> 3)) 29 | #define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ (x >> 10)) 30 | 31 | static void SHA256_copy_and_swap(void * src, void * dst, int numwords) 32 | { 33 | #ifdef ARCH_BIG_ENDIAN 34 | memcpy(dst, src, numwords * sizeof(u32)); 35 | #else 36 | unsigned char * s, * d; 37 | unsigned char a, b; 38 | for (s = src, d = dst; numwords > 0; s += 4, d += 4, numwords--) { 39 | a = s[0]; 40 | b = s[1]; 41 | d[0] = s[3]; 42 | d[1] = s[2]; 43 | d[2] = b; 44 | d[3] = a; 45 | } 46 | #endif 47 | } 48 | 49 | static u32 SHA256_constants[64] = { 50 | 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 51 | 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 52 | 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 53 | 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 54 | 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 55 | 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 56 | 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 57 | 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 58 | 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 59 | 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 60 | 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 61 | 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 62 | 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 63 | 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 64 | 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 65 | 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 66 | }; 67 | 68 | static void SHA256_transform(struct 
SHA256Context * ctx) 69 | { 70 | int i; 71 | register u32 a, b, c, d, e, f, g, h, t1, t2; 72 | u32 data[80]; 73 | 74 | /* Convert buffer data to 16 big-endian integers */ 75 | SHA256_copy_and_swap(ctx->buffer, data, 16); 76 | 77 | /* Expand into 80 integers */ 78 | for (i = 16; i < 80; i++) { 79 | data[i] = sigma1(data[i-2]) + data[i-7] + sigma0(data[i-15]) + data[i-16]; 80 | } 81 | 82 | /* Initialize working variables */ 83 | a = ctx->state[0]; 84 | b = ctx->state[1]; 85 | c = ctx->state[2]; 86 | d = ctx->state[3]; 87 | e = ctx->state[4]; 88 | f = ctx->state[5]; 89 | g = ctx->state[6]; 90 | h = ctx->state[7]; 91 | 92 | /* Perform rounds */ 93 | #if 0 94 | for (i = 0; i < 64; i++) { 95 | t1 = h + SIGMA1(e) + CH(e, f, g) + SHA256_constants[i] + data[i]; 96 | t2 = SIGMA0(a) + MAJ(a, b, c); 97 | h = g; g = f; f = e; e = d + t1; 98 | d = c; c = b; b = a; a = t1 + t2; 99 | } 100 | #else 101 | #define STEP(a,b,c,d,e,f,g,h,i) \ 102 | t1 = h + SIGMA1(e) + CH(e, f, g) + SHA256_constants[i] + data[i]; \ 103 | t2 = SIGMA0(a) + MAJ(a, b, c); \ 104 | d = d + t1; \ 105 | h = t1 + t2 106 | 107 | for (i = 0; i < 64; i += 8) { 108 | STEP(a,b,c,d,e,f,g,h,i); 109 | STEP(h,a,b,c,d,e,f,g,i+1); 110 | STEP(g,h,a,b,c,d,e,f,i+2); 111 | STEP(f,g,h,a,b,c,d,e,i+3); 112 | STEP(e,f,g,h,a,b,c,d,i+4); 113 | STEP(d,e,f,g,h,a,b,c,i+5); 114 | STEP(c,d,e,f,g,h,a,b,i+6); 115 | STEP(b,c,d,e,f,g,h,a,i+7); 116 | } 117 | #endif 118 | 119 | /* Update chaining values */ 120 | ctx->state[0] += a; 121 | ctx->state[1] += b; 122 | ctx->state[2] += c; 123 | ctx->state[3] += d; 124 | ctx->state[4] += e; 125 | ctx->state[5] += f; 126 | ctx->state[6] += g; 127 | ctx->state[7] += h; 128 | } 129 | 130 | EXPORT void SHA256_init(struct SHA256Context * ctx, int bitsize) 131 | { 132 | switch (bitsize) { 133 | case 224: 134 | ctx->state[0] = 0xc1059ed8; 135 | ctx->state[1] = 0x367cd507; 136 | ctx->state[2] = 0x3070dd17; 137 | ctx->state[3] = 0xf70e5939; 138 | ctx->state[4] = 0xffc00b31; 139 | ctx->state[5] = 0x68581511; 
140 | ctx->state[6] = 0x64f98fa7; 141 | ctx->state[7] = 0xbefa4fa4; 142 | break; 143 | case 256: 144 | ctx->state[0] = 0x6A09E667; 145 | ctx->state[1] = 0xBB67AE85; 146 | ctx->state[2] = 0x3C6EF372; 147 | ctx->state[3] = 0xA54FF53A; 148 | ctx->state[4] = 0x510E527F; 149 | ctx->state[5] = 0x9B05688C; 150 | ctx->state[6] = 0x1F83D9AB; 151 | ctx->state[7] = 0x5BE0CD19; 152 | break; 153 | default: 154 | /* The bit size is wrong. Just zero the state to produce 155 | incorrect hashes. */ 156 | memset(ctx->state, 0, sizeof(ctx->state)); 157 | break; 158 | } 159 | ctx->numbytes = 0; 160 | ctx->length[0] = 0; 161 | ctx->length[1] = 0; 162 | } 163 | 164 | EXPORT void SHA256_add_data(struct SHA256Context * ctx, unsigned char * data, 165 | unsigned long len) 166 | { 167 | u32 t; 168 | 169 | /* Update length */ 170 | t = ctx->length[1]; 171 | if ((ctx->length[1] = t + (u32) (len << 3)) < t) 172 | ctx->length[0]++; /* carry from low 32 bits to high 32 bits */ 173 | ctx->length[0] += (u32) (len >> 29); 174 | 175 | /* If data was left in buffer, pad it with fresh data and munge block */ 176 | if (ctx->numbytes != 0) { 177 | t = 64 - ctx->numbytes; 178 | if (len < t) { 179 | memcpy(ctx->buffer + ctx->numbytes, data, len); 180 | ctx->numbytes += len; 181 | return; 182 | } 183 | memcpy(ctx->buffer + ctx->numbytes, data, t); 184 | SHA256_transform(ctx); 185 | data += t; 186 | len -= t; 187 | } 188 | /* Munge data in 64-byte chunks */ 189 | while (len >= 64) { 190 | memcpy(ctx->buffer, data, 64); 191 | SHA256_transform(ctx); 192 | data += 64; 193 | len -= 64; 194 | } 195 | /* Save remaining data */ 196 | memcpy(ctx->buffer, data, len); 197 | ctx->numbytes = len; 198 | } 199 | 200 | EXPORT void SHA256_finish(struct SHA256Context * ctx, int bitsize, 201 | unsigned char * output) 202 | { 203 | int i = ctx->numbytes; 204 | 205 | /* Set first char of padding to 0x80. There is always room. 
*/ 206 | ctx->buffer[i++] = 0x80; 207 | /* If we do not have room for the length (8 bytes), pad to 64 bytes 208 | with zeroes and munge the data block */ 209 | if (i > 56) { 210 | memset(ctx->buffer + i, 0, 64 - i); 211 | SHA256_transform(ctx); 212 | i = 0; 213 | } 214 | /* Pad to byte 56 with zeroes */ 215 | memset(ctx->buffer + i, 0, 56 - i); 216 | /* Add length in big-endian */ 217 | SHA256_copy_and_swap(ctx->length, ctx->buffer + 56, 2); 218 | /* Munge the final block */ 219 | SHA256_transform(ctx); 220 | /* Final hash value is in ctx->state modulo big-endian conversion */ 221 | switch (bitsize) { 222 | case 256: 223 | SHA256_copy_and_swap(ctx->state, output, 8); 224 | break; 225 | case 224: 226 | SHA256_copy_and_swap(ctx->state, output, 7); 227 | break; 228 | /* default: The bit size is wrong. Produce no output. */ 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /test/speedtest.ml: -------------------------------------------------------------------------------- 1 | (***********************************************************************) 2 | (* *) 3 | (* The Cryptokit library *) 4 | (* *) 5 | (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) 6 | (* *) 7 | (* Copyright 2002 Institut National de Recherche en Informatique et *) 8 | (* en Automatique. All rights reserved. This file is distributed *) 9 | (* under the terms of the GNU Library General Public License, with *) 10 | (* the special exception on linking described in file LICENSE. *) 11 | (* *) 12 | (***********************************************************************) 13 | 14 | (* $Id$ *) 15 | 16 | (* Performance measurement *) 17 | 18 | open Cryptokit 19 | 20 | let time_fn msg fn = 21 | let start = Sys.time() in 22 | let rec do_time nrun = 23 | let res = fn () in 24 | let stop = Sys.time() in 25 | let t = stop -. start in 26 | if t < 0.5 then do_time (nrun + 1) else begin 27 | Printf.printf "%7.3f %s\n%!" (t /. 
float nrun) msg; 28 | res 29 | end 30 | in do_time 1 31 | 32 | let rec repeat n fn () = 33 | if n <= 1 then fn() else (ignore(fn()); repeat (n-1) fn ()) 34 | 35 | let raw_block_cipher cipher niter () = 36 | let msg = Bytes.create cipher#blocksize in 37 | for i = 1 to niter do 38 | cipher#transform msg 0 msg 0 39 | done 40 | 41 | let raw_stream_cipher cipher niter blocksize () = 42 | let msg = Bytes.create blocksize in 43 | for i = 1 to niter do 44 | cipher#transform msg 0 msg 0 blocksize 45 | done 46 | 47 | let transform tr niter blocksize () = 48 | let msg = Bytes.create blocksize in 49 | for i = 1 to niter do 50 | tr#put_substring msg 0 blocksize; ignore(tr#get_substring) 51 | done 52 | 53 | let hash h niter blocksize () = 54 | let msg = Bytes.create blocksize in 55 | for i = 1 to niter do 56 | h#add_substring msg 0 blocksize 57 | done; 58 | ignore(h#result) 59 | 60 | let rng r niter blocksize () = 61 | let buf = Bytes.create blocksize in 62 | for i = 1 to niter do 63 | r#random_bytes buf 0 blocksize 64 | done 65 | 66 | let _ = 67 | time_fn "Raw AES 128, 64_000_000 bytes" 68 | (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF") 4000000); 69 | time_fn "Raw AES 192, 64_000_000 bytes" 70 | (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF01234567") 4000000); 71 | time_fn "Raw AES 256, 64_000_000 bytes" 72 | (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF0123456789ABCDEF") 4000000); 73 | time_fn "Raw DES, 16_000_000 bytes" 74 | (raw_block_cipher (new Block.des_encrypt "01234567") 2000000); 75 | time_fn "Raw 3DES, 16_000_000 bytes" 76 | (raw_block_cipher (new Block.triple_des_encrypt "0123456789ABCDEF") 2000000); 77 | time_fn "Raw ARCfour, 64_000_000 bytes, 16-byte chunks" 78 | (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 4000000 16); 79 | time_fn "Raw ARCfour, 64_000_000 bytes, 64-byte chunks" 80 | (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 1000000 64); 81 | time_fn "Raw Chacha20, 64_000_000 bytes, 16-byte 
chunks" 82 | (* Time the ChaCha20 stream cipher itself so the label matches the cipher being measured. *) (raw_stream_cipher (new Stream.chacha20 "0123456789ABCDEF") 4000000 16); 83 | time_fn "Raw Chacha20, 64_000_000 bytes, 64-byte chunks" 84 | (raw_stream_cipher (new Stream.chacha20 "0123456789ABCDEF") 1000000 64); 85 | time_fn "Raw Blowfish 128, 64_000_000 bytes" 86 | (raw_block_cipher (new Block.blowfish_encrypt "0123456789ABCDEF") 8000000); 87 | time_fn "AES-GCM, 64_000_000 bytes" 88 | (transform (AEAD.aes_gcm ~iv:"0123456789AB" "0123456789ABCDEF" AEAD.Encrypt) 4000000 16); 89 | time_fn "Chacha20-Poly1305, 64_000_000 bytes" 90 | (transform (AEAD.chacha20_poly1305 ~iv:"0123456789AB" "0123456789ABCDEF" AEAD.Encrypt) 4000000 16); 91 | time_fn "Wrapped AES 128 CBC, 64_000_000 bytes" 92 | (transform (Cipher.aes "0123456789ABCDEF" Cipher.Encrypt) 4000000 16); 93 | time_fn "Wrapped AES 192 CBC, 64_000_000 bytes" 94 | (transform (Cipher.aes "0123456789ABCDEF01234567" Cipher.Encrypt) 4000000 16); 95 | time_fn "Wrapped AES 256 CBC, 64_000_000 bytes" 96 | (transform (Cipher.aes "0123456789ABCDEF0123456789ABCDEF" Cipher.Encrypt) 4000000 16); 97 | time_fn "Wrapped DES CBC, 16_000_000 bytes" 98 | (transform (Cipher.des "01234567" Cipher.Encrypt) 1000000 16); 99 | time_fn "Wrapped 3DES CBC, 16_000_000 bytes" 100 | (transform (Cipher.triple_des "0123456789ABCDEF" Cipher.Encrypt) 1000000 16); 101 | time_fn "Wrapped ARCfour, 64_000_000 bytes" 102 | (transform (Cipher.arcfour "0123456789ABCDEF" Cipher.Encrypt) 4000000 16); 103 | time_fn "Wrapped Chacha20, 64_000_000 bytes" 104 | (transform (Cipher.chacha20 "0123456789ABCDEF" Cipher.Encrypt) 4000000 16); 105 | time_fn "Wrapped Blowfish 128 CBC, 64_000_000 bytes" 106 | (transform (Cipher.blowfish "0123456789ABCDEF" Cipher.Encrypt) 4000000 16); 107 | time_fn "SHA-1, 64_000_000 bytes, 16-byte chunks" 108 | (hash (Hash.sha1()) 4000000 16); 109 | time_fn "SHA-256, 64_000_000 bytes, 16-byte chunks" 110 | (hash (Hash.sha256()) 4000000 16); 111 | time_fn "SHA-512, 64_000_000 bytes, 16-byte chunks" 112 | (hash (Hash.sha512())
4000000 16); 113 | time_fn "SHA-512/256, 64_000_000 bytes, 16-byte chunks" 114 | (hash (Hash.sha512_256()) 4000000 16); 115 | time_fn "SHA-512/224, 64_000_000 bytes, 16-byte chunks" 116 | (hash (Hash.sha512_224()) 4000000 16); 117 | time_fn "SHA-3 256, 64_000_000 bytes, 16-byte chunks" 118 | (hash (Hash.sha3 256) 4000000 16); 119 | time_fn "SHA-3 512, 64_000_000 bytes, 16-byte chunks" 120 | (hash (Hash.sha3 512) 4000000 16); 121 | time_fn "BLAKE2b 512, 64_000_000 bytes, 16-byte chunks" 122 | (hash (Hash.blake2b 512) 4000000 16); 123 | time_fn "BLAKE2s 256, 64_000_000 bytes, 16-byte chunks" 124 | (hash (Hash.blake2s 256) 4000000 16); 125 | time_fn "BLAKE3, 64_000_000 bytes, 16-byte chunks" 126 | (hash (Hash.blake3 256) 4000000 16); 127 | time_fn "RIPEMD-160, 64_000_000 bytes, 16-byte chunks" 128 | (hash (Hash.ripemd160()) 4000000 16); 129 | time_fn "MD5, 64_000_000 bytes, 16-byte chunks" 130 | (hash (Hash.md5()) 4000000 16); 131 | time_fn "AES CMAC, 64_000_000 bytes, 16-byte chunks" 132 | (hash (MAC.aes_cmac "0123456789ABCDEF") 4000000 16); 133 | time_fn "HMAC-SHA1, 64_000_000 bytes, 16-byte chunks" 134 | (hash (MAC.hmac_sha1 "0123456789ABCDEF") 4000000 16); 135 | time_fn "HMAC-SHA256, 64_000_000 bytes, 16-byte chunks" 136 | (hash (MAC.hmac_sha256 "0123456789ABCDEF") 4000000 16); 137 | time_fn "SipHash 64, 64_000_000 bytes, 16-byte chunks" 138 | (hash (MAC.siphash "0123456789ABCDEF") 4000000 16); 139 | time_fn "SipHash 128, 64_000_000 bytes, 16-byte chunks" 140 | (hash (MAC.siphash128 "0123456789ABCDEF") 4000000 16); 141 | let prng = Random.pseudo_rng "supercalifragilistusexpialidolcius" in 142 | let (priv_key, pub_key) = 143 | time_fn "RSA key generation (2048 bits) x 10" 144 | (repeat 10 (fun () -> RSA.new_key ~rng:prng ~e:65537 2048)) in 145 | let plaintext = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ" in 146 | let ciphertext = 147 | time_fn "RSA public-key operation (2048 bits, 
exponent 65537) x 1000" 148 | (repeat 1000 (fun () -> RSA.encrypt pub_key plaintext)) in 149 | time_fn "RSA private-key operation (2048 bits) x 100" 150 | (repeat 100 (fun () -> ignore(RSA.decrypt priv_key ciphertext))); 151 | time_fn "RSA private-key operation with CRT (2048 bits) x 100" 152 | (repeat 100 (fun () -> ignore(RSA.decrypt_CRT priv_key ciphertext))); 153 | time_fn "PRNG, 64_000_000 bytes" 154 | (rng prng 1000000 64); 155 | time_fn "PRNG AES CTR, 64_000_000 bytes" 156 | (rng (Random.pseudo_rng_aes_ctr "supercalifragilistusexpialidolcius") 1000000 64); 157 | begin try 158 | let hr = Random.hardware_rng () in 159 | time_fn "Hardware RNG, 64_000_000 bytes" 160 | (rng hr 1000000 64) 161 | with Error No_entropy_source -> () 162 | end; 163 | () 164 | -------------------------------------------------------------------------------- /src/stubs-zlib.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2002 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* Stub code to interface with Zlib */ 15 | 16 | #ifdef HAVE_ZLIB 17 | #include 18 | #endif 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | static const value * caml_zlib_error_exn = NULL; 28 | 29 | #ifdef HAVE_ZLIB 30 | 31 | #define ZStream_val(v) (*((z_streamp *)Data_custom_val(v))) 32 | 33 | static void caml_zlib_error(char * fn, value vzs) 34 | { 35 | char * msg; 36 | CAMLparam1(vzs); 37 | CAMLlocal4(s1, s2, tuple, bucket); 38 | 39 | msg = ZStream_val(vzs)->msg; 40 | if (msg == NULL) msg = ""; 41 | if (caml_zlib_error_exn == NULL) { 42 | caml_zlib_error_exn = caml_named_value("Cryptokit.Error"); 43 | if (caml_zlib_error_exn == NULL) 44 | caml_invalid_argument("Exception Cryptokit.Error not initialized"); 45 | } 46 | s1 = caml_copy_string(fn); 47 | s2 = caml_copy_string(msg); 48 | tuple = caml_alloc_small(2, 0); 49 | Field(tuple, 0) = s1; 50 | Field(tuple, 1) = s2; 51 | bucket = caml_alloc_small(2, 0); 52 | Field(bucket, 0) = *caml_zlib_error_exn; 53 | Field(bucket, 1) = tuple; 54 | CAMLdrop; 55 | caml_raise(bucket); 56 | } 57 | 58 | void caml_zlib_free_stream(value vzs) 59 | { 60 | caml_stat_free(ZStream_val(vzs)); 61 | ZStream_val(vzs) = NULL; 62 | } 63 | 64 | static struct custom_operations caml_zlib_stream_ops = { 65 | "caml_zlib_stream_ops", &caml_zlib_free_stream, NULL, NULL, NULL, NULL 66 | }; 67 | 68 | static value caml_zlib_new_stream(void) 69 | { 70 | value res = caml_alloc_custom(&caml_zlib_stream_ops, sizeof(z_streamp), 0, 1); 71 | 72 | ZStream_val(res) = caml_stat_alloc(sizeof(z_stream)); 73 | ZStream_val(res)->zalloc = NULL; 74 | ZStream_val(res)->zfree = NULL; 75 | ZStream_val(res)->opaque = NULL; 76 | ZStream_val(res)->next_in = NULL; 77 | ZStream_val(res)->next_out = NULL; 78 | return res; 79 | } 80 | 81 | CAMLprim 82 | value caml_zlib_deflateInit(value vlevel, value expect_header) 83 | { 84 | 
value vzs = caml_zlib_new_stream(); 85 | if (deflateInit2(ZStream_val(vzs), 86 | Int_val(vlevel), 87 | Z_DEFLATED, 88 | Bool_val(expect_header) ? MAX_WBITS : -MAX_WBITS, 89 | 8, 90 | Z_DEFAULT_STRATEGY) != Z_OK) 91 | caml_zlib_error("Zlib.deflateInit", vzs); 92 | return vzs; 93 | } 94 | 95 | static int caml_zlib_flush_table[] = 96 | { Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH }; 97 | 98 | CAMLprim 99 | value caml_zlib_deflate(value vzs, value srcbuf, value srcpos, value srclen, 100 | value dstbuf, value dstpos, value dstlen, 101 | value vflush) 102 | { 103 | z_stream * zs = ZStream_val(vzs); 104 | int retcode; 105 | long used_in, used_out; 106 | value res; 107 | 108 | zs->next_in = &Byte_u(srcbuf, Long_val(srcpos)); 109 | zs->avail_in = Long_val(srclen); 110 | zs->next_out = &Byte_u(dstbuf, Long_val(dstpos)); 111 | zs->avail_out = Long_val(dstlen); 112 | retcode = deflate(zs, caml_zlib_flush_table[Int_val(vflush)]); 113 | if (retcode < 0) caml_zlib_error("Zlib.deflate", vzs); 114 | used_in = Long_val(srclen) - zs->avail_in; 115 | used_out = Long_val(dstlen) - zs->avail_out; 116 | zs->next_in = NULL; /* not required, but cleaner */ 117 | zs->next_out = NULL; /* (avoid dangling pointers into Caml heap) */ 118 | res = caml_alloc_small(3, 0); 119 | Field(res, 0) = Val_bool(retcode == Z_STREAM_END); 120 | Field(res, 1) = Val_int(used_in); 121 | Field(res, 2) = Val_int(used_out); 122 | return res; 123 | } 124 | 125 | CAMLprim 126 | value caml_zlib_deflateEnd(value vzs) 127 | { 128 | if (deflateEnd(ZStream_val(vzs)) != Z_OK) 129 | caml_zlib_error("Zlib.deflateEnd", vzs); 130 | return Val_unit; 131 | } 132 | 133 | CAMLprim 134 | value caml_zlib_inflateInit(value expect_header) 135 | { 136 | value vzs = caml_zlib_new_stream(); 137 | if (inflateInit2(ZStream_val(vzs), 138 | Bool_val(expect_header) ? 
MAX_WBITS : -MAX_WBITS) != Z_OK) 139 | caml_zlib_error("Zlib.inflateInit", vzs); 140 | return vzs; 141 | } 142 | 143 | CAMLprim 144 | value caml_zlib_inflate(value vzs, value srcbuf, value srcpos, value srclen, 145 | value dstbuf, value dstpos, value dstlen, 146 | value vflush) 147 | { 148 | z_stream * zs = ZStream_val(vzs); 149 | int retcode; 150 | long used_in, used_out; 151 | value res; 152 | 153 | zs->next_in = &Byte_u(srcbuf, Long_val(srcpos)); 154 | zs->avail_in = Long_val(srclen); 155 | zs->next_out = &Byte_u(dstbuf, Long_val(dstpos)); 156 | zs->avail_out = Long_val(dstlen); 157 | retcode = inflate(zs, caml_zlib_flush_table[Int_val(vflush)]); 158 | if (retcode < 0 || retcode == Z_NEED_DICT) 159 | caml_zlib_error("Zlib.inflate", vzs); 160 | used_in = Long_val(srclen) - zs->avail_in; 161 | used_out = Long_val(dstlen) - zs->avail_out; 162 | zs->next_in = NULL; /* not required, but cleaner */ 163 | zs->next_out = NULL; /* (avoid dangling pointers into Caml heap) */ 164 | res = caml_alloc_small(3, 0); 165 | Field(res, 0) = Val_bool(retcode == Z_STREAM_END); 166 | Field(res, 1) = Val_int(used_in); 167 | Field(res, 2) = Val_int(used_out); 168 | return res; 169 | } 170 | 171 | CAMLprim 172 | value caml_zlib_inflateEnd(value vzs) 173 | { 174 | if (inflateEnd(ZStream_val(vzs)) != Z_OK) 175 | caml_zlib_error("Zlib.inflateEnd", vzs); 176 | return Val_unit; 177 | } 178 | 179 | #else 180 | 181 | static void caml_zlib_not_supported(void) 182 | { 183 | value bucket; 184 | if (caml_zlib_error_exn == NULL) { 185 | caml_zlib_error_exn = caml_named_value("Cryptokit.Error"); 186 | if (caml_zlib_error_exn == NULL) 187 | caml_invalid_argument("Exception Cryptokit.Error not initialized"); 188 | } 189 | bucket = caml_alloc_small(2, 0); 190 | Field(bucket, 0) = *caml_zlib_error_exn; 191 | Field(bucket, 1) = Val_int(12); /* Compression_not_supported */ 192 | caml_raise(bucket); 193 | } 194 | 195 | CAMLprim 196 | value caml_zlib_deflateInit(value vlevel, value expect_header) 197 | { 
caml_zlib_not_supported(); return Val_unit; } 198 | 199 | CAMLprim 200 | value caml_zlib_deflate(value vzs, value srcbuf, value srcpos, value srclen, 201 | value dstbuf, value dstpos, value dstlen, 202 | value vflush) 203 | { caml_zlib_not_supported(); return Val_unit; } 204 | 205 | CAMLprim 206 | value caml_zlib_deflateEnd(value vzs) 207 | { caml_zlib_not_supported(); return Val_unit; } 208 | 209 | CAMLprim 210 | value caml_zlib_inflateInit(value expect_header) 211 | { caml_zlib_not_supported(); return Val_unit; } 212 | 213 | CAMLprim 214 | value caml_zlib_inflate(value vzs, value srcbuf, value srcpos, value srclen, 215 | value dstbuf, value dstpos, value dstlen, 216 | value vflush) 217 | { caml_zlib_not_supported(); return Val_unit; } 218 | 219 | CAMLprim 220 | value caml_zlib_inflateEnd(value vzs) 221 | { caml_zlib_not_supported(); return Val_unit; } 222 | 223 | #endif 224 | 225 | CAMLprim 226 | value caml_zlib_deflate_bytecode(value * arg, int nargs) 227 | { 228 | return caml_zlib_deflate(arg[0], arg[1], arg[2], arg[3], 229 | arg[4], arg[5], arg[6], arg[7]); 230 | } 231 | 232 | CAMLprim 233 | value caml_zlib_inflate_bytecode(value * arg, int nargs) 234 | { 235 | return caml_zlib_inflate(arg[0], arg[1], arg[2], arg[3], 236 | arg[4], arg[5], arg[6], arg[7]); 237 | } 238 | 239 | 240 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Cryptokit library 2 | 3 | ## Overview 4 | 5 | The Cryptokit library for OCaml provides a variety of cryptographic primitives that can be used to implement cryptographic protocols in security-sensitive applications. The primitives provided include: 6 | 7 | * Symmetric-key ciphers: AES, Chacha20, DES, Triple-DES, Blowfish, ARCfour, in ECB, CBC, CFB, OFB and counter modes. 8 | * Authenticated encryption: AES-GCM, Chacha20-Poly1305. 
 9 | * Public-key cryptography: RSA encryption and signature, Diffie-Hellman key agreement. 10 | * Hash functions and MACs: SHA-3, SHA-2, BLAKE2, BLAKE3, RIPEMD-160; MACs based on AES and DES; SipHash. (SHA-1 and MD5, despite being broken, are also provided for historical value.) 11 | * Random number generation. 12 | * Encodings and compression: base 64, hexadecimal, Zlib compression. 13 | 14 | Additional ciphers and hashes can easily be used in conjunction with the library. In particular, basic mechanisms such as chaining modes, output buffering, and padding are provided by generic classes that can easily be composed with user-provided ciphers. More generally, the library promotes a "Lego"-like style of constructing and composing transformations over character streams. 15 | 16 | This library is distributed under the conditions of the GNU Library General Public license version 2 or any later version, with the special OCaml exception on linking described in file LICENSE. 17 | 18 | ## Requirements 19 | 20 | * OCaml 4.08 or more recent. 21 | * The Dune build system, version 2.0 or more recent. 22 | * The Zarith library, version 1.4 or more recent. 23 | * The Zlib C library, version 1.1.3 or up is recommended. If it is not installed on your system (look for libz.a or libz.so), get it from http://www.gzip.org/, or disable Zlib support through the `./configure` options (run `./configure --help` for a list). If you are running Linux or BSD or MacOS, your distribution provides precompiled binaries for this library. 24 | 25 | ## Build, test and install 26 | 27 | * To configure, run `./configure`. There are options to disable or enable some features (run `./configure --help` for a list), but the default configuration is fine most of the time. 28 | 29 | * To build, run `dune build`. 30 | 31 | * To execute a test, run `dune exec test/<name>.exe` where `<name>` can be `test`, 32 | `prngtest` or `speedtest`, supplying additional command line arguments if needed. 
33 | The main test file `test/test.ml` is also included in the `runtest` alias, so it 34 | can be executed simply by `dune test`. 35 | 36 | * To install, run `dune install`. 37 | 38 | ## Using the library 39 | 40 | The package name is `cryptokit`. With Dune, use `(libraries cryptokit)`. With ocamlfind, do 41 | ``` 42 | ocamlfind ocamlopt -package cryptokit ... # for compilation 43 | ocamlfind ocamlopt -package cryptokit -linkpkg ... # for linking 44 | ``` 45 | 46 | ## Documentation 47 | 48 | See the extensive documentation comments in file `src/cryptokit.mli`. 49 | 50 | To build HTML documentation, run `dune build @doc`. The resulting index file is 51 | located at `_build/default/_doc/_html/cryptokit/Cryptokit/index.html`. 52 | 53 | ## Warnings and disclaimers 54 | 55 | Disclaimer 1: the author is not an expert in cryptography. While reasonable care has been taken to select good, widely-used implementations of the ciphers and hashes, and follow recommended practices found in reputable applied cryptography textbooks, you are advised to thoroughly review the implementation of this module before using it in a security-critical application. 56 | 57 | Disclaimer 2: some knowledge of cryptography is needed to use this library effectively. A good introduction is the book __Serious Cryptography__ by J.-P. Aumasson (2018). Building secure applications out of cryptographic primitives also requires a general background in computer security. 58 | 59 | Disclaimer 3: in some countries, the use, distribution, import and/or export of cryptographic applications is restricted by law. The precise restrictions may depend on the strength of the cryptography used (e.g. key size), but also on its purpose (e.g. confidentiality vs. authentication). It is up to the users of this library to comply with regulations applicable in their country. 60 | 61 | ## Design notes and references 62 | 63 | The library is organized around the concept of "transforms". 
A transform is an object that accepts strings, sub-strings, characters and bytes as input, transforms them, and buffers the output. While it is possible to enter all input, then fetch the output, lower memory requirements can be achieved by purging the output periodically during data input. 64 | 65 | The AES implementation is the public-domain optimized reference implementation by Daemen, Rijmen and Barreto. On x86 processors that support the AES-NI extensions, hardware implementation is used instead. 66 | 67 | The Chacha20 implementation is due to D.J.Bernstein, https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c . It is in the public domain. 68 | 69 | The DES implementation is based on Outerbridge's popular "d3des" implementation. This is not the fastest DES implementation available, but one of the cleanest. Outerbridge's code is marked as public domain. 70 | 71 | The Blowfish implementation is that of Paul Kocher with some performance improvements. It is under the LGPL. It passes the test vectors listed at http://www.schneier.com/code/vectors.txt 72 | 73 | ARCfour (``alleged RC4'') is implemented from scratch, based on the algorithm described in Schneier's _Applied Cryptography_ 74 | 75 | For AES-GCM, the GHASH implementation is that of Steven M. Gibson at https://github.com/mko-x/SharedAES-GCM/blob/master/Sources/gcm.c . On x86 processors that support the PCLMUL extension, hardware implementation is used instead. Test vectors are taken from "The Galois/Counter Mode of Operation (GCM)" by David A. McGrew and John Viega. 76 | 77 | For Chacha20-Poly1305, Poly1305 is based on the "Donna" implementation by Andrew Moon, https://github.com/floodyberry/poly1305-donna . Test vectors are taken from RFC 7539 and from the BoringSSL project. 78 | 79 | SHA-1 is also implemented from scratch, based on the algorithm described in the _Handbook of Applied Cryptography_. It passes the FIPS test vectors. 
80 | 81 | SHA-2 is implemented from scratch based on FIPS publication 180-2. It passes the FIPS test vectors. 82 | 83 | SHA-3 is based on the "readable" implementation of Keccak written by Markku-Juhani O. Saarinen . 84 | 85 | BLAKE2b and BLAKE2s are implemented from scratch based on RFC 7693. The test vectors are taken from https://github.com/BLAKE2/BLAKE2/tree/master/testvectors; others were obtained with the b2sum program. 86 | 87 | BLAKE3 uses the portable C implementation from https://github.com/BLAKE3-team/BLAKE3 . The authors released the code into the public domain with CC0 1.0. The test vectors come from the same source. 88 | 89 | RIPEMD-160 is based on the reference implementation by A.Bosselaers. It passes the test vectors listed at http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html 90 | 91 | MD5 uses the public-domain implementation by Colin Plumb that is also used in the OCaml runtime system for module Digest. 92 | 93 | SipHash is based on the reference implementation by J.-P. Aumasson and D. J. Bernstein, https://github.com/veorq/SipHash . It passes their test vectors. 94 | 95 | RSA encryption and decryption was implemented from scratch, using the Zarith OCaml library for arbitrary-precision arithmetic, which itself uses GMP. Modular exponentiation is the constant-time implementation provided by GMP. The Chinese remainder theorem is exploited when possible, though. Like all ciphers in this library, the RSA implementation is *not* protected against timing attacks. 96 | 97 | RSA key generation uses GMP's `nextprime` function for probabilistic primality testing. 98 | 99 | The hardware RNG uses the RDRAND instruction of recent x86 processors, if supported. It is not available on other platforms. A check is included to reject the broken RDRAND on AMD Ryzen 3000 processors (https://arstechnica.com/gadgets/2019/10/how-a-months-old-amd-microcode-bug-destroyed-my-weekend/). 
100 | 101 | The system RNG uses the `getentropy` function provided by Linux, macOS and the BSDs, or the `CryptGenRandom` function from the Windows cryptographic API. 102 | 103 | The seeded PRNG is just the Chacha20 stream cipher encrypting the all-zeroes message. The seed is used as the Chacha20 key. An alternate seeded PRNG is provided, based on AES encryption of a 128-bit counter. Both PRNGs pass the Dieharder statistical tests. Still, better use the system RNG or the hardware RNG if high-quality random numbers are needed. 104 | 105 | ## Performance 106 | 107 | If you run `dune exec test/speedtest.exe`, a simple benchmark is performed and shows the speed of various operations from this library. 108 | -------------------------------------------------------------------------------- /src/blake3_dispatch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "blake3_impl.h" 6 | 7 | #if defined(IS_X86) 8 | #if defined(_MSC_VER) 9 | #include 10 | #elif defined(__GNUC__) 11 | #include 12 | #else 13 | #undef IS_X86 /* Unimplemented! 
*/ 14 | #endif 15 | #endif 16 | 17 | #define MAYBE_UNUSED(x) (void)((x)) 18 | 19 | #if defined(IS_X86) 20 | static uint64_t xgetbv(void) { 21 | #if defined(_MSC_VER) 22 | return _xgetbv(0); 23 | #else 24 | uint32_t eax = 0, edx = 0; 25 | __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); 26 | return ((uint64_t)edx << 32) | eax; 27 | #endif 28 | } 29 | 30 | static void cpuid(uint32_t out[4], uint32_t id) { 31 | #if defined(_MSC_VER) 32 | __cpuid((int *)out, id); 33 | #elif defined(__i386__) || defined(_M_IX86) 34 | __asm__ __volatile__("movl %%ebx, %1\n" 35 | "cpuid\n" 36 | "xchgl %1, %%ebx\n" 37 | : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) 38 | : "a"(id)); 39 | #else 40 | __asm__ __volatile__("cpuid\n" 41 | : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) 42 | : "a"(id)); 43 | #endif 44 | } 45 | 46 | static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { 47 | #if defined(_MSC_VER) 48 | __cpuidex((int *)out, id, sid); 49 | #elif defined(__i386__) || defined(_M_IX86) 50 | __asm__ __volatile__("movl %%ebx, %1\n" 51 | "cpuid\n" 52 | "xchgl %1, %%ebx\n" 53 | : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) 54 | : "a"(id), "c"(sid)); 55 | #else 56 | __asm__ __volatile__("cpuid\n" 57 | : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) 58 | : "a"(id), "c"(sid)); 59 | #endif 60 | } 61 | 62 | #endif 63 | 64 | enum cpu_feature { 65 | SSE2 = 1 << 0, 66 | SSSE3 = 1 << 1, 67 | SSE41 = 1 << 2, 68 | AVX = 1 << 3, 69 | AVX2 = 1 << 4, 70 | AVX512F = 1 << 5, 71 | AVX512VL = 1 << 6, 72 | /* ... 
*/ 73 | UNDEFINED = 1 << 30 74 | }; 75 | 76 | #if !defined(BLAKE3_TESTING) 77 | static /* Allow the variable to be controlled manually for testing */ 78 | #endif 79 | enum cpu_feature g_cpu_features = UNDEFINED; 80 | 81 | #if !defined(BLAKE3_TESTING) 82 | static 83 | #endif 84 | enum cpu_feature 85 | get_cpu_features(void) { 86 | 87 | if (g_cpu_features != UNDEFINED) { 88 | return g_cpu_features; 89 | } else { 90 | #if defined(IS_X86) 91 | uint32_t regs[4] = {0}; 92 | uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3]; 93 | (void)edx; 94 | enum cpu_feature features = 0; 95 | cpuid(regs, 0); 96 | const int max_id = *eax; 97 | cpuid(regs, 1); 98 | #if defined(__amd64__) || defined(_M_X64) 99 | features |= SSE2; 100 | #else 101 | if (*edx & (1UL << 26)) 102 | features |= SSE2; 103 | #endif 104 | if (*ecx & (1UL << 0)) 105 | features |= SSSE3; 106 | if (*ecx & (1UL << 19)) 107 | features |= SSE41; 108 | 109 | if (*ecx & (1UL << 27)) { // OSXSAVE 110 | const uint64_t mask = xgetbv(); 111 | if ((mask & 6) == 6) { // SSE and AVX states 112 | if (*ecx & (1UL << 28)) 113 | features |= AVX; 114 | if (max_id >= 7) { 115 | cpuidex(regs, 7, 0); 116 | if (*ebx & (1UL << 5)) 117 | features |= AVX2; 118 | if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm 119 | if (*ebx & (1UL << 31)) 120 | features |= AVX512VL; 121 | if (*ebx & (1UL << 16)) 122 | features |= AVX512F; 123 | } 124 | } 125 | } 126 | } 127 | g_cpu_features = features; 128 | return features; 129 | #else 130 | /* How to detect NEON? 
*/ 131 | return 0; 132 | #endif 133 | } 134 | } 135 | 136 | EXPORT void blake3_compress_in_place(uint32_t cv[8], 137 | const uint8_t block[BLAKE3_BLOCK_LEN], 138 | uint8_t block_len, uint64_t counter, 139 | uint8_t flags) { 140 | #if defined(IS_X86) 141 | const enum cpu_feature features = get_cpu_features(); 142 | MAYBE_UNUSED(features); 143 | #if !defined(BLAKE3_NO_AVX512) 144 | if (features & AVX512VL) { 145 | blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); 146 | return; 147 | } 148 | #endif 149 | #if !defined(BLAKE3_NO_SSE41) 150 | if (features & SSE41) { 151 | blake3_compress_in_place_sse41(cv, block, block_len, counter, flags); 152 | return; 153 | } 154 | #endif 155 | #if !defined(BLAKE3_NO_SSE2) 156 | if (features & SSE2) { 157 | blake3_compress_in_place_sse2(cv, block, block_len, counter, flags); 158 | return; 159 | } 160 | #endif 161 | #endif 162 | blake3_compress_in_place_portable(cv, block, block_len, counter, flags); 163 | } 164 | 165 | EXPORT void blake3_compress_xof(const uint32_t cv[8], 166 | const uint8_t block[BLAKE3_BLOCK_LEN], 167 | uint8_t block_len, uint64_t counter, uint8_t flags, 168 | uint8_t out[64]) { 169 | #if defined(IS_X86) 170 | const enum cpu_feature features = get_cpu_features(); 171 | MAYBE_UNUSED(features); 172 | #if !defined(BLAKE3_NO_AVX512) 173 | if (features & AVX512VL) { 174 | blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); 175 | return; 176 | } 177 | #endif 178 | #if !defined(BLAKE3_NO_SSE41) 179 | if (features & SSE41) { 180 | blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out); 181 | return; 182 | } 183 | #endif 184 | #if !defined(BLAKE3_NO_SSE2) 185 | if (features & SSE2) { 186 | blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out); 187 | return; 188 | } 189 | #endif 190 | #endif 191 | blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); 192 | } 193 | 194 | EXPORT void blake3_hash_many(const uint8_t *const *inputs, size_t 
num_inputs, 195 | size_t blocks, const uint32_t key[8], uint64_t counter, 196 | bool increment_counter, uint8_t flags, 197 | uint8_t flags_start, uint8_t flags_end, uint8_t *out) { 198 | #if defined(IS_X86) 199 | const enum cpu_feature features = get_cpu_features(); 200 | MAYBE_UNUSED(features); 201 | #if !defined(BLAKE3_NO_AVX512) 202 | if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { 203 | blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, 204 | increment_counter, flags, flags_start, flags_end, 205 | out); 206 | return; 207 | } 208 | #endif 209 | #if !defined(BLAKE3_NO_AVX2) 210 | if (features & AVX2) { 211 | blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, 212 | increment_counter, flags, flags_start, flags_end, 213 | out); 214 | return; 215 | } 216 | #endif 217 | #if !defined(BLAKE3_NO_SSE41) 218 | if (features & SSE41) { 219 | blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, 220 | increment_counter, flags, flags_start, flags_end, 221 | out); 222 | return; 223 | } 224 | #endif 225 | #if !defined(BLAKE3_NO_SSE2) 226 | if (features & SSE2) { 227 | blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, 228 | increment_counter, flags, flags_start, flags_end, 229 | out); 230 | return; 231 | } 232 | #endif 233 | #endif 234 | 235 | #if BLAKE3_USE_NEON == 1 236 | blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, 237 | increment_counter, flags, flags_start, flags_end, out); 238 | return; 239 | #endif 240 | 241 | blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, 242 | increment_counter, flags, flags_start, flags_end, 243 | out); 244 | } 245 | 246 | // The dynamically detected SIMD degree of the current platform. 
247 | EXPORT size_t blake3_simd_degree(void) { 248 | #if defined(IS_X86) 249 | const enum cpu_feature features = get_cpu_features(); 250 | MAYBE_UNUSED(features); 251 | #if !defined(BLAKE3_NO_AVX512) 252 | if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { 253 | return 16; 254 | } 255 | #endif 256 | #if !defined(BLAKE3_NO_AVX2) 257 | if (features & AVX2) { 258 | return 8; 259 | } 260 | #endif 261 | #if !defined(BLAKE3_NO_SSE41) 262 | if (features & SSE41) { 263 | return 4; 264 | } 265 | #endif 266 | #if !defined(BLAKE3_NO_SSE2) 267 | if (features & SSE2) { 268 | return 4; 269 | } 270 | #endif 271 | #endif 272 | #if BLAKE3_USE_NEON == 1 273 | return 4; 274 | #endif 275 | return 1; 276 | } 277 | -------------------------------------------------------------------------------- /src/sha512.c: -------------------------------------------------------------------------------- 1 | /***********************************************************************/ 2 | /* */ 3 | /* The Cryptokit library */ 4 | /* */ 5 | /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ 6 | /* */ 7 | /* Copyright 2015 Institut National de Recherche en Informatique et */ 8 | /* en Automatique. All rights reserved. This file is distributed */ 9 | /* under the terms of the GNU Library General Public License, with */ 10 | /* the special exception on linking described in file LICENSE. 
*/ 11 | /* */ 12 | /***********************************************************************/ 13 | 14 | /* SHA-512 hashing */ 15 | 16 | #include 17 | #include 18 | #include "sha512.h" 19 | 20 | /* Ref: FIPS publication 180-2 */ 21 | 22 | #define ROTR(x,n) ((x) >> (n) | (x) << (64 - (n))) 23 | 24 | #define CH(x,y,z) (z ^ (x & (y ^ z))) 25 | #define MAJ(x,y,z) ((x & y) | (z & (x | y))) 26 | #define SIGMA0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39)) 27 | #define SIGMA1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41)) 28 | #define sigma0(x) (ROTR(x,1) ^ ROTR(x,8) ^ (x >> 7)) 29 | #define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6)) 30 | 31 | static void SHA512_copy_and_swap(void * src, void * dst, int numwords) 32 | { 33 | #ifdef ARCH_BIG_ENDIAN 34 | memcpy(dst, src, numwords * 8); 35 | #else 36 | unsigned char * s, * d; 37 | unsigned char a, b; 38 | for (s = src, d = dst; numwords > 0; s += 8, d += 8, numwords--) { 39 | a = s[0]; 40 | b = s[1]; 41 | d[0] = s[7]; 42 | d[1] = s[6]; 43 | d[6] = b; 44 | d[7] = a; 45 | a = s[2]; 46 | b = s[3]; 47 | d[2] = s[5]; 48 | d[3] = s[4]; 49 | d[4] = b; 50 | d[5] = a; 51 | } 52 | #endif 53 | } 54 | 55 | static u64 SHA512_constants[80] = { 56 | UINT64_C(0x428a2f98d728ae22), 57 | UINT64_C(0x7137449123ef65cd), 58 | UINT64_C(0xb5c0fbcfec4d3b2f), 59 | UINT64_C(0xe9b5dba58189dbbc), 60 | UINT64_C(0x3956c25bf348b538), 61 | UINT64_C(0x59f111f1b605d019), 62 | UINT64_C(0x923f82a4af194f9b), 63 | UINT64_C(0xab1c5ed5da6d8118), 64 | UINT64_C(0xd807aa98a3030242), 65 | UINT64_C(0x12835b0145706fbe), 66 | UINT64_C(0x243185be4ee4b28c), 67 | UINT64_C(0x550c7dc3d5ffb4e2), 68 | UINT64_C(0x72be5d74f27b896f), 69 | UINT64_C(0x80deb1fe3b1696b1), 70 | UINT64_C(0x9bdc06a725c71235), 71 | UINT64_C(0xc19bf174cf692694), 72 | UINT64_C(0xe49b69c19ef14ad2), 73 | UINT64_C(0xefbe4786384f25e3), 74 | UINT64_C(0x0fc19dc68b8cd5b5), 75 | UINT64_C(0x240ca1cc77ac9c65), 76 | UINT64_C(0x2de92c6f592b0275), 77 | UINT64_C(0x4a7484aa6ea6e483), 78 | UINT64_C(0x5cb0a9dcbd41fbd4), 79 | 
UINT64_C(0x76f988da831153b5), 80 | UINT64_C(0x983e5152ee66dfab), 81 | UINT64_C(0xa831c66d2db43210), 82 | UINT64_C(0xb00327c898fb213f), 83 | UINT64_C(0xbf597fc7beef0ee4), 84 | UINT64_C(0xc6e00bf33da88fc2), 85 | UINT64_C(0xd5a79147930aa725), 86 | UINT64_C(0x06ca6351e003826f), 87 | UINT64_C(0x142929670a0e6e70), 88 | UINT64_C(0x27b70a8546d22ffc), 89 | UINT64_C(0x2e1b21385c26c926), 90 | UINT64_C(0x4d2c6dfc5ac42aed), 91 | UINT64_C(0x53380d139d95b3df), 92 | UINT64_C(0x650a73548baf63de), 93 | UINT64_C(0x766a0abb3c77b2a8), 94 | UINT64_C(0x81c2c92e47edaee6), 95 | UINT64_C(0x92722c851482353b), 96 | UINT64_C(0xa2bfe8a14cf10364), 97 | UINT64_C(0xa81a664bbc423001), 98 | UINT64_C(0xc24b8b70d0f89791), 99 | UINT64_C(0xc76c51a30654be30), 100 | UINT64_C(0xd192e819d6ef5218), 101 | UINT64_C(0xd69906245565a910), 102 | UINT64_C(0xf40e35855771202a), 103 | UINT64_C(0x106aa07032bbd1b8), 104 | UINT64_C(0x19a4c116b8d2d0c8), 105 | UINT64_C(0x1e376c085141ab53), 106 | UINT64_C(0x2748774cdf8eeb99), 107 | UINT64_C(0x34b0bcb5e19b48a8), 108 | UINT64_C(0x391c0cb3c5c95a63), 109 | UINT64_C(0x4ed8aa4ae3418acb), 110 | UINT64_C(0x5b9cca4f7763e373), 111 | UINT64_C(0x682e6ff3d6b2b8a3), 112 | UINT64_C(0x748f82ee5defb2fc), 113 | UINT64_C(0x78a5636f43172f60), 114 | UINT64_C(0x84c87814a1f0ab72), 115 | UINT64_C(0x8cc702081a6439ec), 116 | UINT64_C(0x90befffa23631e28), 117 | UINT64_C(0xa4506cebde82bde9), 118 | UINT64_C(0xbef9a3f7b2c67915), 119 | UINT64_C(0xc67178f2e372532b), 120 | UINT64_C(0xca273eceea26619c), 121 | UINT64_C(0xd186b8c721c0c207), 122 | UINT64_C(0xeada7dd6cde0eb1e), 123 | UINT64_C(0xf57d4f7fee6ed178), 124 | UINT64_C(0x06f067aa72176fba), 125 | UINT64_C(0x0a637dc5a2c898a6), 126 | UINT64_C(0x113f9804bef90dae), 127 | UINT64_C(0x1b710b35131c471b), 128 | UINT64_C(0x28db77f523047d84), 129 | UINT64_C(0x32caab7b40c72493), 130 | UINT64_C(0x3c9ebe0a15c9bebc), 131 | UINT64_C(0x431d67c49c100d4c), 132 | UINT64_C(0x4cc5d4becb3e42b6), 133 | UINT64_C(0x597f299cfc657e2a), 134 | UINT64_C(0x5fcb6fab3ad6faec), 135 | 
UINT64_C(0x6c44198c4a475817) 136 | }; 137 | 138 | static void SHA512_transform(struct SHA512Context * ctx) 139 | { 140 | int i; 141 | register u64 a, b, c, d, e, f, g, h, t1, t2; 142 | u64 data[80]; 143 | 144 | /* Convert buffer data to 16 big-endian integers */ 145 | SHA512_copy_and_swap(ctx->buffer, data, 16); 146 | 147 | /* Expand into 80 integers */ 148 | for (i = 16; i < 80; i++) { 149 | data[i] = sigma1(data[i-2]) + data[i-7] + sigma0(data[i-15]) + data[i-16]; 150 | } 151 | 152 | /* Initialize working variables */ 153 | a = ctx->state[0]; 154 | b = ctx->state[1]; 155 | c = ctx->state[2]; 156 | d = ctx->state[3]; 157 | e = ctx->state[4]; 158 | f = ctx->state[5]; 159 | g = ctx->state[6]; 160 | h = ctx->state[7]; 161 | 162 | /* Perform rounds */ 163 | #if 0 164 | for (i = 0; i < 80; i++) { 165 | t1 = h + SIGMA1(e) + CH(e, f, g) + SHA512_constants[i] + data[i]; 166 | t2 = SIGMA0(a) + MAJ(a, b, c); 167 | h = g; g = f; f = e; e = d + t1; 168 | d = c; c = b; b = a; a = t1 + t2; 169 | } 170 | #else 171 | #define STEP(a,b,c,d,e,f,g,h,i) \ 172 | t1 = h + SIGMA1(e) + CH(e, f, g) + SHA512_constants[i] + data[i]; \ 173 | t2 = SIGMA0(a) + MAJ(a, b, c); \ 174 | d = d + t1; \ 175 | h = t1 + t2 176 | 177 | for (i = 0; i < 80; i += 8) { 178 | STEP(a,b,c,d,e,f,g,h,i); 179 | STEP(h,a,b,c,d,e,f,g,i+1); 180 | STEP(g,h,a,b,c,d,e,f,i+2); 181 | STEP(f,g,h,a,b,c,d,e,i+3); 182 | STEP(e,f,g,h,a,b,c,d,i+4); 183 | STEP(d,e,f,g,h,a,b,c,i+5); 184 | STEP(c,d,e,f,g,h,a,b,i+6); 185 | STEP(b,c,d,e,f,g,h,a,i+7); 186 | } 187 | #endif 188 | 189 | /* Update chaining values */ 190 | ctx->state[0] += a; 191 | ctx->state[1] += b; 192 | ctx->state[2] += c; 193 | ctx->state[3] += d; 194 | ctx->state[4] += e; 195 | ctx->state[5] += f; 196 | ctx->state[6] += g; 197 | ctx->state[7] += h; 198 | } 199 | 200 | EXPORT void SHA512_init(struct SHA512Context * ctx, int bitsize) 201 | { 202 | switch (bitsize) { 203 | case 512: 204 | ctx->state[0] = UINT64_C(0x6a09e667f3bcc908); 205 | ctx->state[1] = 
UINT64_C(0xbb67ae8584caa73b); 206 | ctx->state[2] = UINT64_C(0x3c6ef372fe94f82b); 207 | ctx->state[3] = UINT64_C(0xa54ff53a5f1d36f1 ); 208 | ctx->state[4] = UINT64_C(0x510e527fade682d1); 209 | ctx->state[5] = UINT64_C(0x9b05688c2b3e6c1f); 210 | ctx->state[6] = UINT64_C(0x1f83d9abfb41bd6b); 211 | ctx->state[7] = UINT64_C(0x5be0cd19137e2179); 212 | break; 213 | case 384: 214 | ctx->state[0] = UINT64_C(0xcbbb9d5dc1059ed8); 215 | ctx->state[1] = UINT64_C(0x629a292a367cd507); 216 | ctx->state[2] = UINT64_C(0x9159015a3070dd17); 217 | ctx->state[3] = UINT64_C(0x152fecd8f70e5939 ); 218 | ctx->state[4] = UINT64_C(0x67332667ffc00b31); 219 | ctx->state[5] = UINT64_C(0x8eb44a8768581511); 220 | ctx->state[6] = UINT64_C(0xdb0c2e0d64f98fa7); 221 | ctx->state[7] = UINT64_C(0x47b5481dbefa4fa4); 222 | break; 223 | case 256: 224 | ctx->state[0] = UINT64_C(0x22312194fc2bf72c); 225 | ctx->state[1] = UINT64_C(0x9f555fa3c84c64c2); 226 | ctx->state[2] = UINT64_C(0x2393b86b6f53b151); 227 | ctx->state[3] = UINT64_C(0x963877195940eabd ); 228 | ctx->state[4] = UINT64_C(0x96283ee2a88effe3); 229 | ctx->state[5] = UINT64_C(0xbe5e1e2553863992); 230 | ctx->state[6] = UINT64_C(0x2b0199fc2c85b8aa); 231 | ctx->state[7] = UINT64_C(0x0eb72ddC81c52ca2); 232 | break; 233 | case 224: 234 | ctx->state[0] = UINT64_C(0x8c3d37c819544da2); 235 | ctx->state[1] = UINT64_C(0x73e1996689dcd4d6); 236 | ctx->state[2] = UINT64_C(0x1dfab7ae32ff9c82); 237 | ctx->state[3] = UINT64_C(0x679dd514582f9fcf ); 238 | ctx->state[4] = UINT64_C(0x0f6d2b697bd44da8); 239 | ctx->state[5] = UINT64_C(0x77e36f7304C48942); 240 | ctx->state[6] = UINT64_C(0x3f9d85a86a1d36C8); 241 | ctx->state[7] = UINT64_C(0x1112e6ad91d692a1); 242 | break; 243 | default: 244 | /* The bit size is wrong. Just zero the state to produce 245 | incorrect hashes. 
*/ 246 | memset(ctx->state, 0, sizeof(ctx->state)); 247 | break; 248 | } 249 | ctx->numbytes = 0; 250 | ctx->length[0] = 0; 251 | ctx->length[1] = 0; 252 | } 253 | /* Absorb len bytes from data: update the 128-bit bit count, then hash every complete 128-byte block and buffer the rest. */ 254 | EXPORT void SHA512_add_data(struct SHA512Context * ctx, unsigned char * data, 255 | unsigned long len) 256 | { 257 | u64 t; 258 | 259 | /* Update length (widen len to 64 bits before shifting: unsigned long may be only 32 bits wide) */ 260 | t = ctx->length[1]; 261 | if ((ctx->length[1] = t + ((u64) len << 3)) < t) 262 | ctx->length[0]++; /* carry from low 64 bits to high 64 bits */ 263 | ctx->length[0] += (u64) len >> 61; 264 | 265 | /* If data was left in buffer, pad it with fresh data and munge block */ 266 | if (ctx->numbytes != 0) { 267 | unsigned long l = 128 - ctx->numbytes; 268 | if (len < l) { 269 | memcpy(ctx->buffer + ctx->numbytes, data, len); 270 | ctx->numbytes += len; 271 | return; 272 | } 273 | memcpy(ctx->buffer + ctx->numbytes, data, l); 274 | SHA512_transform(ctx); 275 | data += l; 276 | len -= l; 277 | } 278 | /* Munge data in 128-byte chunks */ 279 | while (len >= 128) { 280 | memcpy(ctx->buffer, data, 128); 281 | SHA512_transform(ctx); 282 | data += 128; 283 | len -= 128; 284 | } 285 | /* Save remaining data */ 286 | memcpy(ctx->buffer, data, len); 287 | ctx->numbytes = len; 288 | } 289 | 290 | EXPORT void SHA512_finish(struct SHA512Context * ctx, int bitsize, 291 | unsigned char * output) 292 | { 293 | int i = ctx->numbytes; 294 | 295 | /* Set first char of padding to 0x80. There is always room. 
*/ 296 | ctx->buffer[i++] = 0x80; 297 | /* If we do not have room for the length (16 bytes), pad to 128 bytes 298 | with zeroes and munge the data block */ 299 | if (i > 112) { 300 | memset(ctx->buffer + i, 0, 128 - i); 301 | SHA512_transform(ctx); 302 | i = 0; 303 | } 304 | /* Pad to byte 112 with zeroes */ 305 | memset(ctx->buffer + i, 0, 112 - i); 306 | /* Add length in big-endian */ 307 | SHA512_copy_and_swap(ctx->length, ctx->buffer + 112, 2); 308 | /* Munge the final block */ 309 | SHA512_transform(ctx); 310 | /* Final hash value is in ctx->state modulo big-endian conversion */ 311 | switch (bitsize) { 312 | case 512: 313 | SHA512_copy_and_swap(ctx->state, output, 8); 314 | break; 315 | case 384: 316 | SHA512_copy_and_swap(ctx->state, output, 6); 317 | break; 318 | case 256: 319 | SHA512_copy_and_swap(ctx->state, output, 4); 320 | break; 321 | case 224: /* 28 bytes: three full words, then the high 32 bits of state[3] */ 322 | SHA512_copy_and_swap(ctx->state, output, 3); 323 | #ifdef ARCH_BIG_ENDIAN 324 | memcpy(&output[24], &ctx->state[3], 4); /* first 4 bytes in memory are the most significant ones */ 325 | #else 326 | output[24] = (ctx->state[3] >> (8*7)) & 0xff; 327 | output[25] = (ctx->state[3] >> (8*6)) & 0xff; 328 | output[26] = (ctx->state[3] >> (8*5)) & 0xff; 329 | output[27] = (ctx->state[3] >> (8*4)) & 0xff; 330 | #endif 331 | break; 332 | /* default: The bit size is wrong. Produce no output. 
*/ 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /src/blake3_impl.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAKE3_IMPL_H 2 | #define BLAKE3_IMPL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "blake3.h" 11 | 12 | // internal flags 13 | enum blake3_flags { 14 | CHUNK_START = 1 << 0, 15 | CHUNK_END = 1 << 1, 16 | PARENT = 1 << 2, 17 | ROOT = 1 << 3, 18 | KEYED_HASH = 1 << 4, 19 | DERIVE_KEY_CONTEXT = 1 << 5, 20 | DERIVE_KEY_MATERIAL = 1 << 6, 21 | }; 22 | 23 | // This C implementation tries to support recent versions of GCC, Clang, and 24 | // MSVC. 25 | #if defined(_MSC_VER) 26 | #define INLINE static __forceinline 27 | #else 28 | #define INLINE static inline __attribute__((always_inline)) 29 | #endif 30 | 31 | #if defined(__x86_64__) || defined(_M_X64) 32 | #define IS_X86 33 | #define IS_X86_64 34 | #endif 35 | 36 | #if defined(__i386__) || defined(_M_IX86) 37 | #define IS_X86 38 | #define IS_X86_32 39 | #endif 40 | 41 | #if defined(__aarch64__) || defined(_M_ARM64) 42 | #define IS_AARCH64 43 | #endif 44 | 45 | #if defined(IS_X86) 46 | #if defined(_MSC_VER) 47 | #include 48 | #endif 49 | #endif 50 | 51 | #if !defined(BLAKE3_USE_NEON) 52 | // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness 53 | #if defined(IS_AARCH64) 54 | #define BLAKE3_USE_NEON 1 55 | #else 56 | #define BLAKE3_USE_NEON 0 57 | #endif 58 | #endif 59 | 60 | #if defined(IS_X86) 61 | #define MAX_SIMD_DEGREE 16 62 | #elif BLAKE3_USE_NEON == 1 63 | #define MAX_SIMD_DEGREE 4 64 | #else 65 | #define MAX_SIMD_DEGREE 1 66 | #endif 67 | 68 | // There are some places where we want a static size that's equal to the 69 | // MAX_SIMD_DEGREE, but also at least 2. 70 | #define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? 
MAX_SIMD_DEGREE : 2) 71 | 72 | static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 73 | 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, 74 | 0x1F83D9ABUL, 0x5BE0CD19UL}; 75 | 76 | static const uint8_t MSG_SCHEDULE[7][16] = { 77 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 78 | {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8}, 79 | {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1}, 80 | {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6}, 81 | {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4}, 82 | {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7}, 83 | {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13}, 84 | }; 85 | 86 | /* Find index of the highest set bit */ 87 | /* x is assumed to be nonzero. */ 88 | static unsigned int highest_one(uint64_t x) { 89 | #if defined(__GNUC__) || defined(__clang__) 90 | return 63 ^ __builtin_clzll(x); 91 | #elif defined(_MSC_VER) && defined(IS_X86_64) 92 | unsigned long index; 93 | _BitScanReverse64(&index, x); 94 | return index; 95 | #elif defined(_MSC_VER) && defined(IS_X86_32) 96 | if(x >> 32) { 97 | unsigned long index; 98 | _BitScanReverse(&index, (unsigned long)(x >> 32)); 99 | return 32 + index; 100 | } else { 101 | unsigned long index; 102 | _BitScanReverse(&index, (unsigned long)x); 103 | return index; 104 | } 105 | #else 106 | unsigned int c = 0; 107 | if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; } 108 | if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; } 109 | if(x & 0x000000000000ff00ULL) { x >>= 8; c += 8; } 110 | if(x & 0x00000000000000f0ULL) { x >>= 4; c += 4; } 111 | if(x & 0x000000000000000cULL) { x >>= 2; c += 2; } 112 | if(x & 0x0000000000000002ULL) { c += 1; } 113 | return c; 114 | #endif 115 | } 116 | 117 | // Count the number of 1 bits. 
118 | INLINE unsigned int popcnt(uint64_t x) { 119 | #if defined(__GNUC__) || defined(__clang__) 120 | return __builtin_popcountll(x); 121 | #else 122 | unsigned int count = 0; 123 | while (x != 0) { 124 | count += 1; 125 | x &= x - 1; 126 | } 127 | return count; 128 | #endif 129 | } 130 | 131 | // Largest power of two less than or equal to x. As a special case, returns 1 132 | // when x is 0. 133 | INLINE uint64_t round_down_to_power_of_2(uint64_t x) { 134 | return 1ULL << highest_one(x | 1); 135 | } 136 | 137 | INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; } 138 | 139 | INLINE uint32_t counter_high(uint64_t counter) { 140 | return (uint32_t)(counter >> 32); 141 | } 142 | 143 | INLINE uint32_t load32(const void *src) { 144 | const uint8_t *p = (const uint8_t *)src; 145 | return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | 146 | ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24); 147 | } 148 | 149 | INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], 150 | uint32_t key_words[8]) { 151 | key_words[0] = load32(&key[0 * 4]); 152 | key_words[1] = load32(&key[1 * 4]); 153 | key_words[2] = load32(&key[2 * 4]); 154 | key_words[3] = load32(&key[3 * 4]); 155 | key_words[4] = load32(&key[4 * 4]); 156 | key_words[5] = load32(&key[5 * 4]); 157 | key_words[6] = load32(&key[6 * 4]); 158 | key_words[7] = load32(&key[7 * 4]); 159 | } 160 | 161 | INLINE void store32(void *dst, uint32_t w) { 162 | uint8_t *p = (uint8_t *)dst; 163 | p[0] = (uint8_t)(w >> 0); 164 | p[1] = (uint8_t)(w >> 8); 165 | p[2] = (uint8_t)(w >> 16); 166 | p[3] = (uint8_t)(w >> 24); 167 | } 168 | 169 | INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) { 170 | store32(&bytes_out[0 * 4], cv_words[0]); 171 | store32(&bytes_out[1 * 4], cv_words[1]); 172 | store32(&bytes_out[2 * 4], cv_words[2]); 173 | store32(&bytes_out[3 * 4], cv_words[3]); 174 | store32(&bytes_out[4 * 4], cv_words[4]); 175 | store32(&bytes_out[5 * 4], cv_words[5]); 176 | 
store32(&bytes_out[6 * 4], cv_words[6]); 177 | store32(&bytes_out[7 * 4], cv_words[7]); 178 | } 179 | 180 | EXPORT void blake3_compress_in_place(uint32_t cv[8], 181 | const uint8_t block[BLAKE3_BLOCK_LEN], 182 | uint8_t block_len, uint64_t counter, 183 | uint8_t flags); 184 | 185 | EXPORT void blake3_compress_xof(const uint32_t cv[8], 186 | const uint8_t block[BLAKE3_BLOCK_LEN], 187 | uint8_t block_len, uint64_t counter, uint8_t flags, 188 | uint8_t out[64]); 189 | 190 | EXPORT void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, 191 | size_t blocks, const uint32_t key[8], uint64_t counter, 192 | bool increment_counter, uint8_t flags, 193 | uint8_t flags_start, uint8_t flags_end, uint8_t *out); 194 | 195 | EXPORT size_t blake3_simd_degree(void); 196 | 197 | 198 | // Declarations for implementation-specific functions. 199 | EXPORT void blake3_compress_in_place_portable(uint32_t cv[8], 200 | const uint8_t block[BLAKE3_BLOCK_LEN], 201 | uint8_t block_len, uint64_t counter, 202 | uint8_t flags); 203 | 204 | EXPORT void blake3_compress_xof_portable(const uint32_t cv[8], 205 | const uint8_t block[BLAKE3_BLOCK_LEN], 206 | uint8_t block_len, uint64_t counter, 207 | uint8_t flags, uint8_t out[64]); 208 | 209 | EXPORT void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, 210 | size_t blocks, const uint32_t key[8], 211 | uint64_t counter, bool increment_counter, 212 | uint8_t flags, uint8_t flags_start, 213 | uint8_t flags_end, uint8_t *out); 214 | 215 | #if defined(IS_X86) 216 | #if !defined(BLAKE3_NO_SSE2) 217 | EXPORT void blake3_compress_in_place_sse2(uint32_t cv[8], 218 | const uint8_t block[BLAKE3_BLOCK_LEN], 219 | uint8_t block_len, uint64_t counter, 220 | uint8_t flags); 221 | EXPORT void blake3_compress_xof_sse2(const uint32_t cv[8], 222 | const uint8_t block[BLAKE3_BLOCK_LEN], 223 | uint8_t block_len, uint64_t counter, 224 | uint8_t flags, uint8_t out[64]); 225 | EXPORT void blake3_hash_many_sse2(const uint8_t *const 
*inputs, size_t num_inputs, 226 | size_t blocks, const uint32_t key[8], 227 | uint64_t counter, bool increment_counter, 228 | uint8_t flags, uint8_t flags_start, 229 | uint8_t flags_end, uint8_t *out); 230 | #endif 231 | #if !defined(BLAKE3_NO_SSE41) 232 | EXPORT void blake3_compress_in_place_sse41(uint32_t cv[8], 233 | const uint8_t block[BLAKE3_BLOCK_LEN], 234 | uint8_t block_len, uint64_t counter, 235 | uint8_t flags); 236 | EXPORT void blake3_compress_xof_sse41(const uint32_t cv[8], 237 | const uint8_t block[BLAKE3_BLOCK_LEN], 238 | uint8_t block_len, uint64_t counter, 239 | uint8_t flags, uint8_t out[64]); 240 | EXPORT void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs, 241 | size_t blocks, const uint32_t key[8], 242 | uint64_t counter, bool increment_counter, 243 | uint8_t flags, uint8_t flags_start, 244 | uint8_t flags_end, uint8_t *out); 245 | #endif 246 | #if !defined(BLAKE3_NO_AVX2) 247 | EXPORT void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs, 248 | size_t blocks, const uint32_t key[8], 249 | uint64_t counter, bool increment_counter, 250 | uint8_t flags, uint8_t flags_start, 251 | uint8_t flags_end, uint8_t *out); 252 | #endif 253 | #if !defined(BLAKE3_NO_AVX512) 254 | EXPORT void blake3_compress_in_place_avx512(uint32_t cv[8], 255 | const uint8_t block[BLAKE3_BLOCK_LEN], 256 | uint8_t block_len, uint64_t counter, 257 | uint8_t flags); 258 | 259 | EXPORT void blake3_compress_xof_avx512(const uint32_t cv[8], 260 | const uint8_t block[BLAKE3_BLOCK_LEN], 261 | uint8_t block_len, uint64_t counter, 262 | uint8_t flags, uint8_t out[64]); 263 | 264 | EXPORT void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, 265 | size_t blocks, const uint32_t key[8], 266 | uint64_t counter, bool increment_counter, 267 | uint8_t flags, uint8_t flags_start, 268 | uint8_t flags_end, uint8_t *out); 269 | #endif 270 | #endif 271 | 272 | #if BLAKE3_USE_NEON == 1 273 | EXPORT void blake3_hash_many_neon(const 
uint8_t *const *inputs, size_t num_inputs, 274 | size_t blocks, const uint32_t key[8], 275 | uint64_t counter, bool increment_counter, 276 | uint8_t flags, uint8_t flags_start, 277 | uint8_t flags_end, uint8_t *out); 278 | #endif 279 | 280 | 281 | #endif /* BLAKE3_IMPL_H */ 282 | --------------------------------------------------------------------------------