├── .gitignore
├── src
    ├── config
    │   ├── config_vars.ml.default
    │   ├── dune
    │   └── flags.ml
    ├── poly1305-donna.h
    ├── keccak.h
    ├── d3des.h
    ├── chacha20.h
    ├── blowfish.h
    ├── pclmul.h
    ├── arcfour.h
    ├── sha1.h
    ├── ghash.h
    ├── poly1305-donna.c
    ├── dune
    ├── ripemd160.h
    ├── siphash.h
    ├── sha256.h
    ├── cryptokitBignumOld.mli
    ├── sha512.h
    ├── stubs-sha1.c
    ├── cryptokitBignum.mli
    ├── stubs-ripemd160.c
    ├── stubs-poly1305.c
    ├── stubs-des.c
    ├── aesni.h
    ├── stubs-siphash.c
    ├── blake2.h
    ├── arcfour.c
    ├── stubs-misc.c
    ├── stubs-arcfour.c
    ├── rijndael-alg-fst.h
    ├── stubs-sha256.c
    ├── stubs-md5.c
    ├── blake3.h
    ├── stubs-ghash.c
    ├── stubs-chacha20.c
    ├── stubs-blowfish.c
    ├── stubs-blake2.c
    ├── stubs-blake3.c
    ├── stubs-sha512.c
    ├── stubs-sha3.c
    ├── stubs-aes.c
    ├── pclmul.c
    ├── cryptokitBignum.ml
    ├── ghash.c
    ├── siphash.c
    ├── sha1.c
    ├── chacha20.c
    ├── stubs-rng.c
    ├── keccak.c
    ├── poly1305-donna-64.h
    ├── blake3_portable.c
    ├── poly1305-donna-32.h
    ├── sha256.c
    ├── stubs-zlib.c
    ├── blake3_dispatch.c
    ├── sha512.c
    └── blake3_impl.h
├── test
    ├── dune
    ├── prngtest.ml
    └── speedtest.ml
├── .github
    └── workflows
    │   └── build.yml
├── dune-project
├── cryptokit.opam
├── configure
├── Changes
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | setup.data
3 | setup.log
4 | *.native
5 | *~
6 | .merlin
7 | # This file is generated by the configure script
8 | src/config/config_vars.ml
9 | 


--------------------------------------------------------------------------------
/src/config/config_vars.ml.default:
--------------------------------------------------------------------------------
1 | type 'a value =
2 |     | This of 'a
3 |     | Auto
4 | 
5 | let enable_zlib = Auto
6 | 
7 | let enable_hardware_support = Auto
8 | 


--------------------------------------------------------------------------------
/src/config/dune:
--------------------------------------------------------------------------------
1 | (executable
2 |   (name flags)
3 |   (libraries dune-configurator))
4 | 
5 | (rule
6 |   (mode fallback)
7 |   (action (copy config_vars.ml.default config_vars.ml)))
8 | 


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
 1 | (test
 2 |  (name test)
 3 |  (modules test)
 4 |  (libraries cryptokit)
 5 |  (flags :standard -w -35 -alert -crypto))
 6 | 
 7 | (executable
 8 |  (name prngtest)
 9 |  (modules prngtest)
10 |  (libraries cryptokit))
11 | 
12 | (executable
13 |  (name speedtest)
14 |  (modules speedtest)
15 |  (libraries cryptokit)
16 |  (flags :standard -w -35 -alert -crypto))
17 | 


--------------------------------------------------------------------------------
/src/poly1305-donna.h:
--------------------------------------------------------------------------------
 1 | /* Poly1305 implementation written by Andrew Moon,
 2 |           https://github.com/floodyberry/poly1305-donna
 3 |    License: MIT or public domain.
 4 |    Minor adaptations for Cryptokit by Xavier Leroy. */
 5 | 
 6 | #ifndef POLY1305_DONNA_H
 7 | #define POLY1305_DONNA_H
 8 | 
 9 | #include <stddef.h>
10 | 
11 | typedef struct poly1305_context {
12 | 	size_t aligner;
13 | 	unsigned char opaque[136];
14 | } poly1305_context;
15 | 
16 | EXPORT void poly1305_init(poly1305_context *ctx, const unsigned char key[32]);
17 | EXPORT void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes);
18 | EXPORT void poly1305_finish(poly1305_context *ctx, unsigned char mac[16]);
19 | 
20 | #endif /* POLY1305_DONNA_H */
21 | 
22 | 


--------------------------------------------------------------------------------
/src/keccak.h:
--------------------------------------------------------------------------------
 1 | /* SHA-3 (Keccak) cryptographic hash function */
 2 | 
 3 | typedef unsigned long long u64;
 4 | 
 5 | struct SHA3Context {
 6 |   u64 state[25];
 7 |   unsigned char buffer[144];
 8 |   int numbytes;       /* number of bytes in buffer */
 9 |   int rsiz;           /* number of message bytes processed by permutation */
10 |   int hsiz;           /* size of hash in bytes */
11 | };
12 | 
13 | EXPORT void SHA3_init(struct SHA3Context * ctx, int hsiz);
14 | 
15 | EXPORT void SHA3_absorb(struct SHA3Context * ctx, 
16 |                         unsigned char * data,
17 |                         unsigned long len);
18 | 
19 | EXPORT void SHA3_extract(unsigned char padding,
20 |                          struct SHA3Context * ctx,
21 |                          unsigned char * output);
22 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build and test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   build:
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os:
18 |           - ubuntu-latest
19 |           - macos-latest
20 |           - windows-latest
21 | 
22 |     runs-on: ${{ matrix.os }}
23 | 
24 |     steps:
25 |       - name: Checkout tree
26 |         uses: actions/checkout@v4
27 | 
28 |       - name: Set-up OCaml
29 |         uses: ocaml/setup-ocaml@v3
30 |         with:
31 |           ocaml-compiler: 5
32 | 
33 |       - run: opam install . --deps-only --with-test
34 | 
35 |       - run: opam exec -- dune build
36 | 
37 |       - run: opam exec -- dune runtest
38 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 2.5)
 2 | (generate_opam_files true)
 3 | 
 4 | (name cryptokit)
 5 | (source (github xavierleroy/cryptokit))
 6 | (authors "Xavier Leroy")
 7 | (maintainers "Xavier Leroy <xavier.leroy@college-de-france.fr>")
 8 | 
 9 | (package
10 |  (name cryptokit)
11 |  (synopsis "A library of cryptographic primitives")
12 |  (version 1.20)
13 |  (license "LGPL-2.0-or-later WITH OCaml-LGPL-linking-exception")
14 |  (description "Cryptokit includes authenticated encryption (AES-GCM, Chacha20-Poly1305), block ciphers (AES, DES, 3DES), stream ciphers (Chacha20, ARCfour), public-key cryptography (RSA, DH), hashes (SHA-256, SHA-512, SHA-3, Blake2, Blake3), MACs, compression, random number generation -- all presented with a compositional, extensible interface.")
15 | 
16 | (depends
17 |   (ocaml (>= 4.08.0))
18 |   (dune (>= 2.5))
19 |   dune-configurator
20 |   (zarith (>= 1.4))
21 |   conf-zlib
22 |   conf-gmp-powm-sec))
23 | 


--------------------------------------------------------------------------------
/src/d3des.h:
--------------------------------------------------------------------------------
 1 | /* d3des.h -
 2 |  *
 3 |  *	Headers and defines for d3des.c
 4 |  *	Graven Imagery, 1992.
 5 |  *
 6 |  * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge
 7 |  *	(GEnie : OUTER; CIS : [71755,204])
 8 |  *
 9 |  * Modified and adapted by Xavier Leroy, 2002.
10 |  */
11 | 
12 | #define EN0	0	/* MODE == encrypt */
13 | #define DE1	1	/* MODE == decrypt */
14 | 
15 | typedef unsigned char u8;
16 | typedef unsigned int u32;
17 | 
18 | EXPORT void d3des_cook_key(u8 key[8], int mode, u32 res[32]);
19 | /* Sets the key register [res] from the DES key held in
20 |  * the 8 bytes of [key]; [mode] (EN0 or DE1) selects whether
21 |  * the register is prepared for encryption or decryption.
22 |  */
23 | 
24 | EXPORT void d3des_transform(u32 key[32], u8 from[8], u8 to[8]);
25 | /* Encrypts/Decrypts (according to the key [key])
26 |  * one block of eight bytes at address 'from'
27 |  * into the block at address 'to'.  They can be the same.
28 |  */
29 | 


--------------------------------------------------------------------------------
/src/chacha20.h:
--------------------------------------------------------------------------------
 1 | /* Based on D. J. Bernstein's chacha-regs.c version 20080118,
 2 |   https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c
 3 |   The initial code is in the public domain */
 4 | 
 5 | #include <stddef.h>
 6 | #include <stdint.h>
 7 | 
 8 | typedef struct {
 9 |   uint32_t input[16];           /* The current state */
10 |   uint8_t output[64];           /* Output data for the current state */
11 |   int next;                     /* Index of next unused byte in output */
12 |   int iv_length;                /* 8 or 12 */
13 | } chacha20_ctx;
14 | 
15 | EXPORT void chacha20_init(chacha20_ctx * ctx,
16 |                           const uint8_t * key, size_t key_length,
17 |                           const uint8_t * iv, size_t iv_length,
18 |                           uint64_t ctr);
19 | 
20 | EXPORT void chacha20_extract(chacha20_ctx * ctx,
21 |                              uint8_t * out, size_t len);
22 | 
23 | EXPORT void chacha20_transform(chacha20_ctx * ctx,
24 |                                const uint8_t * in, uint8_t * out, size_t len);
25 | 


--------------------------------------------------------------------------------
/src/blowfish.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | blowfish.h:  Header file for blowfish.c
 3 | 
 4 | Copyright (C) 1997 by Paul Kocher
 5 | 
 6 | This library is free software; you can redistribute it and/or
 7 | modify it under the terms of the GNU Lesser General Public
 8 | License as published by the Free Software Foundation; either
 9 | version 2.1 of the License, or (at your option) any later version.
10 | This library is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 | Lesser General Public License for more details.
14 | You should have received a copy of the GNU Lesser General Public
15 | License along with this library; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 | 
18 | 
19 | See blowfish.c for more information about this file.
20 | */
21 | 
22 | typedef unsigned int u32;
23 |   
24 | typedef struct {
25 |   u32 P[16 + 2];
26 |   u32 S[4][256];
27 | } BLOWFISH_CTX;
28 | 
29 | EXPORT void Blowfish_Init(BLOWFISH_CTX *ctx, unsigned char *key, int keyLen);
30 | EXPORT void Blowfish_Encrypt(BLOWFISH_CTX *ctx, u32 *xl, u32 *xr);
31 | EXPORT void Blowfish_Decrypt(BLOWFISH_CTX *ctx, u32 *xl, u32 *xr);
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/cryptokit.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | version: "1.20"
 4 | synopsis: "A library of cryptographic primitives"
 5 | description:
 6 |   "Cryptokit includes authenticated encryption (AES-GCM, Chacha20-Poly1305), block ciphers (AES, DES, 3DES), stream ciphers (Chacha20, ARCfour), public-key cryptography (RSA, DH), hashes (SHA-256, SHA-512, SHA-3, Blake2, Blake3), MACs, compression, random number generation -- all presented with a compositional, extensible interface."
 7 | maintainer: ["Xavier Leroy <xavier.leroy@college-de-france.fr>"]
 8 | authors: ["Xavier Leroy"]
 9 | license: "LGPL-2.0-or-later WITH OCaml-LGPL-linking-exception"
10 | x-maintenance-intent: ["(latest)"]
11 | homepage: "https://github.com/xavierleroy/cryptokit"
12 | bug-reports: "https://github.com/xavierleroy/cryptokit/issues"
13 | depends: [
14 |   "ocaml" {>= "4.08.0"}
15 |   "dune" {>= "2.5"}
16 |   "dune-configurator"
17 |   "zarith" {>= "1.4"}
18 |   "conf-zlib"
19 |   "conf-gmp-powm-sec"
20 | ]
21 | build: [
22 |   ["dune" "subst"] {pinned}
23 |   [
24 |     "dune"
25 |     "build"
26 |     "-p"
27 |     name
28 |     "-j"
29 |     jobs
30 |     "@install"
31 |     "@runtest" {with-test}
32 |     "@doc" {with-doc}
33 |   ]
34 | ]
35 | dev-repo: "git+https://github.com/xavierleroy/cryptokit.git"
36 | 


--------------------------------------------------------------------------------
/src/pclmul.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, Collège de France and Inria                */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Hardware-accelerated implementation of GHASH multiplication */
15 | 
16 | EXPORT int pclmul_available;
17 | /* -1: unknown, call pclmul_check_available() to determine 
18 |     0: not available
19 |     1: available
20 | */
21 | 
22 | EXPORT int pclmul_check_available(void);
23 | 
24 | EXPORT void pclmul_mult(uint8_t res[16],
25 |                         const uint8_t arg1[16], const uint8_t arg2[16]);
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/src/arcfour.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* $Id$ */
15 | 
16 | struct arcfour_key
17 | {      
18 |   unsigned char state[256];       
19 |   unsigned char x, y;
20 | };
21 | 
22 | EXPORT void arcfour_cook_key(struct arcfour_key * key,
23 |                              unsigned char * key_data,
24 |                              int key_data_len);
25 | 
26 | EXPORT void arcfour_encrypt(struct arcfour_key * key,
27 |                             char * src, char * dst, long len);
28 | 
29 | 


--------------------------------------------------------------------------------
/src/sha1.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* $Id$ */
15 | 
16 | /* SHA-1 hashing */
17 | 
18 | typedef unsigned int u32;
19 | 
20 | struct SHA1Context {
21 |   u32 state[5];
22 |   u32 length[2];
23 |   int numbytes;
24 |   unsigned char buffer[64];
25 | };
26 | 
27 | EXPORT void SHA1_init(struct SHA1Context * ctx);
28 | EXPORT void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data,
29 |                           unsigned long len);
30 | EXPORT void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]);
31 | 


--------------------------------------------------------------------------------
/src/ghash.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, Collège de France and Inria                */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Software implementation of GHASH multiplication */
15 | 
16 | struct ghash_context {
17 |     uint64_t HL[16];        // precalculated lo-half HTable
18 |     uint64_t HH[16];        // precalculated hi-half HTable
19 | };
20 | 
21 | EXPORT void ghash_init(struct ghash_context * ctx,
22 |                        const uint8_t h[16]);
23 | 
24 | EXPORT void ghash_mult(const struct ghash_context * ctx,
25 |                        const uint8_t input[16],
26 |                        uint8_t output[16]);
27 | 


--------------------------------------------------------------------------------
/src/poly1305-donna.c:
--------------------------------------------------------------------------------
 1 | /* Poly1305 implementation written by Andrew Moon,
 2 |           https://github.com/floodyberry/poly1305-donna
 3 |    License: MIT or public domain.
 4 |    Minor adaptations for Cryptokit by Xavier Leroy. */
 5 | 
 6 | #include "poly1305-donna.h"
 7 | 
 8 | /* auto detect between 32bit / 64bit */
 9 | #if defined(__SIZEOF_INT128__) && defined(__LP64__)
10 | #include "poly1305-donna-64.h"
11 | #else
12 | #include "poly1305-donna-32.h"
13 | #endif
14 | 
15 | /* Feed [bytes] bytes at [m] into the MAC state [ctx]; input that does not fill a whole block is kept in the state's buffer. */
16 | EXPORT void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) {
17 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
18 | 	size_t i;
19 | 
20 | 	/* handle leftover: first top up the partially filled buffer from the new input */
21 | 	if (st->leftover) {
22 | 		size_t want = (poly1305_block_size - st->leftover);
23 | 		if (want > bytes)
24 | 			want = bytes;
25 | 		for (i = 0; i < want; i++)
26 | 			st->buffer[st->leftover + i] = m[i];
27 | 		bytes -= want;
28 | 		m += want;
29 | 		st->leftover += want;
30 | 		if (st->leftover < poly1305_block_size)
31 | 			return; /* buffer still not full: nothing to process yet */
32 | 		poly1305_blocks(st, st->buffer, poly1305_block_size);
33 | 		st->leftover = 0;
34 | 	}
35 | 
36 | 	/* process full blocks straight from the input; the mask rounds [bytes] down to a multiple of the block size (assumes a power-of-two block size) */
37 | 	if (bytes >= poly1305_block_size) {
38 | 		size_t want = (bytes & ~(poly1305_block_size - 1));
39 | 		poly1305_blocks(st, m, want);
40 | 		m += want;
41 | 		bytes -= want;
42 | 	}
43 | 
44 | 	/* store leftover: fewer than one block remains, keep it in the buffer */
45 | 	if (bytes) {
46 | 		for (i = 0; i < bytes; i++)
47 | 			st->buffer[st->leftover + i] = m[i];
48 | 		st->leftover += bytes;
49 | 	}
50 | }
51 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (library (name cryptokit) (public_name cryptokit)
 2 |  (libraries unix zarith)
 3 |  (wrapped false)
 4 |  (modules CryptokitBignum Cryptokit)
 5 |  (foreign_stubs
 6 |   (language c)
 7 |   (flags -DCAML_NAME_SPACE -DEXPORT=static
 8 |          -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512
 9 |          -DBLAKE3_USE_NEON=0
10 |          (:include flags.sexp))
11 |   (names stubs-arcfour
12 |          stubs-blowfish
13 |          stubs-des
14 |          stubs-ripemd160
15 |          stubs-sha1
16 |          stubs-sha256
17 |          stubs-sha512
18 |          stubs-aes
19 |          stubs-md5
20 |          stubs-misc
21 |          stubs-rng
22 |          stubs-zlib
23 |          stubs-sha3
24 |          stubs-chacha20
25 |          stubs-blake2
26 |          stubs-ghash
27 |          stubs-poly1305
28 |          stubs-siphash
29 |          stubs-blake3)
30 |   (extra_deps
31 |     aesni.c
32 |     arcfour.c
33 |     blowfish.c
34 |     d3des.c
35 |     rijndael-alg-fst.c
36 |     ripemd160.c
37 |     sha1.c
38 |     sha256.c
39 |     sha512.c
40 |     keccak.c
41 |     chacha20.c
42 |     blake2.c
43 |     ghash.c
44 |     pclmul.c
45 |     poly1305-donna.c
46 |     siphash.c
47 |     blake3.c
48 |     blake3_dispatch.c
49 |     blake3_portable.c))
50 |   (c_library_flags (:include library_flags.sexp))
51 |   (flags :standard -safe-string -w -7 -w -27 -w -37))
52 | 
53 | ; compute flags
54 | (rule
55 |  (alias configure)
56 |  (targets flags.sexp library_flags.sexp)
57 |  (action (run config/flags.exe)))
58 | 


--------------------------------------------------------------------------------
/src/ripemd160.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2005 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* $Id$ */
15 | 
16 | /* RIPEMD160 hashing */
17 | 
18 | typedef unsigned int u32;
19 | 
20 | struct RIPEMD160Context {
21 |   u32 state[5];
22 |   u32 length[2];
23 |   int numbytes;
24 |   unsigned char buffer[64];
25 | };
26 | 
27 | EXPORT void RIPEMD160_init(struct RIPEMD160Context * ctx);
28 | EXPORT void RIPEMD160_add_data(struct RIPEMD160Context * ctx, 
29 |                                unsigned char * data,
30 |                                unsigned long len);
31 | EXPORT void RIPEMD160_finish(struct RIPEMD160Context * ctx, 
32 |                              unsigned char output[20]);
33 | 


--------------------------------------------------------------------------------
/src/siphash.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*              Xavier Leroy, Collège de France and Inria              */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #define SIPHASH_BUFLEN 8
15 | 
16 | struct siphash {
17 |   uint64_t v0, v1, v2, v3;
18 |   unsigned char buffer[SIPHASH_BUFLEN];
19 |   int used;        /* number of valid bytes in buffer */
20 |   uint8_t len8;    /* 8 low bits of total data length */
21 | };
22 | 
23 | EXPORT void siphash_init(struct siphash * st,
24 |                          const unsigned char * key, int outlen);
25 | EXPORT void siphash_add(struct siphash * st,
26 |                         const unsigned char * p, size_t len);
27 | EXPORT void siphash_final(struct siphash * st,
28 |                           int outlen, unsigned char * out);
29 | 
30 | 


--------------------------------------------------------------------------------
/src/sha256.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* $Id$ */
15 | 
16 | /* SHA-256 hashing */
17 | 
18 | #ifndef _MSC_VER
19 | #include <stdint.h>
20 | typedef uint32_t u32;
21 | #else
22 | typedef unsigned int u32;
23 | #endif
24 | 
25 | struct SHA256Context {
26 |   u32 state[8];
27 |   u32 length[2];
28 |   int numbytes;
29 |   unsigned char buffer[64];
30 | };
31 | 
32 | EXPORT void SHA256_init(struct SHA256Context * ctx, int bitsize);
33 | EXPORT void SHA256_add_data(struct SHA256Context * ctx, unsigned char * data,
34 |                             unsigned long len);
35 | EXPORT void SHA256_finish(struct SHA256Context * ctx, 
36 |                           int bitsize,
37 |                           unsigned char * output);
38 | 


--------------------------------------------------------------------------------
/src/cryptokitBignumOld.mli:
--------------------------------------------------------------------------------
 1 | (***********************************************************************)
 2 | (*                                                                     *)
 3 | (*                      The Cryptokit library                          *)
 4 | (*                                                                     *)
 5 | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
 6 | (*                                                                     *)
 7 | (*  Copyright 2002 Institut National de Recherche en Informatique et   *)
 8 | (*  en Automatique.  All rights reserved.  This file is distributed    *)
 9 | (*  under the terms of the GNU Library General Public License, with    *)
10 | (*  the special exception on linking described in file LICENSE.        *)
11 | (*                                                                     *)
12 | (***********************************************************************)
13 | 
14 | (* Arithmetic on big integers *)
15 | 
16 | type t
17 | 
18 | val zero : t
19 | val one : t
20 | val of_int : int -> t
21 | 
22 | val compare : t -> t -> int
23 | 
24 | val add : t -> t -> t
25 | val sub : t -> t -> t
26 | val mult : t -> t -> t
27 | val mod_ : t -> t -> t
28 | 
29 | val relative_prime : t -> t -> bool
30 | val mod_power : t -> t -> t -> t
31 | val mod_power_CRT : t -> t -> t -> t -> t -> t -> t
32 | val mod_inv : t -> t -> t
33 | 
34 | val of_bytes : string -> t
35 | val to_bytes : ?numbits:int -> t -> bytes
36 | 
37 | val random : rng:(bytes -> int -> int -> unit) -> ?lowbits:int -> int -> t
38 | val random_prime : rng:(bytes -> int -> int -> unit) -> int -> t
39 | 
40 | val wipe : t -> unit
41 | 
42 | 


--------------------------------------------------------------------------------
/src/sha512.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2015 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* $Id: sha256.h 53 2010-08-30 10:53:00Z gildor-admin $ */
15 | 
16 | /* SHA-512 hashing */
17 | 
18 | #ifndef _MSC_VER
19 | #include <stdint.h>
20 | typedef uint64_t u64;
21 | #else
22 | typedef unsigned __int64 u64;
23 | #define UINT64_C(x) x##ui64
24 | #endif
25 | 
26 | struct SHA512Context {
27 |   u64 state[8];
28 |   u64 length[2];
29 |   int numbytes;
30 |   unsigned char buffer[128];
31 | };
32 | 
33 | EXPORT void SHA512_init(struct SHA512Context * ctx, int bitsize);
34 | EXPORT void SHA512_add_data(struct SHA512Context * ctx, unsigned char * data,
35 |                             unsigned long len);
36 | EXPORT void SHA512_finish(struct SHA512Context * ctx, int bitsize,
37 |                           unsigned char * output);
38 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ocaml
 2 | (* -*- tuareg -*- *)
 3 | 
 4 | type 'a value =
 5 |     | This of 'a
 6 |     | Auto
 7 | 
 8 | let string_of_value to_string = function
 9 |   | This a -> "This (" ^ to_string a ^ ")"
10 |   | Auto -> "Auto"
11 | 
12 | let () =
13 |   (* Build the --enable-X / --disable-X pair for one option, plus the cell
14 |      that records which of the two (if any) was given. *)
15 |   let declare_flag arg description =
16 |     let choice = ref Auto in
17 |     let set b = Arg.Unit (fun () -> choice := This b) in
18 |     [ ("--enable-" ^ arg, set true, " Enable " ^ description);
19 |       ("--disable-" ^ arg, set false, " Disable " ^ description) ], choice
20 |   in
21 |   let args_zlib, ref_zlib = declare_flag "zlib" "ZLib" in
22 |   let args_hardware_support, ref_hardware_support =
23 |     declare_flag "hardwaresupport"
24 |       "hardware support for AES and GCM (needs GCC or Clang)" in
25 |   let reject s =
26 |     raise (Arg.Bad (Printf.sprintf "don't know what to do with %S" s)) in
27 |   let specs = Arg.align (args_zlib @ args_hardware_support) in
28 |   Arg.parse specs reject "Usage: ./configure [OPTIONS]";
29 |   let show setting = string_of_value string_of_bool setting in
30 |   (* Record the two choices as OCaml definitions in src/config. *)
31 |   let oc = open_out_bin "src/config/config_vars.ml" in
32 |   Printf.fprintf oc {|
33 | type 'a value =
34 |     | This of 'a
35 |     | Auto
36 | 
37 | let enable_zlib = %s
38 | let enable_hardware_support = %s
39 | |}
40 |     (show !ref_zlib)
41 |     (show !ref_hardware_support);
42 |   close_out oc;
43 |   (* Temporary workaround: delete this file, ignoring any failure, so that
44 |      the configuration happens again on every run of this script. *)
45 |   let stale = "_build/default/src/flags.sexp" in
46 |   (try Sys.remove stale with _ -> ());
47 |   (* Hand over to dune and exit with the status it reports. *)
48 |   exit (Sys.command "dune build @configure --release")
49 | 


--------------------------------------------------------------------------------
/src/stubs-sha1.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "sha1.c"
15 | #include <caml/mlvalues.h>
16 | #include <caml/memory.h>
17 | #include <caml/alloc.h>
18 | 
19 | #define Context_val(v) ((struct SHA1Context *) String_val(v))
20 | 
21 | /* Allocate an OCaml string big enough for a SHA1Context and initialize it. */
22 | CAMLprim value caml_sha1_init(value unit) {
23 |   value block = caml_alloc_string(sizeof(struct SHA1Context));
24 |   SHA1_init(Context_val(block));
25 |   return block;
26 | }
27 | 
28 | CAMLprim value caml_sha1_update(value ctx, value src, value ofs, value len) {
29 |   unsigned char * data = (unsigned char *) String_val(src) + Long_val(ofs);
30 |   SHA1_add_data(Context_val(ctx), data, Long_val(len)); /* len bytes from src+ofs */
31 |   return Val_unit;
32 | }
33 | 
34 | /* Call SHA1_finish on ctx and return the 20 bytes it writes as a new string.
35 |    ctx is registered as a root because the allocation below may run the GC. */
36 | CAMLprim value caml_sha1_final(value ctx) {
37 |   CAMLparam1(ctx);
38 |   CAMLlocal1(digest);
39 |   digest = caml_alloc_string(20);
40 |   SHA1_finish(Context_val(ctx), (unsigned char *) String_val(digest));
41 |   CAMLreturn(digest);
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/src/cryptokitBignum.mli:
--------------------------------------------------------------------------------
 1 | (***********************************************************************)
 2 | (*                                                                     *)
 3 | (*                      The Cryptokit library                          *)
 4 | (*                                                                     *)
 5 | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
 6 | (*                                                                     *)
 7 | (*  Copyright 2002 Institut National de Recherche en Informatique et   *)
 8 | (*  en Automatique.  All rights reserved.  This file is distributed    *)
 9 | (*  under the terms of the GNU Library General Public License, with    *)
10 | (*  the special exception on linking described in file LICENSE.        *)
11 | (*                                                                     *)
12 | (***********************************************************************)
13 | 
14 | (** Operations on big integers, used for the implementation of module
15 |     {!Cryptokit}. *)
16 | 
17 | type t  (* abstract: the representation of big integers is hidden by this interface *)
18 | 
19 | val zero : t
20 | val one : t
21 | val of_int : int -> t
22 | 
23 | val compare : t -> t -> int  (* NOTE(review): presumably the usual negative/zero/positive convention -- confirm *)
24 | 
25 | val add : t -> t -> t
26 | val sub : t -> t -> t
27 | val mult : t -> t -> t
28 | val div : t -> t -> t
29 | val lcm : t -> t -> t
30 | val mod_ : t -> t -> t  (* named mod_ because mod is a reserved word in OCaml *)
31 | 
32 | val relative_prime : t -> t -> bool
33 | val mod_power : t -> t -> t -> t  (* NOTE(review): argument order presumably base, exponent, modulus -- confirm in the .ml *)
34 | val mod_power_CRT : t -> t -> t -> t -> t -> t -> t  (* NOTE(review): six arguments of the same type, order not stated here -- confirm in the .ml *)
35 | val mod_inv : t -> t -> t
36 | 
37 | val of_bytes : string -> t  (* NOTE(review): byte order is not stated here -- confirm *)
38 | val to_bytes : ?numbits:int -> t -> string
39 | 
40 | val random : rng:(bytes -> int -> int -> unit) -> ?odd:bool -> int -> t  (* rng presumably receives (buffer, offset, length); the int is presumably a bit count -- confirm *)
41 | val random_prime : rng:(bytes -> int -> int -> unit) -> int -> t
42 | 
43 | val wipe : t -> unit
44 | 
45 | 


--------------------------------------------------------------------------------
/src/stubs-ripemd160.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2005 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "ripemd160.c"
15 | #include <caml/mlvalues.h>
16 | #include <caml/memory.h>
17 | #include <caml/alloc.h>
18 | 
19 | #define Context_val(v) ((struct RIPEMD160Context *) String_val(v))
20 | 
21 | /* Allocate an OCaml string holding a RIPEMD160Context and initialize it. */
22 | CAMLprim value caml_ripemd160_init(value unit) {
23 |   value block = caml_alloc_string(sizeof(struct RIPEMD160Context));
24 |   RIPEMD160_init(Context_val(block));
25 |   return block;
26 | }
27 | 
28 | CAMLprim value caml_ripemd160_update(value ctx, value src, value ofs, value len) {
29 |   unsigned char * data = (unsigned char *) String_val(src) + Long_val(ofs);
30 |   RIPEMD160_add_data(Context_val(ctx), data, Long_val(len)); /* len bytes from src+ofs */
31 |   return Val_unit;
32 | }
33 | 
34 | /* Call RIPEMD160_finish on ctx and return the 20 bytes it writes as a new
35 |    string.  ctx is rooted because the allocation below may run the GC. */
36 | CAMLprim value caml_ripemd160_final(value ctx) {
37 |   CAMLparam1(ctx);
38 |   CAMLlocal1(digest);
39 |   digest = caml_alloc_string(20);
40 |   RIPEMD160_finish(Context_val(ctx), (unsigned char *) String_val(digest));
41 |   CAMLreturn(digest);
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/src/stubs-poly1305.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, Collège de France and Inria                */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "poly1305-donna.c"
15 | #include <caml/mlvalues.h>
16 | #include <caml/memory.h>
17 | #include <caml/alloc.h>
18 | 
19 | #define Context_val(v) ((struct poly1305_context *) String_val(v))
20 | 
21 | /* Build a poly1305_context from key (rooted: the allocation may move it). */
22 | CAMLprim value caml_poly1305_init(value key) {
23 |   CAMLparam1(key);
24 |   value state = caml_alloc_string(sizeof(struct poly1305_context));
25 |   poly1305_init(Context_val(state), (unsigned char *) String_val(key));
26 |   CAMLreturn(state);
27 | }
28 | 
29 | CAMLprim value caml_poly1305_update(value ctx, value src, value ofs, value len) {
30 |   unsigned char * data = (unsigned char *) String_val(src) + Long_val(ofs);
31 |   poly1305_update(Context_val(ctx), data, Long_val(len)); /* len bytes from src+ofs */
32 |   return Val_unit;
33 | }
34 | 
35 | /* Call poly1305_finish on ctx; its 16 output bytes come back as a new string. */
36 | CAMLprim value caml_poly1305_final(value ctx) {
37 |   CAMLparam1(ctx);
38 |   CAMLlocal1(out);
39 |   out = caml_alloc_string(16);
40 |   poly1305_finish(Context_val(ctx), (unsigned char *) String_val(out));
41 |   CAMLreturn(out);
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/src/stubs-des.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Stub code for DES */
15 | 
16 | #include "d3des.c"
17 | #include <caml/mlvalues.h>
18 | #include <caml/memory.h>
19 | #include <caml/alloc.h>
20 | 
21 | #define Cooked_key_size (32 * sizeof(u32))
22 | 
23 | /* Run d3des_cook_key on the key bytes found at offset ofs of key and return
24 |    its output in a new string of Cooked_key_size bytes. */
25 | CAMLprim value caml_des_cook_key(value key, value ofs, value direction) {
26 |   CAMLparam2(key,direction);
27 |   value schedule = caml_alloc_string(Cooked_key_size);
28 |   u8 * raw_key = (u8 *) String_val(key) + Long_val(ofs);  /* taken after the allocation */
29 |   d3des_cook_key(raw_key, Int_val(direction), (u32 *) String_val(schedule));
30 |   CAMLreturn(schedule);
31 | }
32 | 
33 | /* Apply d3des_transform with key ckey: src + src_ofs in, dst + dst_ofs out. */
34 | CAMLprim value caml_des_transform(value ckey, value src, value src_ofs,
35 |                                   value dst, value dst_ofs) {
36 |   u8 * in = (u8 *) String_val(src) + Long_val(src_ofs);
37 |   u8 * out = (u8 *) String_val(dst) + Long_val(dst_ofs);
38 |   d3des_transform((u32 *) String_val(ckey), in, out);
39 |   return Val_unit;
40 | }
41 | 
42 | 


--------------------------------------------------------------------------------
/src/aesni.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Gallium, INRIA Paris                */
 6 | /*                                                                     */
 7 | /*  Copyright 2016 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Hardware-accelerated implementation of AES */
15 | 
16 | EXPORT int aesni_available;
17 | /* -1: unknown, call aesni_check_available() to determine 
18 |     0: not available
19 |     1: available
20 | */
21 | 
22 | EXPORT int aesni_check_available(void);  /* per the note above, settles aesni_available when it is -1; NOTE(review): result presumably that same 0/1 flag -- confirm */
23 | 
24 | EXPORT int aesniKeySetupEnc(unsigned char * ckey,
25 |                             const unsigned char * key,
26 |                             int keylength);  /* NOTE(review): unit of keylength and meaning of the int result are not visible here -- confirm */
27 | 
28 | EXPORT int aesniKeySetupDec(unsigned char * ckey,
29 |                             const unsigned char * key,
30 |                             int keylength);  /* same signature as aesniKeySetupEnc */
31 | 
32 | EXPORT void aesniEncrypt(const unsigned char * key, int nrounds,
33 |                          const unsigned char * in,
34 |                          unsigned char * out);  /* NOTE(review): key is presumably the ckey filled by a key setup call -- confirm */
35 | 
36 | EXPORT void aesniDecrypt(const unsigned char * key, int nrounds,
37 |                          const unsigned char * in,
38 |                          unsigned char * out);  /* same signature as aesniEncrypt */
39 |     
40 | 
41 | 


--------------------------------------------------------------------------------
/src/stubs-siphash.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*              Xavier Leroy, Collège de France and Inria              */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <stdint.h>
15 | #include <string.h>
16 | #include "siphash.c"
17 | 
18 | #include <caml/mlvalues.h>
19 | #include <caml/memory.h>
20 | #include <caml/alloc.h>
21 | 
22 | #define siphash_val(v) ((struct siphash *) String_val(v))
23 | 
24 | CAMLprim value caml_siphash_init(value key, value hashlen) {
25 |   CAMLparam1(key);  /* root key: it is read after an allocation that may move it */
26 |   value ctx = caml_alloc_string(sizeof(struct siphash));  /* new context block */
27 |   siphash_init(siphash_val(ctx), &Byte_u(key, 0), Int_val(hashlen));
28 |   CAMLreturn(ctx);  /* the initialized context, carried in an OCaml string */
29 | }
30 | 
31 | CAMLprim value caml_siphash_update(value ctx, value src, value ofs, value len) {
32 |   unsigned char * data = (unsigned char *) String_val(src) + Long_val(ofs);
33 |   siphash_add(siphash_val(ctx), data, Long_val(len)); /* len bytes from src+ofs */
34 |   return Val_unit;
35 | }
36 | 
37 | /* Call siphash_final on ctx; the result is a new string of hashlen bytes. */
38 | CAMLprim value caml_siphash_final(value ctx, value hashlen) {
39 |   CAMLparam1(ctx);
40 |   CAMLlocal1(digest);
41 |   int nbytes = Int_val(hashlen);
42 |   digest = caml_alloc_string(nbytes);
43 |   siphash_final(siphash_val(ctx), nbytes, (unsigned char *) String_val(digest));
44 |   CAMLreturn(digest);
45 | }
46 | 


--------------------------------------------------------------------------------
/test/prngtest.ml:
--------------------------------------------------------------------------------
 1 | (***********************************************************************)
 2 | (*                                                                     *)
 3 | (*                      The Cryptokit library                          *)
 4 | (*                                                                     *)
 5 | (*            Xavier Leroy, projet Gallium, INRIA Paris                *)
 6 | (*                                                                     *)
 7 | (*  Copyright 2017 Institut National de Recherche en Informatique et   *)
 8 | (*  en Automatique.  All rights reserved.  This file is distributed    *)
 9 | (*  under the terms of the GNU Library General Public License, with    *)
10 | (*  the special exception on linking described in file LICENSE.        *)
11 | (*                                                                     *)
12 | (***********************************************************************)
13 | 
14 | (* Generate pseudorandom data on stdout, for testing with "dieharder" *)
15 | 
16 | open Cryptokit
17 | 
18 | let output_pr_data rng = (* loops forever: 64-byte blocks from rng to stdout *)
19 |   let block_len = 64 in
20 |   let block = Bytes.create block_len in
21 |   while true do
22 |     rng#random_bytes block 0 block_len; output stdout block 0 block_len
23 |   done
24 | 
25 | let usage() = (* print the help text on stderr, then exit with status 2 *)
26 |   output_string stderr {|Usage:
27 |     ./prngtest.native aes-ctr  | dieharder -a -g 200
28 |     ./prngtest.native chacha20 | dieharder -a -g 200
29 |     ./prngtest.native hardware | dieharder -a -g 200
30 | Warning: each dieharder run takes a long time.
31 | |};
32 |   exit 2
33 | 
34 | (* Entry point: argv.(1) selects the generator and argv.(2), when present,
35 |    is the seed; a missing or unknown generator name prints the usage. *)
36 | let _ =
37 |   let argc = Array.length Sys.argv in
38 |   let seed =
39 |     if argc <= 2 then "Supercalifragilistusexpialidolcius" else Sys.argv.(2) in
40 |   let rng =
41 |     if argc <= 1 then usage() else
42 |     match Sys.argv.(1) with
43 |     | "aes-ctr"  -> Random.pseudo_rng_aes_ctr seed
44 |     | "chacha20" -> Random.pseudo_rng seed
45 |     | "hardware" -> Random.hardware_rng ()
46 |     | _          -> usage() in
47 |   output_pr_data rng
48 | 
49 |   
50 | 


--------------------------------------------------------------------------------
/src/blake2.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*              Xavier Leroy, Collège de France and Inria              */
 6 | /*                                                                     */
 7 | /*  Copyright 2020 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* BLAKE2b hashing */
15 | 
16 | #define BLAKE2b_BLOCKSIZE 128  /* size in bytes of the buffer field of struct blake2b */
17 | 
18 | struct blake2b {               /* state handed to the blake2b_* functions below */
19 |   uint64_t h[8];               /* presumably the running hash value -- confirm in the .c file */
20 |   uint64_t len[2];             /* presumably the input length as two 64-bit halves -- confirm */
21 |   int numbytes;                /* presumably the number of bytes currently held in buffer -- confirm */
22 |   unsigned char buffer[BLAKE2b_BLOCKSIZE];
23 | };
24 | 
25 | EXPORT void blake2b_init(struct blake2b * s,
26 |                          int hashlen, int keylen, unsigned char * key);  /* NOTE(review): hashlen and keylen presumably count bytes -- confirm */
27 | EXPORT void blake2b_add_data(struct blake2b * s,
28 |                              unsigned char * data, size_t len);
29 | EXPORT void blake2b_final(struct blake2b * s,
30 |                           int hashlen, unsigned char * hash);
31 | 
32 | #define BLAKE2s_BLOCKSIZE 64   /* size in bytes of the buffer field of struct blake2s */
33 | 
34 | struct blake2s {               /* same field names as struct blake2b, with 32-bit words */
35 |   uint32_t h[8];
36 |   uint32_t len[2];
37 |   int numbytes;
38 |   unsigned char buffer[BLAKE2s_BLOCKSIZE];
39 | };
40 | 
41 | EXPORT void blake2s_init(struct blake2s * s,
42 |                          int hashlen, int keylen, unsigned char * key);  /* same parameter list as blake2b_init */
43 | EXPORT void blake2s_add_data(struct blake2s * s,
44 |                              unsigned char * data, size_t len);
45 | EXPORT void blake2s_final(struct blake2s * s,
46 |                           int hashlen, unsigned char * hash);
47 | 
48 | 


--------------------------------------------------------------------------------
/src/arcfour.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "arcfour.h"
15 | 
16 | /* Key schedule.  key->state is first set to 0, 1, ..., 255 and x, y to 0;
17 |    256 swaps then mix in the bytes of key_data, whose read position goes
18 |    back to 0 whenever it reaches key_data_len. */
19 | EXPORT void arcfour_cook_key(struct arcfour_key * key,
20 |                       unsigned char * key_data,
21 |                       int key_data_len)
22 | {
23 |   unsigned char * const perm = &key->state[0];
24 |   unsigned char kpos = 0, j = 0, tmp;
25 |   int n;
26 | 
27 |   for (n = 0; n < 256; n++) perm[n] = n;
28 |   key->x = 0;
29 |   key->y = 0;
30 |   for (n = 0; n < 256; n++) {
31 |     j = key_data[kpos] + perm[n] + j;
32 |     tmp = perm[n]; perm[n] = perm[j]; perm[j] = tmp;
33 |     kpos++;
34 |     if (kpos >= key_data_len) kpos = 0;
35 |   }
36 | }
37 | 
38 | /* Write to dst the XOR of len bytes of src with the key stream.  The x and y
39 |    positions are loaded from key on entry and stored back on exit. */
40 | EXPORT void arcfour_encrypt(struct arcfour_key * key,
41 |                      char * src, char * dst, long len)
42 | {
43 |   int i = key->x, j = key->y, si, sj;
44 | 
45 |   while (len > 0) {
46 |     i = (i + 1) & 0xFF;
47 |     si = key->state[i];
48 |     j = (si + j) & 0xFF;
49 |     sj = key->state[j];
50 |     key->state[i] = sj; key->state[j] = si;
51 |     *dst++ = *src++ ^ key->state[(si + sj) & 0xFF];
52 |     len--;
53 |   }
54 |   key->x = i; key->y = j;
55 | }
56 |   
57 | 
58 | 


--------------------------------------------------------------------------------
/src/stubs-misc.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <string.h>
15 | #include <caml/mlvalues.h>
16 | 
17 | #define ALIGNMENT_OF(x) ((uintnat)(x) & (sizeof(uintnat) - 1))
18 | 
19 | /* XOR len bytes of src, starting at src_ofs, into dst starting at dst_ofs.
20 |    When at least 64 bytes are requested and both addresses have the same
21 |    offset within a machine word, most of the work is done a word at a time. */
22 | CAMLprim value caml_xor_string(value src, value src_ofs,
23 |                                value dst, value dst_ofs,
24 |                                value len)
25 | {
26 |   char * s = &Byte(src, Long_val(src_ofs));
27 |   char * d = &Byte(dst, Long_val(dst_ofs));
28 |   /* intnat rather than long: long has only 32 bits on 64-bit Windows, which
29 |      would truncate the length of a very large buffer. */
30 |   intnat l = Long_val(len);
31 | 
32 |   if (l >= 64 && ALIGNMENT_OF(s) == ALIGNMENT_OF(d)) {
33 |     /* Single bytes until s (hence d as well) sits on a word boundary. */
34 |     for (; ALIGNMENT_OF(s) != 0 && l > 0; l--)
35 |       *d++ ^= *s++;
36 |     /* Whole words while at least one word is left. */
37 |     while (l >= (intnat) sizeof(uintnat)) {
38 |       *((uintnat *) d) ^= *((uintnat *) s);
39 |       s += sizeof(uintnat);
40 |       d += sizeof(uintnat);
41 |       l -= (intnat) sizeof(uintnat);
42 |     }
43 |   }
44 |   /* Leftover bytes, or the entire input when the word path was not taken. */
45 |   for (; l > 0; l--)
46 |     *d++ ^= *s++;
47 |   return Val_unit;
48 | }
49 | 
50 | /* Zero the data of v if v is a custom block; leave anything else alone. */
51 | CAMLprim value caml_wipe_z(value v) {
52 |   if (!Is_block(v) || Tag_val(v) != Custom_tag) return Val_unit;
53 |   /* Clears Wosize_val(v) - 1 words, starting at Data_custom_val(v). */
54 |   memset(Data_custom_val(v), 0, (Wosize_val(v) - 1) * sizeof(value));
55 |   return Val_unit;
56 | }
57 | 


--------------------------------------------------------------------------------
/src/stubs-arcfour.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Stub code for ARC4 */
15 | 
16 | #include "arcfour.c"
17 | #include <caml/mlvalues.h>
18 | #include <caml/alloc.h>
19 | #include <caml/memory.h>
20 | 
21 | #define Cooked_key_size (sizeof(struct arcfour_key))
22 | #define Key_val(v) ((struct arcfour_key *) String_val(v))
23 | 
24 | /* Build an arcfour_key from all the bytes of key and return it inside a new
25 |    OCaml string.  key is rooted because it is used after the allocation. */
26 | CAMLprim value caml_arcfour_cook_key(value key) {
27 |   CAMLparam1(key);
28 |   value state = caml_alloc_string(Cooked_key_size);
29 |   unsigned char * raw = (unsigned char *) String_val(key);
30 |   arcfour_cook_key(Key_val(state), raw, caml_string_length(key));
31 |   CAMLreturn(state);
32 | }
33 | 
34 | /* Run arcfour_encrypt with key ckey over len bytes, reading at src + src_ofs
35 |    and writing at dst + dst_ofs. */
36 | CAMLprim value caml_arcfour_transform(value ckey, value src, value src_ofs,
37 |                                       value dst, value dst_ofs, value len) {
38 |   char * in = &Byte(src, Long_val(src_ofs));
39 |   char * out = &Byte(dst, Long_val(dst_ofs));
40 |   arcfour_encrypt(Key_val(ckey), in, out, Long_val(len));
41 |   return Val_unit;
42 | }
43 | 
44 | /* Array-argument entry point: forwards argv[0..5] (argc is not used). */
45 | CAMLprim value caml_arcfour_transform_bytecode(value * argv, int argc) {
46 |   value * a = argv;
47 |   return caml_arcfour_transform(a[0], a[1], a[2], a[3], a[4], a[5]);
48 | }
49 | 


--------------------------------------------------------------------------------
/src/rijndael-alg-fst.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * rijndael-alg-fst.h
 3 |  *
 4 |  * @version 3.0 (December 2000)
 5 |  *
 6 |  * Optimised ANSI C code for the Rijndael cipher (now AES)
 7 |  *
 8 |  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
 9 |  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
10 |  * @author Paulo Barreto <paulo.barreto@terra.com.br>
11 |  *
12 |  * This code is hereby placed in the public domain.
13 |  *
14 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
15 |  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
18 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21 |  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23 |  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | #ifndef __RIJNDAEL_ALG_FST_H
27 | #define __RIJNDAEL_ALG_FST_H
28 | 
29 | #define MAXKC	(256/32)
30 | #define MAXKB	(256/8)
31 | #define MAXNR	14
32 | 
33 | typedef unsigned char	u8;	
34 | typedef unsigned short	u16;	
35 | typedef unsigned int	u32;
36 | 
37 | EXPORT int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
38 | EXPORT int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
39 | EXPORT void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]);
40 | EXPORT void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]);
41 | 
42 | #ifdef INTERMEDIATE_VALUE_KAT
43 | EXPORT void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds);
44 | EXPORT void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds);
45 | #endif /* INTERMEDIATE_VALUE_KAT */
46 | 
47 | #endif /* __RIJNDAEL_ALG_FST_H */
48 | 


--------------------------------------------------------------------------------
/src/stubs-sha256.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2004 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "sha256.c"
15 | #include <caml/mlvalues.h>
16 | #include <caml/memory.h>
17 | #include <caml/alloc.h>
18 | 
19 | #define Context_val(v) ((struct SHA256Context *) String_val(v))
20 | 
21 | /* Allocate a context string and set it up with SHA256_init(..., 256). */
22 | CAMLprim value caml_sha256_init(value unit) {
23 |   value block = caml_alloc_string(sizeof(struct SHA256Context));
24 |   SHA256_init(Context_val(block), 256);
25 |   return block;
26 | }
27 | 
28 | /* Allocate a context string and set it up with SHA256_init(..., 224). */
29 | CAMLprim value caml_sha224_init(value unit) {
30 |   value block = caml_alloc_string(sizeof(struct SHA256Context));
31 |   SHA256_init(Context_val(block), 224);
32 |   return block;
33 | }
34 | 
35 | CAMLprim value caml_sha256_update(value ctx, value src, value ofs, value len) {
36 |   unsigned char * data = (unsigned char *) String_val(src) + Long_val(ofs);
37 |   SHA256_add_data(Context_val(ctx), data, Long_val(len)); /* len bytes from src+ofs */
38 |   return Val_unit;
39 | }
40 | 
41 | /* Call SHA256_finish(ctx, 256, ...); the result is a new 32-byte string.
42 |    ctx is rooted because the allocation of the result may run the GC. */
43 | CAMLprim value caml_sha256_final(value ctx) {
44 |   CAMLparam1(ctx);
45 |   CAMLlocal1(digest);
46 |   digest = caml_alloc_string(32);
47 |   SHA256_finish(Context_val(ctx), 256, (unsigned char *) String_val(digest));
48 |   CAMLreturn(digest);
49 | }
50 | 
51 | /* Call SHA256_finish(ctx, 224, ...); the result is a new 28-byte string.
52 |    ctx is rooted because the allocation of the result may run the GC. */
53 | CAMLprim value caml_sha224_final(value ctx) {
54 |   CAMLparam1(ctx);
55 |   CAMLlocal1(digest);
56 |   digest = caml_alloc_string(28);
57 |   SHA256_finish(Context_val(ctx), 224, (unsigned char *) String_val(digest));
58 |   CAMLreturn(digest);
59 | }
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/src/stubs-md5.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <caml/mlvalues.h>
15 | #include <caml/memory.h>
16 | #include <caml/alloc.h>
17 | 
18 | #ifndef _MSC_VER
19 | #include <stdint.h>
20 | typedef uint32_t u32;
21 | #else
22 | typedef unsigned int u32;
23 | #endif
24 | 
25 | struct MD5Context {
26 |         u32 buf[4];
27 |         u32 bits[2];
28 |         unsigned char in[64];
29 | };
30 | 
31 | CAMLextern void caml_MD5Init (struct MD5Context *context);
32 | CAMLextern void caml_MD5Update (struct MD5Context *context,
33 |                            unsigned char *buf, unsigned len);
34 | CAMLextern void caml_MD5Final (unsigned char *digest, struct MD5Context *ctx);
35 | 
/* View the bytes of the OCaml string [ctx_v] as an MD5 context.
   The resulting pointer refers to the string's data, so it must be
   recomputed after any OCaml allocation. */
#define Context_val(ctx_v) \
  ((struct MD5Context *) String_val(ctx_v))
37 | 
38 | CAMLprim value caml_md5_init(value unit)
39 | {
40 |   value ctx = caml_alloc_string(sizeof(struct MD5Context));
41 |   caml_MD5Init(Context_val(ctx));
42 |   return ctx;
43 | }
44 | 
45 | CAMLprim value caml_md5_update(value ctx, value src, value ofs, value len)
46 | {
47 |   caml_MD5Update(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len));
48 |   return Val_unit;
49 | }
50 | 
51 | CAMLprim value caml_md5_final(value ctx)
52 | {
53 |   CAMLparam1(ctx);
54 |   CAMLlocal1(res);
55 | 
56 |   res = caml_alloc_string(16);
57 |   caml_MD5Final(&Byte_u(res, 0), Context_val(ctx));
58 |   CAMLreturn(res);
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/src/blake3.h:
--------------------------------------------------------------------------------
 1 | #ifndef BLAKE3_H
 2 | #define BLAKE3_H
 3 | 
 4 | #include <stddef.h>
 5 | #include <stdint.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | EXPORT "C" {
 9 | #endif
10 | 
11 | #define BLAKE3_VERSION_STRING "1.3.1"
12 | #define BLAKE3_KEY_LEN 32
13 | #define BLAKE3_OUT_LEN 32
14 | #define BLAKE3_BLOCK_LEN 64
15 | #define BLAKE3_CHUNK_LEN 1024
16 | #define BLAKE3_MAX_DEPTH 54
17 | 
18 | // This struct is a private implementation detail. It has to be here because
19 | // it's part of blake3_hasher below.
20 | typedef struct {
21 |   uint32_t cv[8];
22 |   uint64_t chunk_counter;
23 |   uint8_t buf[BLAKE3_BLOCK_LEN];
24 |   uint8_t buf_len;
25 |   uint8_t blocks_compressed;
26 |   uint8_t flags;
27 | } blake3_chunk_state;
28 | 
29 | typedef struct {
30 |   uint32_t key[8];
31 |   blake3_chunk_state chunk;
32 |   uint8_t cv_stack_len;
33 |   // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
34 |   // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
35 |   // requires a 4th entry, rather than merging everything down to 1, because we
36 |   // don't know whether more input is coming. This is different from how the
37 |   // reference implementation does things.
38 |   uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
39 | } blake3_hasher;
40 | 
41 | EXPORT const char *blake3_version(void);
42 | EXPORT void blake3_hasher_init(blake3_hasher *self);
43 | EXPORT void blake3_hasher_init_keyed(blake3_hasher *self,
44 |                                      const uint8_t key[BLAKE3_KEY_LEN]);
45 | EXPORT void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
46 | EXPORT void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
47 |                                               size_t context_len);
48 | EXPORT void blake3_hasher_update(blake3_hasher *self, const void *input,
49 |                                  size_t input_len);
50 | EXPORT void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
51 |                                    size_t out_len);
52 | EXPORT void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
53 |                                         uint8_t *out, size_t out_len);
54 | EXPORT void blake3_hasher_reset(blake3_hasher *self);
55 | 
56 | #ifdef __cplusplus
57 | }
58 | #endif
59 | 
60 | #endif /* BLAKE3_H */
61 | 


--------------------------------------------------------------------------------
/src/stubs-ghash.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, Collège de France and Inria                */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <stdint.h>
15 | #include <string.h>
16 | #include "ghash.c"
17 | #include "pclmul.c"
18 | #include <caml/mlvalues.h>
19 | #include <caml/memory.h>
20 | #include <caml/custom.h>
21 | 
/* Access (as an lvalue) the malloc'ed GHASH context pointer stored in the
   data area of the custom block [blk]. */
#define Context_val(blk) \
  (*((struct ghash_context **) Data_custom_val(blk)))
23 | 
24 | static void caml_ghash_finalize(value ctx)
25 | {
26 |   if (Context_val(ctx) != NULL) {
27 |     caml_stat_free(Context_val(ctx));
28 |     Context_val(ctx) = NULL;
29 |   }
30 | }
31 | 
32 | static struct custom_operations ghash_context_ops = {
33 |   "fr.inria.caml.cryptokit.GHASH_context",
34 |   caml_ghash_finalize,
35 |   custom_compare_default,
36 |   custom_hash_default,
37 |   custom_deserialize_default,
38 |   custom_compare_ext_default
39 | };
40 | 
41 | CAMLprim value caml_ghash_init(value key)
42 | {
43 |   if (pclmul_available == -1) pclmul_check_available();
44 |   if (pclmul_available == 1) {
45 |     return key;
46 |   } else {
47 |     struct ghash_context * ctx = caml_stat_alloc(sizeof(struct ghash_context));
48 |     value res =
49 |       caml_alloc_custom(&ghash_context_ops,
50 |                         sizeof(struct ghash_context *),
51 |                         0, 1);
52 |     ghash_init(ctx, &Byte_u(key, 0));
53 |     Context_val(res) = ctx;
54 |     return res;
55 |   }
56 | }
57 | 
58 | CAMLprim value caml_ghash_mult(value ctx, value x)
59 | {
60 |   if (pclmul_available == 1) {
61 |     pclmul_mult(&Byte_u(x, 0), &Byte_u(ctx, 0), &Byte_u(x, 0));
62 |   } else {
63 |     ghash_mult(Context_val(ctx), &Byte_u(x, 0), &Byte_u(x, 0));
64 |   }
65 |   return Val_unit;
66 | }
67 | 


--------------------------------------------------------------------------------
/src/stubs-chacha20.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Stub code for Chacha20 */
15 | 
16 | #include "chacha20.c"
17 | #include <caml/mlvalues.h>
18 | #include <caml/alloc.h>
19 | #include <caml/memory.h>
20 | 
/* Size in bytes of the OCaml string that holds a chacha20_ctx. */
#define Cooked_key_size \
  (sizeof(chacha20_ctx))
/* View the bytes of the OCaml string [key_v] as a chacha20_ctx. */
#define Key_val(key_v) \
  ((chacha20_ctx *) String_val(key_v))
23 | 
24 | CAMLprim value caml_chacha20_cook_key(value key, value iv, value counter)
25 | {
26 |   CAMLparam3(key, iv, counter);
27 |   value ckey = caml_alloc_string(Cooked_key_size);
28 |   chacha20_init(Key_val(ckey),
29 |                 (unsigned char *) String_val(key), caml_string_length(key),
30 |                 (unsigned char *) String_val(iv), caml_string_length(iv),
31 |                 Int64_val(counter));
32 |   CAMLreturn(ckey);
33 | }
34 | 
35 | CAMLprim value caml_chacha20_transform(value ckey, value src, value src_ofs,
36 |                                       value dst, value dst_ofs, value len)
37 | {
38 |   chacha20_transform(Key_val(ckey),
39 |                      &Byte_u(src, Long_val(src_ofs)),
40 |                      &Byte_u(dst, Long_val(dst_ofs)),
41 |                      Long_val(len));
42 |   return Val_unit;
43 | }
44 | 
45 | CAMLprim value caml_chacha20_transform_bytecode(value * argv, int argc)
46 | {
47 |   return caml_chacha20_transform(argv[0], argv[1], argv[2],
48 |                                  argv[3], argv[4], argv[5]);
49 | }
50 | 
51 | CAMLprim value caml_chacha20_extract(value ckey,
52 |                                      value dst, value dst_ofs, value len)
53 | {
54 |   chacha20_extract(Key_val(ckey),
55 |                    &Byte_u(dst, Long_val(dst_ofs)),
56 |                    Long_val(len));
57 |   return Val_unit;
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/src/config/flags.ml:
--------------------------------------------------------------------------------
 1 | (* Compute compilation and linking flags *)
 2 | 
 3 | open Printf
 4 | open Config_vars
 5 | 
 6 | module Configurator = Configurator.V1
 7 | 
 8 | (* Compile and link a dummy C program with the given flags. *)
 9 | let test ~cfg ~c_flags ~link_flags =
10 |   let test_program = "int main() { return 0; }" in
11 |   Configurator.c_test cfg test_program ~c_flags ~link_flags
12 | 
(* Check that a list of header files declares a list of identifiers.
   A C program is generated that includes every header in [headers] and
   takes the address of every name in [functions]; the result is whether
   that program passes [Configurator.c_test] with the given flags. *)
let provides ~cfg ~c_flags ~link_flags ~headers ~functions =
  let program = Buffer.create 256 in
  List.iter (fun h -> bprintf program "#include <%s>\n" h) headers;
  Buffer.add_string program "int main() {\n";
  List.iter (fun f -> bprintf program "  void * ptr_%s = &%s;\n" f f) functions;
  Buffer.add_string program "}\n";
  Configurator.c_test cfg (Buffer.contents program) ~c_flags ~link_flags
21 | 
(* Entry point: decide which optional features are enabled, write the
   resulting compiler flags to "flags.sexp" and the linker flags to
   "library_flags.sexp", then print a summary. *)
let () =
  Configurator.main ~name:"cryptokit" (fun cfg ->
    let config_var = Configurator.ocaml_config_var_exn cfg in
    let os_type = config_var "os_type" in
    let system = config_var "system" in
    let architecture = config_var "architecture" in
    (* An explicit [This] setting from Config_vars always wins; [Auto]
       falls back to detection. *)
    let zlib =
      match enable_zlib with
      | Auto -> os_type <> "Win32"
      | This choice -> choice
    in
    let hardware_support =
      match enable_hardware_support with
      | Auto ->
          (* The compiler probe only runs on x86 architectures. *)
          List.mem architecture [ "amd64"; "i386" ]
          && test ~cfg ~c_flags:[ "-maes"; "-mpclmul" ] ~link_flags:[]
      | This choice -> choice
    in
    let has_getentropy =
      provides ~cfg ~c_flags:[] ~link_flags:[]
        ~headers:[ "unistd.h" ] ~functions:[ "getentropy" ]
    in
    (* Keep, in order, the items whose condition holds. *)
    let select candidates = List.map snd (List.filter fst candidates) in
    let msvc = List.mem system [ "win32"; "win64" ] in
    let mingw = List.mem system [ "mingw"; "mingw64" ] in
    let flags =
      select
        [ (has_getentropy, "-DHAVE_GETENTROPY");
          (zlib, "-DHAVE_ZLIB");
          (hardware_support, "-maes");
          (hardware_support, "-mpclmul") ]
    in
    let library_flags =
      select
        [ (zlib && msvc, "zlib.lib");
          (zlib && not msvc, "-lz");
          (msvc, "advapi32.lib");
          (mingw, "-ladvapi32") ]
    in
    Configurator.Flags.write_sexp "flags.sexp" flags;
    Configurator.Flags.write_sexp "library_flags.sexp" library_flags;
    let status enabled = if enabled then "enabled" else "disabled" in
    printf "ZLib: ............................... %s\n" (status zlib);
    printf "Hardware support for AES and GCM: ... %s\n" (status hardware_support);
    printf "getentropy():........................ %s\n" (status has_getentropy))
63 | 
64 | 


--------------------------------------------------------------------------------
/src/stubs-blowfish.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2006 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Stub code for Blowfish */
15 | 
16 | #include "blowfish.c"
17 | #include <caml/mlvalues.h>
18 | #include <caml/alloc.h>
19 | #include <caml/memory.h>
20 | 
21 | CAMLprim value caml_blowfish_cook_key(value key)
22 | {
23 |   CAMLparam1(key);
24 |   value ckey = caml_alloc_string(sizeof(BLOWFISH_CTX));
25 |   Blowfish_Init((BLOWFISH_CTX *) String_val(ckey),
26 |                 &Byte_u(key, 0),
27 |                 caml_string_length(key));
28 |   CAMLreturn(ckey);
29 | }
30 | 
/* COPY4BYTES(to, from): copy four bytes from [from] to [to].
   On little-endian hosts the byte order is reversed; on big-endian hosts
   the bytes are copied in order.  The macro expands to a comma expression
   and evaluates each argument four times, so pass side-effect-free
   operands. */
#ifndef ARCH_BIG_ENDIAN
#define COPY4BYTES(to,from) \
  (to)[0] = (from)[3], \
  (to)[1] = (from)[2], \
  (to)[2] = (from)[1], \
  (to)[3] = (from)[0]
#else
#define COPY4BYTES(to,from) \
  (to)[0] = (from)[0], \
  (to)[1] = (from)[1], \
  (to)[2] = (from)[2], \
  (to)[3] = (from)[3]
#endif
44 | 
45 | CAMLprim value caml_blowfish_encrypt(value ckey, value src, value src_ofs,
46 |                                      value dst, value dst_ofs)
47 | {
48 |   u32 xl, xr;
49 |   unsigned char * p;
50 | 
51 |   p = &Byte_u(src, Long_val(src_ofs));
52 |   COPY4BYTES((unsigned char *) &xl, p);
53 |   COPY4BYTES((unsigned char *) &xr, p + 4);
54 |   Blowfish_Encrypt((BLOWFISH_CTX *) String_val(ckey), &xl, &xr);
55 |   p = &Byte_u(dst, Long_val(dst_ofs));
56 |   COPY4BYTES(p, (unsigned char *) &xl);
57 |   COPY4BYTES(p + 4, (unsigned char *) &xr);
58 |   return Val_unit;
59 | }
60 | 
61 | CAMLprim value caml_blowfish_decrypt(value ckey, value src, value src_ofs,
62 |                                      value dst, value dst_ofs)
63 | {
64 |   u32 xl, xr;
65 |   unsigned char * p;
66 | 
67 |   p = &Byte_u(src, Long_val(src_ofs));
68 |   COPY4BYTES((unsigned char *) &xl, p);
69 |   COPY4BYTES((unsigned char *) &xr, p + 4);
70 |   Blowfish_Decrypt((BLOWFISH_CTX *) String_val(ckey), &xl, &xr);
71 |   p = &Byte_u(dst, Long_val(dst_ofs));
72 |   COPY4BYTES(p, (unsigned char *) &xl);
73 |   COPY4BYTES(p + 4, (unsigned char *) &xr);
74 |   return Val_unit;
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/src/stubs-blake2.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*              Xavier Leroy, Collège de France and Inria              */
 6 | /*                                                                     */
 7 | /*  Copyright 2020 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <stdint.h>
15 | #include <string.h>
16 | #include "blake2.c"
17 | 
18 | #include <caml/mlvalues.h>
19 | #include <caml/memory.h>
20 | #include <caml/alloc.h>
21 | 
/* View the bytes of the OCaml string [ctx_v] as a BLAKE2b state. */
#define blake2b_val(ctx_v) \
  ((struct blake2b *) String_val(ctx_v))
23 | 
24 | CAMLprim value caml_blake2b_init(value hashlen, value key)
25 | {
26 |   CAMLparam1(key);
27 |   value ctx = caml_alloc_string(sizeof(struct blake2b));
28 |   blake2b_init(blake2b_val(ctx),
29 |                Int_val(hashlen),
30 |                caml_string_length(key), &Byte_u(key, 0));
31 |   CAMLreturn(ctx);
32 | }
33 | 
34 | CAMLprim value caml_blake2b_update(value ctx, value src, value ofs, value len)
35 | {
36 |   blake2b_add_data(blake2b_val(ctx), 
37 |                    &Byte_u(src, Long_val(ofs)), Long_val(len));
38 |   return Val_unit;
39 | }
40 | 
41 | CAMLprim value caml_blake2b_final(value ctx, value hashlen)
42 | {
43 |   CAMLparam1(ctx);
44 |   CAMLlocal1(res);
45 |   int len = Int_val(hashlen);
46 |   res = caml_alloc_string(len);
47 |   blake2b_final(blake2b_val(ctx), len, &Byte_u(res, 0));
48 |   CAMLreturn(res);
49 | }
50 | 
/* View the bytes of the OCaml string [ctx_v] as a BLAKE2s state. */
#define blake2s_val(ctx_v) \
  ((struct blake2s *) String_val(ctx_v))
52 | 
53 | CAMLprim value caml_blake2s_init(value hashlen, value key)
54 | {
55 |   CAMLparam1(key);
56 |   value ctx = caml_alloc_string(sizeof(struct blake2s));
57 |   blake2s_init(blake2s_val(ctx),
58 |                Int_val(hashlen),
59 |                caml_string_length(key), &Byte_u(key, 0));
60 |   CAMLreturn(ctx);
61 | }
62 | 
63 | CAMLprim value caml_blake2s_update(value ctx, value src, value ofs, value len)
64 | {
65 |   blake2s_add_data(blake2s_val(ctx), 
66 |                    &Byte_u(src, Long_val(ofs)), Long_val(len));
67 |   return Val_unit;
68 | }
69 | 
70 | CAMLprim value caml_blake2s_final(value ctx, value hashlen)
71 | {
72 |   CAMLparam1(ctx);
73 |   CAMLlocal1(res);
74 |   int len = Int_val(hashlen);
75 |   res = caml_alloc_string(len);
76 |   blake2s_final(blake2s_val(ctx), len, &Byte_u(res, 0));
77 |   CAMLreturn(res);
78 | }
79 | 


--------------------------------------------------------------------------------
/src/stubs-blake3.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*              Xavier Leroy, Collège de France and Inria              */
 6 | /*                                                                     */
 7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <stdint.h>
15 | #include <string.h>
16 | #include "blake3.c"
17 | #include "blake3_portable.c"
18 | #include "blake3_dispatch.c"
19 | 
20 | #include <caml/mlvalues.h>
21 | #include <caml/memory.h>
22 | #include <caml/alloc.h>
23 | #include <caml/custom.h>
24 | 
/* Access (as an lvalue) the malloc'ed blake3_hasher pointer stored in the
   data area of the custom block [blk]. */
#define Context_val(blk) \
  (*((blake3_hasher **) Data_custom_val(blk)))
26 | 
27 | static void caml_blake3_finalize(value ctx)
28 | {
29 |   if (Context_val(ctx) != NULL) {
30 |     caml_stat_free(Context_val(ctx));
31 |     Context_val(ctx) = NULL;
32 |   }
33 | }
34 | 
35 | static struct custom_operations blake3_context_ops = {
36 |   "fr.inria.caml.cryptokit.blake3_context",
37 |   caml_blake3_finalize,
38 |   custom_compare_default,
39 |   custom_hash_default,
40 |   custom_deserialize_default,
41 |   custom_compare_ext_default
42 | };
43 | 
44 | CAMLprim value caml_blake3_init(value optkey)
45 | {
46 |   CAMLparam1(optkey);
47 |   blake3_hasher * ctx = caml_stat_alloc(sizeof(blake3_hasher));
48 |   value res =
49 |     caml_alloc_custom(&blake3_context_ops,
50 |                       sizeof(blake3_hasher *),
51 |                       0, 1);
52 |   if (caml_string_length(optkey) == BLAKE3_KEY_LEN) {
53 |     blake3_hasher_init_keyed(ctx, &Byte_u(optkey, 0));
54 |   } else {
55 |     blake3_hasher_init(ctx);
56 |   }
57 |   Context_val(res) = ctx;
58 |   CAMLreturn(res);
59 | }
60 | 
61 | CAMLprim value caml_blake3_update(value ctx,
62 |                                   value src, value ofs, value len)
63 | {
64 |   blake3_hasher_update(Context_val(ctx),
65 |                        &Byte_u(src, Long_val(ofs)), Long_val(len));
66 |   return Val_unit;
67 | }
68 | 
69 | 
70 | CAMLprim value caml_blake3_extract(value ctx, value vlen)
71 | {
72 |   CAMLparam2(ctx, vlen);
73 |   CAMLlocal1(res);
74 |   size_t len = Long_val(vlen);
75 |   res = caml_alloc_string(len);
76 |   blake3_hasher_finalize(Context_val(ctx), &Byte_u(res, 0), len);
77 |   CAMLreturn(res);
78 | }
79 | 
80 | CAMLprim value caml_blake3_wipe(value ctx)
81 | {
82 |   if (Context_val(ctx) != NULL)
83 |     memset(Context_val(ctx), 0, sizeof(blake3_hasher));
84 |   caml_blake3_finalize(ctx);
85 |   return Val_unit;
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/src/stubs-sha512.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2015 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include "sha512.c"
15 | #include <caml/mlvalues.h>
16 | #include <caml/memory.h>
17 | #include <caml/alloc.h>
18 | 
/* View the bytes of the OCaml string [ctx_v] as a SHA-512 hashing context.
   The resulting pointer refers to the string's data, so it must be
   recomputed after any OCaml allocation. */
#define Context_val(ctx_v) \
  ((struct SHA512Context *) String_val(ctx_v))
20 | 
21 | CAMLprim value caml_sha512_init(value unit)
22 | {
23 |   value ctx = caml_alloc_string(sizeof(struct SHA512Context));
24 |   SHA512_init(Context_val(ctx), 512);
25 |   return ctx;
26 | }
27 | 
28 | CAMLprim value caml_sha384_init(value unit)
29 | {
30 |   value ctx = caml_alloc_string(sizeof(struct SHA512Context));
31 |   SHA512_init(Context_val(ctx), 384);
32 |   return ctx;
33 | }
34 | 
35 | CAMLprim value caml_sha512_256_init(value unit)
36 | {
37 |   value ctx = caml_alloc_string(sizeof(struct SHA512Context));
38 |   SHA512_init(Context_val(ctx), 256);
39 |   return ctx;
40 | }
41 | 
42 | CAMLprim value caml_sha512_224_init(value unit)
43 | {
44 |   value ctx = caml_alloc_string(sizeof(struct SHA512Context));
45 |   SHA512_init(Context_val(ctx), 224);
46 |   return ctx;
47 | }
48 | 
49 | CAMLprim value caml_sha512_update(value ctx, value src, value ofs, value len)
50 | {
51 |   SHA512_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len));
52 |   return Val_unit;
53 | }
54 | 
55 | CAMLprim value caml_sha512_final(value ctx)
56 | {
57 |   CAMLparam1(ctx);
58 |   CAMLlocal1(res);
59 | 
60 |   res = caml_alloc_string(64);
61 |   SHA512_finish(Context_val(ctx), 512, &Byte_u(res, 0));
62 |   CAMLreturn(res);
63 | }
64 | 
65 | CAMLprim value caml_sha384_final(value ctx)
66 | {
67 |   CAMLparam1(ctx);
68 |   CAMLlocal1(res);
69 | 
70 |   res = caml_alloc_string(48);
71 |   SHA512_finish(Context_val(ctx), 384, &Byte_u(res, 0));
72 |   CAMLreturn(res);
73 | }
74 | 
75 | CAMLprim value caml_sha512_256_final(value ctx)
76 | {
77 |   CAMLparam1(ctx);
78 |   CAMLlocal1(res);
79 | 
80 |   res = caml_alloc_string(32);
81 |   SHA512_finish(Context_val(ctx), 256, &Byte_u(res, 0));
82 |   CAMLreturn(res);
83 | }
84 | 
85 | CAMLprim value caml_sha512_224_final(value ctx)
86 | {
87 |   CAMLparam1(ctx);
88 |   CAMLlocal1(res);
89 | 
90 |   res = caml_alloc_string(28);
91 |   SHA512_finish(Context_val(ctx), 224, &Byte_u(res, 0));
92 |   CAMLreturn(res);
93 | }
94 | 


--------------------------------------------------------------------------------
/src/stubs-sha3.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2013 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | #include <string.h>
15 | #include "keccak.c"
16 | #include <caml/mlvalues.h>
17 | #include <caml/memory.h>
18 | #include <caml/alloc.h>
19 | #include <caml/custom.h>
20 | 
/* Access (as an lvalue) the malloc'ed SHA-3 context pointer stored in the
   data area of the custom block [blk]. */
#define Context_val(blk) \
  (*((struct SHA3Context **) Data_custom_val(blk)))
22 | 
23 | static void caml_sha3_finalize(value ctx)
24 | {
25 |   if (Context_val(ctx) != NULL) {
26 |     caml_stat_free(Context_val(ctx));
27 |     Context_val(ctx) = NULL;
28 |   }
29 | }
30 | 
31 | static struct custom_operations SHA3_context_ops = {
32 |   "fr.inria.caml.cryptokit.SHA3_context",
33 |   caml_sha3_finalize,
34 |   custom_compare_default,
35 |   custom_hash_default,
36 |   custom_deserialize_default,
37 |   custom_compare_ext_default
38 | };
39 | 
40 | CAMLprim value caml_sha3_init(value vsize)
41 | {
42 |   struct SHA3Context * ctx = caml_stat_alloc(sizeof(struct SHA3Context));
43 |   value res =
44 |     caml_alloc_custom(&SHA3_context_ops,
45 |                       sizeof(struct SHA3Context *),
46 |                       0, 1);
47 |   SHA3_init(ctx, Int_val(vsize));
48 |   Context_val(res) = ctx;
49 |   return res;
50 | }
51 | 
52 | CAMLprim value caml_sha3_absorb(value ctx,
53 |                                 value src, value ofs, value len)
54 | {
55 |   SHA3_absorb(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len));
56 |   return Val_unit;
57 | }
58 | 
59 | 
/* Padding byte for the original Keccak variant: the figure on page 9 of
   "Keccak Implementation Overview" (version 3.2),
   http://keccak.noekeon.org/Keccak-implementation-3.2.pdf,
   shows `0x01`. */
static const unsigned int keccak_padding = 0x01u;

/* Padding byte selected when the `official` flag of caml_sha3_extract is
   true: Table 3 of the updated description at
   http://keccak.noekeon.org/specs_summary.html lists `0x06`. */
static const unsigned int sha3_padding = 0x06u;
68 | 
69 | CAMLprim value caml_sha3_extract(value official, value ctx)
70 | {
71 |   CAMLparam2(official, ctx);
72 |   CAMLlocal1(res);
73 | 
74 |   res = caml_alloc_string(Context_val(ctx)->hsiz);
75 |   SHA3_extract(Bool_val(official) ? sha3_padding : keccak_padding, Context_val(ctx), &Byte_u(res, 0));
76 |   CAMLreturn(res);
77 | }
78 | 
79 | CAMLprim value caml_sha3_wipe(value ctx)
80 | {
81 |   if (Context_val(ctx) != NULL) {
82 |     memset(Context_val(ctx), 0, sizeof(struct SHA3Context));
83 |     caml_stat_free(Context_val(ctx));
84 |     Context_val(ctx) = NULL;
85 |   }
86 |   return Val_unit;
87 | }
88 | 
89 | 


--------------------------------------------------------------------------------
/src/stubs-aes.c:
--------------------------------------------------------------------------------
 1 | /***********************************************************************/
 2 | /*                                                                     */
 3 | /*                      The Cryptokit library                          */
 4 | /*                                                                     */
 5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
 6 | /*                                                                     */
 7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
 8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 9 | /*  under the terms of the GNU Library General Public License, with    */
10 | /*  the special exception on linking described in file LICENSE.        */
11 | /*                                                                     */
12 | /***********************************************************************/
13 | 
14 | /* Stub code for AES */
15 | 
16 | #include "rijndael-alg-fst.c"
17 | #include "aesni.c"
18 | 
19 | #include <caml/mlvalues.h>
20 | #include <caml/alloc.h>
21 | #include <caml/memory.h>
22 | 
/* Layout of a cooked AES key (an OCaml string): 4 * (MAXNR + 1) words of
   type u32, followed by one extra byte at Cooked_key_NR_offset that stores
   the value returned by the key setup function. */
#define Cooked_key_NR_offset \
  ((4 * (MAXNR + 1)) * sizeof(u32))
#define Cooked_key_size \
  (Cooked_key_NR_offset + 1)
25 | /* Expand the raw AES key into an encryption key schedule; the round count is kept in the final byte. */
26 | CAMLprim value caml_aes_cook_encrypt_key(value key)
27 | {
28 |   CAMLparam1(key);
29 |   value ckey = caml_alloc_string(Cooked_key_size);
30 |   const u8 * raw;
31 |   mlsize_t keybits;
32 |   int nr;
33 |   /* Detect AES-NI lazily, on the first key setup. */
34 |   if (aesni_available == -1) aesni_check_available();
35 |   raw = (const u8 *) String_val(key);   /* taken after the allocation above, which may move key */
36 |   keybits = 8 * caml_string_length(key);
37 |   if (aesni_available == 1)
38 |     nr = aesniKeySetupEnc((u8 *) String_val(ckey), raw, keybits);
39 |   else
40 |     nr = rijndaelKeySetupEnc((u32 *) String_val(ckey), raw, keybits);
41 |   Byte(ckey, Cooked_key_NR_offset) = nr;
42 |   CAMLreturn(ckey);
43 | }
44 | /* Expand the raw AES key into a decryption key schedule; the round count is kept in the final byte. */
45 | CAMLprim value caml_aes_cook_decrypt_key(value key)
46 | {
47 |   CAMLparam1(key);
48 |   value ckey = caml_alloc_string(Cooked_key_size);
49 |   const u8 * raw;
50 |   mlsize_t keybits;
51 |   int nr;
52 |   /* Detect AES-NI lazily, on the first key setup. */
53 |   if (aesni_available == -1) aesni_check_available();
54 |   raw = (const u8 *) String_val(key);   /* taken after the allocation above, which may move key */
55 |   keybits = 8 * caml_string_length(key);
56 |   if (aesni_available == 1)
57 |     nr = aesniKeySetupDec((u8 *) String_val(ckey), raw, keybits);
58 |   else
59 |     nr = rijndaelKeySetupDec((u32 *) String_val(ckey), raw, keybits);
60 |   Byte(ckey, Cooked_key_NR_offset) = nr;
61 |   CAMLreturn(ckey);
62 | }
63 | /* Encrypt one AES block read at src + src_ofs into dst + dst_ofs, using a cooked encryption key. */
64 | CAMLprim value caml_aes_encrypt(value ckey, value src, value src_ofs,
65 |                                 value dst, value dst_ofs)
66 | {
67 |   /* The round count was stored in the byte that follows the key schedule. */
68 |   int nr = Byte(ckey, Cooked_key_NR_offset);
69 |   const u8 * in = (const u8 *) &Byte(src, Long_val(src_ofs));
70 |   u8 * out = (u8 *) &Byte(dst, Long_val(dst_ofs));
71 | 
72 |   if (aesni_available == 1) {
73 |     aesniEncrypt((const u8 *) String_val(ckey), nr, in, out);
74 |   } else {
75 |     rijndaelEncrypt((const u32 *) String_val(ckey), nr, in, out);
76 |   }
77 |   return Val_unit;
78 | }
79 | /* Decrypt one AES block read at src + src_ofs into dst + dst_ofs, using a cooked decryption key. */
80 | CAMLprim value caml_aes_decrypt(value ckey, value src, value src_ofs,
81 |                                 value dst, value dst_ofs)
82 | {
83 |   /* The round count was stored in the byte that follows the key schedule. */
84 |   int nr = Byte(ckey, Cooked_key_NR_offset);
85 |   const u8 * in = (const u8 *) &Byte(src, Long_val(src_ofs));
86 |   u8 * out = (u8 *) &Byte(dst, Long_val(dst_ofs));
87 | 
88 |   if (aesni_available == 1) {
89 |     aesniDecrypt((const u8 *) String_val(ckey), nr, in, out);
90 |   } else {
91 |     rijndaelDecrypt((const u32 *) String_val(ckey), nr, in, out);
92 |   }
93 |   return Val_unit;
94 | }
95 | 
96 | 


--------------------------------------------------------------------------------
/src/pclmul.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, Collège de France and Inria                */
  6 | /*                                                                     */
  7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* Hardware-accelerated implementation of GHASH multiplication */
 15 | 
 16 | #include <stdint.h>
 17 | #include <stdlib.h>
 18 | #include "pclmul.h"
 19 | 
 20 | #ifdef __PCLMUL__
 21 | 
 22 | #include <wmmintrin.h>
 23 | #include <emmintrin.h>
 24 | #include <cpuid.h>
 25 | 
 26 | EXPORT int pclmul_available = -1;   /* -1: not probed yet; 0: unavailable; 1: available */
 27 | /* Query CPUID leaf 1 and record whether ECX bit 1 (PCLMULQDQ) is set; returns the recorded flag. */
 28 | EXPORT int pclmul_check_available(void)
 29 | {
 30 |   unsigned int eax, ebx, ecx, edx;
 31 |   int supported = 0;   /* stays 0 when leaf 1 cannot be read */
 32 | 
 33 |   if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
 34 |     supported = (int) ((ecx >> 1) & 1u);
 35 |   pclmul_available = supported;
 36 |   return supported;
 37 | }
 38 | /* Copy 16 bytes from src to dst, reversing their order. */
 39 | static void copy_reverse_16(void * dst, const void * src)
 40 | {
 41 |   uint8_t * out = (uint8_t *) dst;
 42 |   const uint8_t * in = (const uint8_t *) src;
 43 |   int k;
 44 | 
 45 |   /* Byte k of the destination receives byte 15 - k of the source. */
 46 |   for (k = 0; k < 16; k++) out[k] = in[15 - k];
 47 | }
 48 | /* GHASH multiplication with PCLMULQDQ: res receives the product of the 16-byte blocks arg1 and arg2. */
 49 | EXPORT void pclmul_mult(uint8_t res[16],
 50 |                  const uint8_t arg1[16], const uint8_t arg2[16])
 51 | {
 52 |   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
 53 |   /* Load both operands with their 16 bytes in reversed order. */
 54 |   copy_reverse_16(&tmp0, arg1);
 55 |   copy_reverse_16(&tmp1, arg2);
 56 |   /* Four 64x64-bit carry-less partial products of the operand halves. */
 57 |   tmp3 = _mm_clmulepi64_si128(tmp0, tmp1, 0x00);   /* low(tmp0)  * low(tmp1)  */
 58 |   tmp4 = _mm_clmulepi64_si128(tmp0, tmp1, 0x10);   /* low(tmp0)  * high(tmp1) */
 59 |   tmp5 = _mm_clmulepi64_si128(tmp0, tmp1, 0x01);   /* high(tmp0) * low(tmp1)  */
 60 |   tmp6 = _mm_clmulepi64_si128(tmp0, tmp1, 0x11);   /* high(tmp0) * high(tmp1) */
 61 |   /* Add the two middle products, split at the 64-bit boundary; the 256-bit product is then tmp6 (high) : tmp3 (low). */
 62 |   tmp4 = _mm_xor_si128(tmp4, tmp5);
 63 |   tmp5 = _mm_slli_si128(tmp4, 8);
 64 |   tmp4 = _mm_srli_si128(tmp4, 8);
 65 |   tmp3 = _mm_xor_si128(tmp3, tmp5);
 66 |   tmp6 = _mm_xor_si128(tmp6, tmp4);
 67 |   /* Shift the 256-bit value left by one bit: first save the top bit of every 32-bit lane... */
 68 |   tmp7 = _mm_srli_epi32(tmp3, 31);
 69 |   tmp8 = _mm_srli_epi32(tmp6, 31);
 70 |   tmp3 = _mm_slli_epi32(tmp3, 1);
 71 |   tmp6 = _mm_slli_epi32(tmp6, 1);
 72 |   /* ...then move each saved bit into the lane above, including from the top of tmp3 into the bottom of tmp6. */
 73 |   tmp9 = _mm_srli_si128(tmp7, 12);
 74 |   tmp8 = _mm_slli_si128(tmp8, 4);
 75 |   tmp7 = _mm_slli_si128(tmp7, 4);
 76 |   tmp3 = _mm_or_si128(tmp3, tmp7);
 77 |   tmp6 = _mm_or_si128(tmp6, tmp8);
 78 |   tmp6 = _mm_or_si128(tmp6, tmp9);
 79 |   /* Reduction, first phase. NOTE(review): presumably modulo the GCM polynomial, as in Intel's carry-less multiplication white paper - confirm. */
 80 |   tmp7 = _mm_slli_epi32(tmp3, 31);
 81 |   tmp8 = _mm_slli_epi32(tmp3, 30);
 82 |   tmp9 = _mm_slli_epi32(tmp3, 25);
 83 |   /* Combine the three shifted copies and fold them back into tmp3; tmp8 keeps the part used in the second phase. */
 84 |   tmp7 = _mm_xor_si128(tmp7, tmp8);
 85 |   tmp7 = _mm_xor_si128(tmp7, tmp9);
 86 |   tmp8 = _mm_srli_si128(tmp7, 4);
 87 |   tmp7 = _mm_slli_si128(tmp7, 12);
 88 |   tmp3 = _mm_xor_si128(tmp3, tmp7);
 89 |   /* Reduction, second phase: right shifts by 1, 2 and 7 of the updated low half, folded into the high half. */
 90 |   tmp2 = _mm_srli_epi32(tmp3, 1);
 91 |   tmp4 = _mm_srli_epi32(tmp3, 2);
 92 |   tmp5 = _mm_srli_epi32(tmp3, 7);
 93 |   tmp2 = _mm_xor_si128(tmp2, tmp4);
 94 |   tmp2 = _mm_xor_si128(tmp2, tmp5);
 95 |   tmp2 = _mm_xor_si128(tmp2, tmp8);
 96 |   tmp3 = _mm_xor_si128(tmp3, tmp2);
 97 |   tmp6 = _mm_xor_si128(tmp6, tmp3);
 98 |   /* tmp6 now holds the 128-bit result; write it out with the bytes reversed back. */
 99 |   tmp0 = tmp6;
100 |   copy_reverse_16(res, &tmp0);
101 | }
102 | 
103 | #else
104 | /* Built without __PCLMUL__: the feature is reported as unavailable from the start. */
105 | EXPORT int pclmul_available = 0;
106 | /* Nothing to probe in this build; always returns 0. */
107 | EXPORT int pclmul_check_available(void) { return 0; }
108 | /* Aborts the process if called; presumably callers consult pclmul_available first. */
109 | EXPORT void pclmul_mult(uint8_t res[16],
110 |                  const uint8_t arg1[16], const uint8_t arg2[16])
111 | { abort(); }
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/src/cryptokitBignum.ml:
--------------------------------------------------------------------------------
  1 | (***********************************************************************)
  2 | (*                                                                     *)
  3 | (*                      The Cryptokit library                          *)
  4 | (*                                                                     *)
  5 | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
  6 | (*                                                                     *)
  7 | (*  Copyright 2002 Institut National de Recherche en Informatique et   *)
  8 | (*  en Automatique.  All rights reserved.  This file is distributed    *)
  9 | (*  under the terms of the GNU Library General Public License, with    *)
 10 | (*  the special exception on linking described in file LICENSE.        *)
 11 | (*                                                                     *)
 12 | (***********************************************************************)
 13 | 
 14 | (* Arithmetic on big integers, based on the ZArith library. *)
 15 | 
 16 | type t = Z.t   (* big integers are ZArith integers *)
 17 | (* Implemented by the C stub caml_wipe_z; presumably erases the storage of the number (it is applied to secret intermediates below). *)
 18 | external wipe: t -> unit = "caml_wipe_z"
 19 | (* Constants and conversion from native integers. *)
 20 | let zero = Z.zero
 21 | let one = Z.one
 22 | 
 23 | let of_int = Z.of_int
 24 | 
 25 | let compare = Z.compare
 26 | (* Ring operations are direct aliases of the corresponding Z functions. *)
 27 | let add = Z.add
 28 | let sub = Z.sub
 29 | let mult = Z.mul
 30 | (* NOTE(review): per the Zarith documentation Z.div truncates toward zero and Z.rem takes the sign of the dividend - confirm callers pass nonnegative values. *)
 31 | let div = Z.div
 32 | let mod_ = Z.rem
 33 | 
 34 | let lcm = Z.lcm
 35 | (* [relative_prime a b] is true exactly when the gcd of [a] and [b] is 1. *)
 36 | let relative_prime a b =
 37 |   Z.equal Z.one (Z.gcd a b)
 38 | (* Modular exponentiation, an alias for Z.powm_sec. NOTE(review): Zarith documents it as the timing-hardened variant, requiring an odd modulus - confirm. *)
 39 | let mod_power = Z.powm_sec
 40 | (* [sub_mod a b p] is [a - b], with [p] added back once when that difference is negative. *)
 41 | let sub_mod a b p =
 42 |   let diff = Z.sub a b in
 43 |   if Z.sign diff >= 0 then diff else Z.add diff p
 44 | 
 45 | (* Modular exponentiation via the Chinese Remainder Theorem.
 46 |    Compute a ^ d mod pq, where d is defined by
 47 |    dp = d mod (p-1) and dq = d mod (q-1).
 48 |    qinv is q^-1 mod p.
 49 |    Formula:
 50 |      mp = (a mod p)^dp mod p
 51 |      mq = (a mod q)^dq mod q
 52 |      m = ((((mp - mq) mod p) * qInv) mod p) * q + mq
 53 | *)
 54 | 
 55 | let mod_power_CRT a p q dp dq qinv =
 56 |   (* Reduce the base modulo each prime, then exponentiate in each half. *)
 57 |   let amodp = Z.rem a p and amodq = Z.rem a q in
 58 |   let mp = mod_power amodp dp p and mq = mod_power amodq dq q in
 59 |   (* h = (((mp - mq) mod p) * qinv) mod p; the result is h * q + mq. *)
 60 |   let h = Z.rem (Z.mul (sub_mod mp mq p) qinv) p in
 61 |   let res = Z.add (Z.mul q h) mq in
 62 |   (* Only the reduced bases are wiped: res may be physically equal to mq, so mq
 63 |      must survive, and the other intermediates are left alone for consistency. *)
 64 |   wipe amodp;
 65 |   wipe amodq;
 66 |   res
 67 | (* Modular inverse, an alias for Z.invert. *)
 68 | let mod_inv = Z.invert
 69 | (* Overwrite every byte of [s] with NUL, in place. *)
 70 | let wipe_bytes s = Bytes.(fill s 0 (length s) '\000')
 71 | (* Decode the big-endian byte string [s] as a nonnegative integer. *)
 72 | let of_bytes s =
 73 |   let len = String.length s in
 74 |   (* Z.of_bits reads the least significant byte first, so hand it a reversed scratch copy. *)
 75 |   let rev = Bytes.init len (fun i -> String.get s (len - 1 - i)) in
 76 |   let n = Z.of_bits (Bytes.unsafe_to_string rev) in
 77 |   wipe_bytes rev;   (* the scratch copy may hold secret material *)
 78 |   n
 79 | (* [to_bytes ?numbits n]: big-endian bytes of [n]; with [~numbits], exactly (numbits + 7) / 8 bytes, zero-padded on the left. *)
 80 | let to_bytes ?numbits n =
 81 |   let s = Z.to_bits n in   (* little-endian magnitude, possibly followed by zero bytes *)
 82 |   let l =
 83 |     match numbits with
 84 |     | None -> String.length s
 85 |     | Some nb -> assert (Z.numbits n <= nb); (nb + 7) / 8 in
 86 |   let t = Bytes.make l '\000' in
 87 |   for i = 0 to min l (String.length s) - 1 do   (* never index past the l output bytes *)
 88 |     Bytes.set t (l - 1 - i) s.[i]
 89 |   done;
 90 |   wipe_bytes (Bytes.unsafe_of_string s);   (* scrub the intermediate copy of n *)
 91 |   Bytes.unsafe_to_string t
 92 | (* Replace byte [i] of [s] by [f] applied to its integer code; Char.chr rejects results outside 0..255. *)
 93 | let change_byte s i f =
 94 |   Bytes.get s i |> Char.code |> f |> Char.chr |> Bytes.set s i
 95 | (* [random ~rng ?odd numbits] draws bytes from [rng] and returns an integer of exactly [numbits] bits, odd if [~odd:true]. *)
 96 | let random ~rng ?(odd = false) numbits =
 97 |   let nbytes = (numbits + 7) / 8 in
 98 |   let raw = Bytes.create nbytes in
 99 |   rng raw 0 nbytes;
100 |   (* raw is read least significant byte first, so byte 0 carries the parity bit *)
101 |   if odd then change_byte raw 0 (fun b -> b lor 1);
102 |   (* In the most significant byte, set bit (numbits - 1) mod 8 and clear
103 |      everything above it, so that the value is exactly numbits bits long. *)
104 |   let top = 1 lsl ((numbits - 1) land 7) in
105 |   let clamp b = (b land (top - 1)) lor top in
106 |   change_byte raw (nbytes - 1) clamp;
107 |   (* Convert to a number, then scrub the buffer that held the random bytes. *)
108 |   let n = Z.of_bits (Bytes.unsafe_to_string raw) in
109 |   wipe_bytes raw;
110 |   assert (Z.numbits n = numbits);
111 |   assert (not odd || Z.is_odd n);
112 |   n
113 | (* [random_prime ~rng numbits] returns a result of Z.nextprime that is exactly [numbits] bits long, retrying until one fits. *)
114 | let rec random_prime ~rng numbits =
115 |   (* Start from a random odd number and move up to the next prime. *)
116 |   let candidate = Z.nextprime (random ~rng ~odd:true numbits) in
117 |   (* The next prime may need more than numbits bits; if so, draw again. *)
118 |   if Z.numbits candidate <> numbits
119 |   then random_prime ~rng numbits
120 |   else candidate
121 | 
122 | 


--------------------------------------------------------------------------------
/src/ghash.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, Collège de France and Inria                */
  6 | /*                                                                     */
  7 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* Software implementation of GHASH multiplication */
 15 | 
 16 | /* Based on the implementation by Steven M. Gibson at
 17 |    https://github.com/mko-x/SharedAES-GCM/blob/master/Sources/gcm.c
 18 |    Gibson's implementation is in the public domain. */
 19 | 
 20 | #include <stdint.h>
 21 | #include <string.h>
 22 | #include "ghash.h"
 23 | /* Read the 8 bytes b[i..i+7] as one big-endian 64-bit integer. */
 24 | static inline uint64_t get_uint64_be(const uint8_t * b, int i)
 25 | {
 26 |   const uint8_t * p = b + i;
 27 |   uint64_t acc = 0;
 28 |   int k;
 29 | 
 30 |   /* Most significant byte first: shift the accumulator up and append. */
 31 |   for (k = 0; k < 8; k++) {
 32 |     acc = (acc << 8) | p[k];
 33 |   }
 34 |   return acc;
 35 | }
 36 | /* Write n into b[i..i+7], most significant byte first. */
 37 | static inline void put_uint64_be(uint64_t n, uint8_t * b, int i)
 38 | {
 39 |     int k;
 40 | 
 41 |     /* Fill from the last byte backward: each step stores the low
 42 |        8 bits of n and then shifts the next byte into position. */
 43 |     for (k = 7; k >= 0; k--) {
 44 |         b[i + k] = (uint8_t) n;
 45 |         n >>= 8;
 46 |     }
 47 | }
 48 | /* Multiply the 16-byte block input by the key whose 4-bit tables ctx->HH / ctx->HL were built by ghash_init; result in output. */
 49 | EXPORT void ghash_mult(const struct ghash_context * ctx,
 50 |                 const uint8_t input[16],
 51 |                 uint8_t output[16])
 52 | {
 53 |     static const uint64_t last4[16] = {   /* correction terms, indexed by the 4 bits shifted out of zl */
 54 |         0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
 55 |         0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0
 56 |     };
 57 |     int i;
 58 |     uint8_t lo, hi, rem;
 59 |     uint64_t zh, zl;   /* high and low 64 bits of the 128-bit accumulator */
 60 |     /* Seed the accumulator with the table entry for the low nibble of the last byte. */
 61 |     lo = (uint8_t)( input[15] & 0x0f );
 62 |     /* hi is not computed here: the loop sets it before its first use. */
 63 |     zh = ctx->HH[lo];
 64 |     zl = ctx->HL[lo];
 65 | 
 66 |     for( i = 15; i >= 0; i-- ) {
 67 |         lo = (uint8_t) ( input[i] & 0x0f );
 68 |         hi = (uint8_t) ( input[i] >> 4 );
 69 |         /* The low nibble of input[15] was already consumed by the seeding above. */
 70 |         if( i != 15 ) {
 71 |             rem = (uint8_t) ( zl & 0x0f );       /* the 4 bits about to fall off */
 72 |             zl = ( zh << 60 ) | ( zl >> 4 );     /* shift the accumulator right by 4 */
 73 |             zh = ( zh >> 4 );
 74 |             zh ^= (uint64_t) last4[rem] << 48;   /* fold the lost bits back in at the top */
 75 |             zh ^= ctx->HH[lo];                   /* add the table entry for this nibble */
 76 |             zl ^= ctx->HL[lo];
 77 |         }
 78 |         rem = (uint8_t) ( zl & 0x0f );           /* same step for the high nibble */
 79 |         zl = ( zh << 60 ) | ( zl >> 4 );
 80 |         zh = ( zh >> 4 );
 81 |         zh ^= (uint64_t) last4[rem] << 48;
 82 |         zh ^= ctx->HH[hi];
 83 |         zl ^= ctx->HL[hi];
 84 |     }
 85 |     put_uint64_be(zh, output, 0 );   /* big-endian: high half first */
 86 |     put_uint64_be(zl, output, 8 );
 87 | }
 88 | /* Build in ctx the two 16-entry tables (HH: high 64 bits, HL: low 64 bits) used by ghash_mult, from the 16-byte key h. */
 89 | EXPORT void ghash_init(struct ghash_context * ctx,
 90 |                 const uint8_t h[16])
 91 | {
 92 |     int i, j;
 93 |     uint64_t vl, vh;   /* low and high halves of the running table value */
 94 | 
 95 |     /* Start from an all-zero context, then read h as two big-endian halves. */
 96 |     memset(ctx, 0, sizeof(struct ghash_context));
 97 | 
 98 |     vh = get_uint64_be(h, 0);
 99 |     vl = get_uint64_be(h, 8);
100 | 
101 |     ctx->HL[8] = vl;                // 8 = 1000 corresponds to 1 in GF(2^128)
102 |     ctx->HH[8] = vh;
103 |     ctx->HH[0] = 0;                 // 0 corresponds to 0 in GF(2^128)
104 |     ctx->HL[0] = 0;
105 |     /* Entries 4, 2, 1: shift the previous value right by one bit; a bit leaving the low end is folded back as 0xe1 in the top byte. */
106 |     for( i = 4; i > 0; i >>= 1 ) {
107 |         uint32_t T = (uint32_t) ( vl & 1 ) * 0xe1000000U;
108 |         vl  = ( vh << 63 ) | ( vl >> 1 );
109 |         vh  = ( vh >> 1 ) ^ ( (uint64_t) T << 32);
110 |         ctx->HL[i] = vl;
111 |         ctx->HH[i] = vh;
112 |     }
113 |     for (i = 2; i < 16; i <<= 1 ) {   /* remaining entries: entry i + j = entry i XOR entry j, for 0 < j < i */
114 |         uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i;
115 |         vh = *HiH;
116 |         vl = *HiL;
117 |         for( j = 1; j < i; j++ ) {
118 |             HiH[j] = vh ^ ctx->HH[j];
119 |             HiL[j] = vl ^ ctx->HL[j];
120 |         }
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/src/siphash.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*              Xavier Leroy, Collège de France and Inria              */
  6 | /*                                                                     */
  7 | /*  Copyright (c) 2012-2016 Jean-Philippe Aumasson                     */
  8 | /*  <jeanphilippe.aumasson@gmail.com>                                  */
  9 | /*  Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>         */
 10 | /*  Copyright 2022 Institut National de Recherche en Informatique et   */
 11 | /*  en Automatique.  All rights reserved.  This file is distributed    */
 12 | /*  under the terms of the GNU Library General Public License, with    */
 13 | /*  the special exception on linking described in file LICENSE.        */
 14 | /*                                                                     */
 15 | /***********************************************************************/
 16 | 
 17 | /* Based on the SipHash reference C implementation by Aumasson and Bernstein
 18 |    https://github.com/veorq/SipHash
 19 |    and lightly adapted by Leroy.
 20 |    The original implementation is distributed under the CC0 Public Domain
 21 |    Dedication. */
 22 | 
 23 | #include <stddef.h>
 24 | #include <stdint.h>
 25 | #include <string.h>
 26 | #include "siphash.h"
 27 | 
 28 | #define ROTL64(x,n) ((x) << n | (x) >> (64-n))
 29 | /* Assemble the 8 bytes at p into a 64-bit word, least significant byte first. */
 30 | static inline uint64_t U8TO64_LE(const unsigned char *p) {
 31 |   uint64_t word = 0;
 32 |   int k;
 33 | 
 34 |   /* Fold from the top byte down so that p[0] lands in the low 8 bits. */
 35 |   for (k = 7; k >= 0; k--) {
 36 |     word = (word << 8) | (uint64_t)(p[k] & 0xff);
 37 |   }
 38 |   return word;
 39 | }
 40 | /* Write v to p[0..7] in little-endian byte order. */
 41 | static inline void U64TO8_LE(unsigned char *p, uint64_t v) {
 42 |   int k;
 43 | 
 44 |   /* Emit the low byte, then shift the next one into place. */
 45 |   for (k = 0; k < 8; k++) {
 46 |     p[k] = (unsigned char)(v & 0xff);
 47 |     v >>= 8;
 48 |   }
 49 |   /* v is a by-value copy, so the caller's word is untouched. */
 50 | }
 51 | /* Set up st for a new hash keyed by the 16 bytes at key; outlen == 16 selects the 128-bit output variant. */
 52 | EXPORT void siphash_init(struct siphash * st, const unsigned char * key, int outlen)
 53 | {
 54 |   /* The key is read as two little-endian 64-bit halves. */
 55 |   const uint64_t k0 = U8TO64_LE(key);
 56 |   const uint64_t k1 = U8TO64_LE(key + 8);
 57 | 
 58 |   /* Each state word is an initialization constant XORed with one key half. */
 59 |   st->v0 = k0 ^ 0x736f6d6570736575;
 60 |   st->v1 = k1 ^ 0x646f72616e646f6d;
 61 |   st->v2 = k0 ^ 0x6c7967656e657261;
 62 |   st->v3 = k1 ^ 0x7465646279746573;
 63 |   /* The 128-bit variant starts from a slightly different state. */
 64 |   if (outlen == 16) st->v1 ^= 0xEE;
 65 |   st->used = 0;   /* nothing buffered yet */
 66 |   st->len8 = 0;   /* no input counted yet */
 67 | }
 68 | /* One SipHash round: add/rotate/xor mixing of the four state words, in place. */
 69 | static inline void siphash_round(struct siphash * st)
 70 | {
 71 |   uint64_t v0 = st->v0, v1 = st->v1, v2 = st->v2, v3 = st->v3;
 72 | 
 73 |   /* First half: mix the pairs (v0, v1) and (v2, v3). */
 74 |   v0 += v1; v1 = ROTL64(v1, 13); v1 ^= v0; v0 = ROTL64(v0, 32);
 75 |   v2 += v3; v3 = ROTL64(v3, 16); v3 ^= v2;
 76 |   /* Second half: mix the pairs (v0, v3) and (v2, v1). */
 77 |   v0 += v3; v3 = ROTL64(v3, 21); v3 ^= v0;
 78 |   v2 += v1; v1 = ROTL64(v1, 17); v1 ^= v2; v2 = ROTL64(v2, 32);
 79 | 
 80 |   /* Publish the updated words. */
 81 |   st->v0 = v0;
 82 |   st->v1 = v1;
 83 |   st->v2 = v2;
 84 |   st->v3 = v3;
 85 | }
 86 | /* Absorb one 64-bit message word: XOR it into v3, run two rounds, then XOR it into v0. */
 87 | static void siphash_mix(struct siphash * st, uint64_t x)
 88 | {
 89 |   int r;
 90 |   st->v3 ^= x;
 91 |   for (r = 0; r < 2; r++) siphash_round(st);   /* the two compression rounds */
 92 |   st->v0 ^= x;
 93 | }
 94 | /* Feed len bytes at p into the hash: full blocks are absorbed at once, any remainder stays in st->buffer. */
 95 | EXPORT void siphash_add(struct siphash * st, const unsigned char * p, size_t len)
 96 | {
 97 |   const int pending = st->used;
 98 |   const int room = SIPHASH_BUFLEN - pending;
 99 |   st->len8 += len;
100 |   /* Not enough input to complete a block: just stash it. */
101 |   if (len < room) {
102 |     memcpy(st->buffer + pending, p, len);
103 |     st->used = pending + len;
104 |     return;
105 |   }
106 |   /* Top up a partially filled buffer and absorb it. */
107 |   if (pending > 0) {
108 |     memcpy(st->buffer + pending, p, room);
109 |     siphash_mix(st, U8TO64_LE(st->buffer));
110 |     p += room;
111 |     len -= room;
112 |   }
113 |   /* Absorb whole blocks straight from the input. */
114 |   for (; len >= SIPHASH_BUFLEN; p += SIPHASH_BUFLEN, len -= SIPHASH_BUFLEN)
115 |     siphash_mix(st, U8TO64_LE(p));
116 |   /* Keep the tail (shorter than one block) for the next call. */
117 |   if (len > 0) memcpy(st->buffer, p, len);
118 |   st->used = len;
119 | }
120 | /* Run the four finalization rounds and return the XOR of the four state words. */
121 | static uint64_t siphash_final_rounds(struct siphash * st)
122 | {
123 |   int left = 4;
124 |   while (left-- > 0) siphash_round(st);
125 |   /* Collapse v0..v3 into a single 64-bit output word. */
126 |   return (st->v0 ^ st->v1) ^ (st->v2 ^ st->v3);
127 | }
128 | /* Finish the hash and write outlen bytes of digest to out: 16 when outlen == 16, otherwise 8. */
129 | EXPORT void siphash_final(struct siphash * st, int outlen, unsigned char * out)
130 | {
131 |   /* Last word: the low 8 bits of the length in the top byte, and the
132 |      leftover input bytes (at most 7) below it, least significant first. */
133 |   const int tail = st->len8 & 7;
134 |   uint64_t w = (uint64_t) st->len8 << 56;
135 |   int k;
136 | 
137 |   for (k = 0; k < tail; k++) {
138 |     w |= (uint64_t) st->buffer[k] << (8 * k);
139 |   }
140 |   siphash_mix(st, w);
141 | 
142 |   /* First 64 bits of the digest. */
143 |   if (outlen == 16) {
144 |     st->v2 ^= 0xEE;
145 |   } else {
146 |     st->v2 ^= 0xFF;
147 |   }
148 |   U64TO8_LE(out, siphash_final_rounds(st));
149 | 
150 |   /* A 16-byte digest takes a second finalization pass. */
151 |   if (outlen != 16) return;
152 |   st->v1 ^= 0xDD;
153 |   U64TO8_LE(out + 8, siphash_final_rounds(st));
154 | }
155 | 
156 | 


--------------------------------------------------------------------------------
/src/sha1.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
  6 | /*                                                                     */
  7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* SHA-1 hashing */
 15 | 
 16 | #include <string.h>
 17 | #include <caml/config.h>
 18 | #include "sha1.h"
 19 | 
 20 | /* Ref: Handbook of Applied Cryptography, section 9.4.2, algorithm 9.53 */
 21 | 
 22 | #define rol1(x) (((x) << 1) | ((x) >> 31))
 23 | #define rol5(x) (((x) << 5) | ((x) >> 27))
 24 | #define rol30(x) (((x) << 30) | ((x) >> 2))
 25 | /* Copy numwords 32-bit words from src to dst, converting between host byte order and big-endian. */
 26 | static void SHA1_copy_and_swap(void * src, void * dst, int numwords)
 27 | {
 28 | #ifdef ARCH_BIG_ENDIAN
 29 |   /* Host order already is big-endian: a plain copy suffices. */
 30 |   memcpy(dst, src, numwords * sizeof(u32));
 31 | #else
 32 |   const unsigned char * in = src;
 33 |   unsigned char * out = dst;
 34 |   int w;
 35 |   /* Read a whole word before writing it back reversed, so that the
 36 |      swap is still correct when src and dst are the same address. */
 37 |   for (w = 0; w < numwords; w++, in += 4, out += 4) {
 38 |     unsigned char b0 = in[0], b1 = in[1], b2 = in[2], b3 = in[3];
 39 |     out[0] = b3; out[1] = b2; out[2] = b1; out[3] = b0;
 40 |   }
 41 | #endif
 42 | }
 43 | 
 44 | #define F(x,y,z) ( z ^ (x & (y ^ z) ) )
 45 | #define G(x,y,z) ( (x & y) | (z & (x | y) ) )
 46 | #define H(x,y,z) ( x ^ y ^ z )
 47 | 
 48 | #define Y1 0x5A827999U
 49 | #define Y2 0x6ED9EBA1U
 50 | #define Y3 0x8F1BBCDCU
 51 | #define Y4 0xCA62C1D6U
 52 | /* Compress the 64-byte block held in ctx->buffer into the five-word chaining state ctx->state. */
 53 | static void SHA1_transform(struct SHA1Context * ctx)
 54 | {
 55 |   int i;
 56 |   register u32 a, b, c, d, e, t;
 57 |   u32 data[80];   /* message schedule: 16 input words followed by 64 derived words */
 58 | 
 59 |   /* Convert buffer data to 16 big-endian integers */
 60 |   SHA1_copy_and_swap(ctx->buffer, data, 16);
 61 | 
 62 |   /* Expand into 80 integers: XOR of four earlier words, rotated left by one bit */
 63 |   for (i = 16; i < 80; i++) {
 64 |     t = data[i-3] ^ data[i-8] ^ data[i-14] ^ data[i-16];
 65 |     data[i] = rol1(t);
 66 |   }
 67 | 
 68 |   /* Initialize working variables from the current chaining state */
 69 |   a = ctx->state[0];
 70 |   b = ctx->state[1];
 71 |   c = ctx->state[2];
 72 |   d = ctx->state[3];
 73 |   e = ctx->state[4];
 74 | 
 75 |   /* Perform rounds: four groups of 20, each with its own mixing function and constant */
 76 |   for (i = 0; i < 20; i++) {            /* F: bitwise choice between c and d, selected by b */
 77 |     t = F(b, c, d) + Y1 + rol5(a) + e + data[i];
 78 |     e = d; d = c; c = rol30(b); b = a; a = t;
 79 |   }
 80 |   for (/*nothing*/; i < 40; i++) {      /* H: bitwise XOR of b, c, d */
 81 |     t = H(b, c, d) + Y2 + rol5(a) + e + data[i];
 82 |     e = d; d = c; c = rol30(b); b = a; a = t;
 83 |   }
 84 |   for (/*nothing*/; i < 60; i++) {      /* G: bitwise majority of b, c, d */
 85 |     t = G(b, c, d) + Y3 + rol5(a) + e + data[i];
 86 |     e = d; d = c; c = rol30(b); b = a; a = t;
 87 |   }
 88 |   for (/*nothing*/; i < 80; i++) {      /* H again, with the last constant */
 89 |     t = H(b, c, d) + Y4 + rol5(a) + e + data[i];
 90 |     e = d; d = c; c = rol30(b); b = a; a = t;
 91 |   }
 92 | 
 93 |   /* Update chaining values by adding the working variables back in */
 94 |   ctx->state[0] += a;
 95 |   ctx->state[1] += b;
 96 |   ctx->state[2] += c;
 97 |   ctx->state[3] += d;
 98 |   ctx->state[4] += e;
 99 | }
100 | /* Reset ctx: initial chaining values, empty buffer, zero message length. */
101 | EXPORT void SHA1_init(struct SHA1Context * ctx)
102 | {
103 |   static const u32 initial_state[5] = {
104 |     0x67452301U, 0xEFCDAB89U, 0x98BADCFEU, 0x10325476U, 0xC3D2E1F0U
105 |   };
106 |   int k;
107 | 
108 |   for (k = 0; k < 5; k++) ctx->state[k] = initial_state[k];
109 |   ctx->numbytes = 0;
110 |   ctx->length[0] = ctx->length[1] = 0;
111 | }
112 | /* Feed len bytes at data into the hash: whole 64-byte blocks are compressed, the remainder is buffered in ctx. */
113 | EXPORT void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data,
114 |                    unsigned long len)
115 | {
116 |   u32 low, room;
117 | 
118 |   /* Add 8 * len to the bit counter, kept as two 32-bit halves. */
119 |   low = ctx->length[1];
120 |   ctx->length[1] = low + (u32) (len << 3);
121 |   if (ctx->length[1] < low) ctx->length[0]++;   /* the low half wrapped: carry into the high half */
122 |   ctx->length[0] += (u32) (len >> 29);          /* the part of 8 * len that lies above 32 bits */
123 | 
124 |   /* Complete a partially filled block first. */
125 |   if (ctx->numbytes != 0) {
126 |     room = 64 - ctx->numbytes;
127 |     if (len < room) {
128 |       /* Still short of a full block: buffer the input and wait for more. */
129 |       memcpy(ctx->buffer + ctx->numbytes, data, len);
130 |       ctx->numbytes += len;
131 |       return;
132 |     }
133 |     memcpy(ctx->buffer + ctx->numbytes, data, room);
134 |     SHA1_transform(ctx);
135 |     data += room;
136 |     len -= room;
137 |   }
138 | 
139 |   /* Compress the input 64 bytes at a time. */
140 |   for (; len >= 64; data += 64, len -= 64) {
141 |     memcpy(ctx->buffer, data, 64);
142 |     SHA1_transform(ctx);
143 |   }
144 |   /* Keep the tail (fewer than 64 bytes) for the next call. */
145 |   memcpy(ctx->buffer, data, len);
146 |   ctx->numbytes = len;
147 | }
148 | /* Pad the message, compress the final block(s) and write the 20-byte digest to output. */
149 | EXPORT void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20])
150 | {
151 |   int used = ctx->numbytes;
152 | 
153 |   /* SHA1_add_data never leaves a full block in the buffer, so the 0x80
154 |      padding marker always fits. */
155 |   ctx->buffer[used] = 0x80;
156 |   used += 1;
157 |   /* Fewer than 8 bytes left for the length field: zero-fill this block,
158 |      compress it, and continue in a fresh one. */
159 |   if (used > 56) {
160 |     memset(ctx->buffer + used, 0, 64 - used);
161 |     SHA1_transform(ctx);
162 |     used = 0;
163 |   }
164 |   /* Zero up to byte 56, then append the bit count in big-endian. */
165 |   memset(ctx->buffer + used, 0, 56 - used);
166 |   SHA1_copy_and_swap(ctx->length, ctx->buffer + 56, 2);
167 |   SHA1_transform(ctx);
168 |   /* The digest is the chaining state written out as big-endian words. */
169 |   SHA1_copy_and_swap(ctx->state, output, 5);
170 | }
171 | 


--------------------------------------------------------------------------------
/src/chacha20.c:
--------------------------------------------------------------------------------
  1 | /* Based on D. J. Bernstein's chacha-regs.c version 200801118,
  2 |   https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c
  3 |   The initial code is in the public domain */
  4 | 
  5 | #include <assert.h>
  6 | #include <stddef.h>
  7 | #include <stdint.h>
  8 | #include <string.h>
  9 | #include <caml/config.h>
 10 | #include "chacha20.h"
 11 | /* Store val at dst[0..3], least significant byte first. */
 12 | static inline void U32TO8_LITTLE(uint8_t * dst, uint32_t val)
 13 | {
 14 |   /* Always store byte by byte.  This produces the same bytes on every
 15 |      host, and unlike a store through a cast uint32_t pointer it does
 16 |      not depend on the alignment of dst or on type punning, so no
 17 |      endianness test is needed. */
 18 |   dst[0] = (uint8_t) val;
 19 |   dst[1] = (uint8_t) (val >> 8);
 20 |   dst[2] = (uint8_t) (val >> 16);
 21 |   dst[3] = (uint8_t) (val >> 24);
 22 | }
 23 | /* Read src[0..3] as a little-endian 32-bit word. */
 24 | static inline uint32_t U8TO32_LITTLE(const uint8_t * src)
 25 | {
 26 |   uint32_t word = 0;
 27 |   int k;
 28 |   for (k = 3; k >= 0; k--) word = (word << 8) | src[k];   /* top byte first, so src[0] ends up lowest */
 29 |   return word;
 30 | }
 31 | 
 32 | #define ROTATE(v,c) ((v) << (c) | (v) >> (32 - (c)))
 33 | #define XOR(v,w) ((v) ^ (w))
 34 | #define PLUS(v,w) ((v) + (w))
 35 | #define PLUSONE(v) ((v) + 1)
 36 | 
 37 | #define QUARTERROUND(a,b,c,d) \
 38 |   a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
 39 |   c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
 40 |   a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
 41 |   c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
 42 | /* Produce the next 64 keystream bytes in ctx->output from the 16 words of ctx->input, then advance the block counter. */
 43 | static void chacha20_block(chacha20_ctx * ctx)
 44 | {
 45 |   uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
 46 |   int i;
 47 | 
 48 |   x0 = ctx->input[0];   /* working copy of the 16 input words */
 49 |   x1 = ctx->input[1];
 50 |   x2 = ctx->input[2];
 51 |   x3 = ctx->input[3];
 52 |   x4 = ctx->input[4];
 53 |   x5 = ctx->input[5];
 54 |   x6 = ctx->input[6];
 55 |   x7 = ctx->input[7];
 56 |   x8 = ctx->input[8];
 57 |   x9 = ctx->input[9];
 58 |   x10 = ctx->input[10];
 59 |   x11 = ctx->input[11];
 60 |   x12 = ctx->input[12];
 61 |   x13 = ctx->input[13];
 62 |   x14 = ctx->input[14];
 63 |   x15 = ctx->input[15];
 64 |   for (i = 10; i > 0; i --) {   /* 10 iterations of 8 quarter-rounds each */
 65 |     QUARTERROUND( x0, x4, x8,x12)   /* first four: words i, i+4, i+8, i+12 */
 66 |     QUARTERROUND( x1, x5, x9,x13)
 67 |     QUARTERROUND( x2, x6,x10,x14)
 68 |     QUARTERROUND( x3, x7,x11,x15)
 69 |     QUARTERROUND( x0, x5,x10,x15)   /* last four: the same rows, each shifted by one more position */
 70 |     QUARTERROUND( x1, x6,x11,x12)
 71 |     QUARTERROUND( x2, x7, x8,x13)
 72 |     QUARTERROUND( x3, x4, x9,x14)
 73 |   }
 74 |   x0 = PLUS(x0,ctx->input[0]);   /* add the original input words back in */
 75 |   x1 = PLUS(x1,ctx->input[1]);
 76 |   x2 = PLUS(x2,ctx->input[2]);
 77 |   x3 = PLUS(x3,ctx->input[3]);
 78 |   x4 = PLUS(x4,ctx->input[4]);
 79 |   x5 = PLUS(x5,ctx->input[5]);
 80 |   x6 = PLUS(x6,ctx->input[6]);
 81 |   x7 = PLUS(x7,ctx->input[7]);
 82 |   x8 = PLUS(x8,ctx->input[8]);
 83 |   x9 = PLUS(x9,ctx->input[9]);
 84 |   x10 = PLUS(x10,ctx->input[10]);
 85 |   x11 = PLUS(x11,ctx->input[11]);
 86 |   x12 = PLUS(x12,ctx->input[12]);
 87 |   x13 = PLUS(x13,ctx->input[13]);
 88 |   x14 = PLUS(x14,ctx->input[14]);
 89 |   x15 = PLUS(x15,ctx->input[15]);
 90 |   U32TO8_LITTLE(ctx->output + 0,x0);   /* serialize the 16 words little-endian into the keystream buffer */
 91 |   U32TO8_LITTLE(ctx->output + 4,x1);
 92 |   U32TO8_LITTLE(ctx->output + 8,x2);
 93 |   U32TO8_LITTLE(ctx->output + 12,x3);
 94 |   U32TO8_LITTLE(ctx->output + 16,x4);
 95 |   U32TO8_LITTLE(ctx->output + 20,x5);
 96 |   U32TO8_LITTLE(ctx->output + 24,x6);
 97 |   U32TO8_LITTLE(ctx->output + 28,x7);
 98 |   U32TO8_LITTLE(ctx->output + 32,x8);
 99 |   U32TO8_LITTLE(ctx->output + 36,x9);
100 |   U32TO8_LITTLE(ctx->output + 40,x10);
101 |   U32TO8_LITTLE(ctx->output + 44,x11);
102 |   U32TO8_LITTLE(ctx->output + 48,x12);
103 |   U32TO8_LITTLE(ctx->output + 52,x13);
104 |   U32TO8_LITTLE(ctx->output + 56,x14);
105 |   U32TO8_LITTLE(ctx->output + 60,x15);
106 |   /* Increment the 32- or 64-bit counter */
107 |   if (++ ctx->input[12] == 0) {                   /* word 12 wrapped around */
108 |     if (ctx->iv_length == 8) ++ ctx->input[13];   /* only an 8-byte IV uses word 13 as the high counter half */
109 |   }
110 | }
111 | /* XOR len bytes of in with the keystream into out, generating a fresh block whenever the buffered 64 bytes run out */
112 | EXPORT void chacha20_transform(chacha20_ctx * ctx,
113 |                         const uint8_t * in, uint8_t * out, size_t len)
114 | {
115 |   size_t i; int pos = ctx->next;   /* pos: offset of the first unused byte of ctx->output */
116 |   for (i = 0; i < len; i++) {
117 |     if (pos >= 64) { chacha20_block(ctx); pos = 0; }
118 |     out[i] = in[i] ^ ctx->output[pos++];
119 |   }
120 |   ctx->next = pos;
121 | }
122 | /* Copy the next len keystream bytes to out, generating a fresh block whenever the buffered 64 bytes run out */
123 | EXPORT void chacha20_extract(chacha20_ctx * ctx,
124 |                       uint8_t * out, size_t len)
125 | {
126 |   size_t i; int pos = ctx->next;   /* pos: offset of the first unused byte of ctx->output */
127 |   for (i = 0; i < len; i++) {
128 |     if (pos >= 64) { chacha20_block(ctx); pos = 0; }
129 |     out[i] = ctx->output[pos++];
130 |   }
131 |   ctx->next = pos;
132 | }
133 | /* Initialize ctx from a 16- or 32-byte key, an 8- or 12-byte IV and a starting block counter */
134 | EXPORT void chacha20_init(chacha20_ctx * ctx,
135 |                    const uint8_t * key, size_t key_length,
136 |                    const uint8_t * iv, size_t iv_length,
137 |                    uint64_t counter)
138 | {
139 |   /* The four constant words depend only on the key size */
140 |   const uint8_t *constants =
141 |     (uint8_t *) (key_length == 32 ? "expand 32-byte k" : "expand 16-byte k");
142 |   const uint8_t *key_hi;   /* where state words 8..11 are read from */
143 |   int i;
144 | 
145 |   assert (key_length == 16 || key_length == 32);
146 |   assert (iv_length == 8 || iv_length == 12);
147 |   /* A 32-byte key supplies words 8..11 from its second half; a 16-byte key is used twice */
148 |   key_hi = (key_length == 32) ? key + 16 : key;
149 |   for (i = 0; i < 4; i++) {
150 |     ctx->input[i]     = U8TO32_LITTLE(constants + 4 * i);
151 |     ctx->input[4 + i] = U8TO32_LITTLE(key + 4 * i);
152 |     ctx->input[8 + i] = U8TO32_LITTLE(key_hi + 4 * i);
153 |   }
154 | 
155 |   /* Word 12 always holds the low 32 bits of the counter */
156 |   ctx->input[12] = (uint32_t) counter;
157 |   if (iv_length != 8) {
158 |     /* 12-byte IV fills words 13..15; the upper 32 bits of counter are not used */
159 |     for (i = 0; i < 3; i++) ctx->input[13 + i] = U8TO32_LITTLE(iv + 4 * i);
160 |   } else {
161 |     /* 8-byte IV: word 13 takes the upper counter bits, words 14..15 the IV */
162 |     ctx->input[13] = (uint32_t) (counter >> 32);
163 |     ctx->input[14] = U8TO32_LITTLE(iv + 0);
164 |     ctx->input[15] = U8TO32_LITTLE(iv + 4);
165 |   }
166 |   ctx->iv_length = iv_length;
167 |   ctx->next = 64;   /* output buffer starts out exhausted, so the first use generates a block */
168 | }
169 | 


--------------------------------------------------------------------------------
/src/stubs-rng.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
  6 | /*                                                                     */
  7 | /*  Copyright 2003 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* Stub code for the system-provided RNG and for hardware RNG */
 15 | 
 16 | #include <caml/mlvalues.h>
 17 | #include <caml/alloc.h>
 18 | #include <caml/fail.h>
 19 | #include <caml/memory.h>
 20 | 
 21 | #if defined(HAVE_GETENTROPY) || defined(__APPLE__)
 22 | 
 23 | /* getentropy() system RNG */
 24 | 
 25 | #include <unistd.h>
 26 | #ifdef __APPLE__
 27 | #include <sys/random.h>
 28 | #endif
 29 | /* getentropy() backend: there is no handle to open, so this just returns unit */
 30 | CAMLprim value caml_get_system_rng(value unit)
 31 | {
 32 |   return Val_unit;
 33 | }
 34 | /* getentropy() backend: nothing to release; vhc is ignored */
 35 | CAMLprim value caml_close_system_rng(value vhc)
 36 | {
 37 |   return Val_unit;
 38 | }
 39 | /* Fill len bytes of str, starting at offset ofs, from getentropy(); Val_false as soon as a call fails */
 40 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str,
 41 |                                             value ofs, value len)
 42 | {
 43 |   unsigned char * dst = &Byte_u(str, Long_val(ofs));
 44 |   intnat remaining;
 45 |   for (remaining = Long_val(len); remaining > 0; /*nothing*/) {
 46 |     int chunk = remaining < 256 ? remaining : 256;  /* request at most 256 bytes per call */
 47 |     if (getentropy(dst, chunk) == -1) return Val_false;
 48 |     dst += chunk; remaining -= chunk;
 49 |   }
 50 |   return Val_true;
 51 | }
 52 | 
 53 | #elif defined(_WIN32)
 54 | 
 55 | /* Win32 system RNG */
 56 | 
 57 | /* Inspired by Mike Lin's port of Cryptokit 1.0 */
 58 | 
 59 | #define _WIN32_WINNT 0x0400
 60 | #define WIN32_LEAN_AND_MEAN
 61 | #include <windows.h>
 62 | #include <wincrypt.h>
 63 | #ifndef CRYPT_SILENT
 64 | #define CRYPT_SILENT 0
 65 | #endif
 66 | /* Access, as an lvalue, the HCRYPTPROV stored at field 0 of the block v */
 67 | #define HCRYPTPROV_val(v) (*((HCRYPTPROV *) &Field(v, 0)))
 68 | /* Acquire a crypto provider and return it wrapped in an abstract block; calls caml_raise_not_found() when acquisition fails */
 69 | CAMLprim value caml_get_system_rng(value unit)
 70 | {
 71 |   HCRYPTPROV prov;
 72 |   value res;
 73 | 
 74 |   if (! CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL,
 75 |                             CRYPT_VERIFYCONTEXT | CRYPT_SILENT))
 76 |     caml_raise_not_found();
 77 |   res = caml_alloc((sizeof(HCRYPTPROV) + sizeof(value) - 1) / sizeof(value),
 78 |               Abstract_tag);  /* size in words, rounded up so one HCRYPTPROV fits */
 79 |   HCRYPTPROV_val(res) = prov;
 80 |   return res;
 81 | }
 82 | /* Release the provider handle stored in vhc; the result of CryptReleaseContext is not checked */
 83 | CAMLprim value caml_close_system_rng(value vhc)
 84 | {
 85 |   CryptReleaseContext(HCRYPTPROV_val(vhc), 0);
 86 |   return Val_unit;
 87 | }
 88 | /* Ask CryptGenRandom for len bytes written into str at offset ofs; returns its success flag as an OCaml bool */
 89 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str,
 90 |                                             value ofs, value len)
 91 | {
 92 |   return Val_bool(CryptGenRandom(HCRYPTPROV_val(vhc),
 93 |                                  Long_val(len),
 94 |                                  &Byte(str, Long_val(ofs))));
 95 | }
 96 | 
 97 | #else
 98 | /* Neither getentropy() nor Win32: no system RNG, so opening it always calls caml_raise_not_found() */
 99 | CAMLprim value caml_get_system_rng(value unit)
100 | {
101 |   caml_raise_not_found();
102 |   return Val_unit;              /* not reached */
103 | }
104 | /* Fallback build: nothing was opened, so there is nothing to close */
105 | CAMLprim value caml_close_system_rng(value vhc)
106 | {
107 |   return Val_unit;
108 | }
109 | /* Fallback build: never produces bytes; always reports failure and leaves str untouched */
110 | CAMLprim value caml_system_rng_random_bytes(value vhc, value str,
111 |                                             value ofs, value len)
112 | {
113 |   return Val_false;
114 | }
115 | 
116 | #endif
117 | 
118 | /* Intel RDRAND instruction */
119 | 
120 | #if defined(__GNUC__) && defined(__x86_64)
121 | 
122 | #include <stdint.h>
123 | #include <string.h>
124 | /* Try the rdrand instruction up to 20 times; on success store the value in *res and return 1, otherwise return 0 */
125 | static inline int rdrand64(uint64_t * res)
126 | {
127 |   uint64_t val;
128 |   unsigned char carry;
129 |   int attempts = 20;
130 | 
131 |   while (attempts-- > 0) {
132 |     __asm__ __volatile__ ("rdrand %0; setc %1" : "=r" (val), "=qm" (carry));
133 |     if (carry) { *res = val; return 1; }  /* setc captured the carry flag, which is treated as the success indicator */
134 |   }
135 |   return 0;
136 | }
137 | /* Report whether rdrand is advertised (CPUID leaf 1, ECX bit 30) and actually yields usable values */
138 | CAMLprim value caml_hardware_rng_available(value unit)
139 | {
140 |   uint32_t eax, ebx, ecx, edx;
141 |   uint64_t sample;
142 |   int tries = 8;
143 |   __asm__ __volatile__ ("cpuid"
144 |                         : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
145 |                         : "a" (1));
146 |   if (! (ecx & (1U << 30))) return Val_false;
147 |   /* Early AMD Ryzen 3000 processors have a most annoying bug:
148 |      the rdrand instruction always returns 0xFF....FF.
149 |      We check for this condition here. */
150 |   while (tries-- > 0) {
151 |     if (rdrand64(&sample) && sample != ~(uint64_t) 0) return Val_true;
152 |   }
153 |   /* If we reach here, either rdrand64 failed 8*20=160 times in a row,
154 |      or it returned 8*64=512 "1" bits in a row.  In either case,
155 |      it's unusable. */
156 |   return Val_false;
157 | }
158 | /* Fill len bytes of str, starting at offset ofs, with rdrand output; Val_false as soon as rdrand64 gives up */
159 | CAMLprim value caml_hardware_rng_random_bytes(value str, value ofs, value len)
160 | {
161 |   unsigned char * dst = &Byte_u(str, Long_val(ofs));
162 |   intnat nbytes = Long_val(len);
163 |   uint64_t r;
164 | 
165 |   while (nbytes >= 8) {
166 |     if (! rdrand64(&r)) return Val_false;
167 |     /* ofs is an arbitrary byte offset, so dst may be misaligned: copy instead of storing through a uint64_t pointer */
168 |     memcpy(dst, &r, 8);
169 |     dst += 8; nbytes -= 8;
170 |   }
171 |   if (nbytes > 0) {                      /* 1 to 7 trailing bytes */
172 |     if (! rdrand64(&r)) return Val_false;
173 |     memcpy(dst, &r, nbytes);
174 |   }
175 |   return Val_true;
176 | }
177 | 
178 | #else
179 | /* Not a GCC-compatible x86-64 build: the hardware RNG is reported as unavailable */
180 | CAMLprim value caml_hardware_rng_available(value unit)
181 | { return Val_false; }
182 | /* Not a GCC-compatible x86-64 build: never produces bytes, always reports failure */
183 | CAMLprim value caml_hardware_rng_random_bytes(value str, value ofs, value len)
184 | { return Val_false; }
185 | 
186 | #endif
187 | 


--------------------------------------------------------------------------------
/src/keccak.c:
--------------------------------------------------------------------------------
  1 | /* SHA-3 (Keccak) cryptographic hash function */
  2 | /* Code adapted from the "readable" implementation written by
  3 |    Markku-Juhani O. Saarinen <mjos@iki.fi> */
  4 | 
  5 | #include <assert.h>
  6 | #include <string.h>
  7 | #include <caml/config.h>
  8 | #include "keccak.h"
  9 | /* Number of rounds applied by each call to KeccakPermutation */
 10 | #define KECCAK_ROUNDS 24
 11 | /* Rotate the 64-bit value x left by y bits; every use in this file has y between 1 and 62 */
 12 | #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 13 | /* Round constants: entry [round] is XORed into st[0] at the end of that round (the Iota step) */
 14 | static const u64 keccakf_rndc[24] = 
 15 | {
 16 |     0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 17 |     0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
 18 |     0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
 19 |     0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
 20 |     0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
 21 |     0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 
 22 |     0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
 23 |     0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 24 | };
 25 | 
 26 | #if 0
 27 | /* Compiled out, kept for reference: entry k of each table is written out as the (rotc, piln) arguments of RHOPI(k, ...) in KeccakPermutation */
 28 | static const int keccakf_rotc[24] = 
 29 | {
 30 |     1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14, 
 31 |     27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
 32 | };
 33 | 
 34 | static const int keccakf_piln[24] = 
 35 | {
 36 |     10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4, 
 37 |     15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1 
 38 | };
 39 | #endif
 40 | 
 41 | /* Update the state in place with KECCAK_ROUNDS rounds, each made of the Theta, Rho Pi, Chi and Iota steps */
 42 | 
 43 | static void KeccakPermutation(u64 st[25])
 44 | {
 45 |   int round, j;
 46 |     u64 t, bc[5];
 47 | 
 48 |     for (round = 0; round < KECCAK_ROUNDS; round++) {
 49 | 
 50 |         // Theta
 51 | #define THETA1(i) \
 52 |             bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]
 53 |         /* THETA1: bc[i] = XOR of the five words st[i], st[i+5], ..., st[i+20] */
 54 |       THETA1(0); THETA1(1); THETA1(2); THETA1(3); THETA1(4);
 55 | 
 56 | #define THETA2(i) \
 57 |             t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \
 58 |             st[0 + i] ^= t; \
 59 |             st[5 + i] ^= t; \
 60 |             st[10 + i] ^= t; \
 61 |             st[15 + i] ^= t; \
 62 |             st[20 + i] ^= t
 63 |         /* THETA2: XOR t, built from the two neighbouring bc entries (indices mod 5), into the same five words */
 64 |       THETA2(0); THETA2(1); THETA2(2); THETA2(3); THETA2(4);
 65 | 
 66 | 
 67 |         // Rho Pi
 68 | 
 69 | #define RHOPI(i, rotc, piln) \
 70 |             bc[0] = st[piln]; \
 71 |             st[piln] = ROTL64(t, rotc); \
 72 |             t = bc[0]
 73 |         /* RHOPI: store the rotated carried value t into st[piln] and carry the old st[piln] forward; the first argument is only a step index and is unused in the expansion */
 74 |         t = st[1];
 75 |         RHOPI(0, 1, 10); RHOPI(1, 3, 7); RHOPI(2, 6, 11); RHOPI(3, 10, 17);
 76 |         RHOPI(4, 15, 18); RHOPI(5, 21, 3); RHOPI(6, 28, 5); RHOPI(7, 36, 16);
 77 |         RHOPI(8, 45, 8); RHOPI(9, 55, 21); RHOPI(10, 2, 24); RHOPI(11, 14, 4);
 78 |         RHOPI(12, 27, 15); RHOPI(13, 41, 23); RHOPI(14, 56, 19); RHOPI(15, 8, 13);
 79 |         RHOPI(16, 25, 12); RHOPI(17, 43, 2); RHOPI(18, 62, 20); RHOPI(19, 18, 14);
 80 |         RHOPI(20, 39, 22); RHOPI(21, 61, 9); RHOPI(22, 20, 6); RHOPI(23, 44, 1);
 81 | 
 82 |         //  Chi
 83 | 
 84 | #define CHI1(i,j) \
 85 |                 bc[i] = st[j + i]
 86 | #define CHI2(i,j) \
 87 |                 st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]
 88 |         /* Chi works on each group of five consecutive words st[j..j+4]: CHI1 snapshots the group, CHI2 updates it from the snapshot */
 89 |         for (j = 0; j < 25; j += 5) {
 90 |           CHI1(0,j); CHI1(1,j); CHI1(2,j); CHI1(3,j); CHI1(4,j);
 91 |           CHI2(0,j); CHI2(1,j); CHI2(2,j); CHI2(3,j); CHI2(4,j);
 92 |         }
 93 | 
 94 |         //  Iota
 95 |         st[0] ^= keccakf_rndc[round];
 96 |     }
 97 | }
 98 | 
 99 | /* XOR rsiz bytes of p into the state, read as little-endian 64-bit words, then run the permutation */
100 | 
101 | static void KeccakAbsorb(u64 st[25], unsigned char * p, int rsiz)
102 | {
103 |   int i;
104 |   rsiz = rsiz / 8;                     /* from here on: number of 64-bit words */
105 |   for (i = 0; i < rsiz; i += 1, p += 8) {
106 |     // fixme: use direct access for little-endian platforms without alignment constraints?
107 |       /* Bytes are converted to unsigned before shifting: as a promoted int, p[3] << 24 is undefined when p[3] >= 0x80 */
108 |       unsigned int l = (unsigned int) p[0] | ((unsigned int) p[1] << 8) | ((unsigned int) p[2] << 16) | ((unsigned int) p[3] << 24);
109 |       unsigned int h = (unsigned int) p[4] | ((unsigned int) p[5] << 8) | ((unsigned int) p[6] << 16) | ((unsigned int) p[7] << 24);
110 |       st[i] ^= l | ((unsigned long long) h << 32);
111 |   }
112 |   KeccakPermutation(st);
113 | }
114 | 
115 | /* Exported interface */
116 | /* Reset ctx for a hash of hsiz bits (224, 256, 384 or 512): sizes stored in bytes, state zeroed, buffer empty */
117 | EXPORT void SHA3_init(struct SHA3Context * ctx, int hsiz)
118 | {
119 |   assert (hsiz == 224 || hsiz == 256 || hsiz == 384 || hsiz == 512);
120 |   memset(ctx->state, 0, sizeof(ctx->state));
121 |   ctx->numbytes = 0;
122 |   ctx->hsiz = hsiz / 8;                 /* digest length in bytes */
123 |   ctx->rsiz = 200 - 2 * ctx->hsiz;      /* block size in bytes: 200 minus twice the digest length */
124 | }
125 | /* Feed len bytes of data into the hash; whatever does not fill a whole block is kept in ctx->buffer */
126 | EXPORT void SHA3_absorb(struct SHA3Context * ctx,
127 |                  unsigned char * data,
128 |                  unsigned long len)
129 | {
130 |   /* First top up a partially filled buffer, if there is one */
131 |   if (ctx->numbytes != 0) {
132 |     int room = ctx->rsiz - ctx->numbytes;   /* free bytes left in the buffer */
133 |     if (len < room) {
134 |       /* Still not a full block: accumulate and wait for more input */
135 |       memcpy(ctx->buffer + ctx->numbytes, data, len);
136 |       ctx->numbytes += len;
137 |       return;
138 |     }
139 |     memcpy(ctx->buffer + ctx->numbytes, data, room);
140 |     KeccakAbsorb(ctx->state, ctx->buffer, ctx->rsiz);
141 |     data += room;
142 |     len  -= room;
143 |   }
144 | 
145 |   /* Then absorb whole blocks of [rsiz] bytes straight from the caller's data */
146 |   for (/*nothing*/; len >= ctx->rsiz; len -= ctx->rsiz) {
147 |     KeccakAbsorb(ctx->state, data, ctx->rsiz);
148 |     data += ctx->rsiz;
149 |   }
150 | 
151 |   /* Finally keep the tail (fewer than rsiz bytes) for the next call */
152 |   if (len != 0) memcpy(ctx->buffer, data, len);
153 |   ctx->numbytes = len;
154 | }
155 | /* Pad and absorb the buffered tail, then write the hsiz-byte digest to output */
156 | EXPORT void SHA3_extract(unsigned char padding,
157 |                   struct SHA3Context * ctx,
158 |                   unsigned char * output)
159 | {
160 |   int pos;
161 |   int used = ctx->numbytes;     /* bytes already waiting in the buffer */
162 | 
163 |   /* Final padding: the caller-supplied padding byte right after the data,
164 |      zeros up to the end of the block, and the top bit of the last byte
165 |      set.  When a single byte is free, both markers end up in that same
166 |      byte. */
167 |   ctx->buffer[used] = padding;
168 |   memset(ctx->buffer + used + 1, 0, ctx->rsiz - (used + 1));
169 |   ctx->buffer[ctx->rsiz - 1] |= 0x80;
170 | 
171 |   /* Absorb remaining data + padding */
172 |   KeccakAbsorb(ctx->state, ctx->buffer, ctx->rsiz);
173 | 
174 |   /* The digest is the start of the state, each 64-bit word written out
175 |      least significant byte first.  It is emitted 4 bytes at a time because
176 |      hsiz is not always a multiple of 8 (28 bytes for a 224-bit hash). */
177 |   for (pos = 0; pos < ctx->hsiz; pos += 4) {
178 |     /* pos % 8 is 0 or 4: select the low or the high half of the word */
179 |     u64 half = ctx->state[pos / 8] >> (8 * (pos % 8));
180 |     output[pos] = half;
181 |     output[pos + 1] = half >> 8;
182 |     output[pos + 2] = half >> 16;
183 |     output[pos + 3] = half >> 24;
184 |   }
185 | }
186 | 


--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
  1 | - Change `Cryptokit.RSA` to use two distinct types for public keys and for
  2 |   private keys.  (Breaking change.)  (#41)
  3 | - Add `Cryptokit.Paillier`: Paillier's homomorphic, public-key encryption.
  4 |   (Contributed by Atish Pranav.)   (#39)
  5 | 
  6 | Release 1.20:
  7 | - Name space depollution: make C implementations of ciphers local to the
  8 |   OCaml/C stub code, so that they do not conflict with other C libraries
  9 |   implementing crypto functions with the same names (#35, #36)
 10 | 
 11 | Release 1.19:
 12 | - Fix missing root registration in some Chacha20, Blake2, and
 13 |   Blake3 functions (#34)
 14 | 
 15 | Release 1.18:
 16 | - Add BLAKE3 hash and MAC functions.
 17 | - Fix compile-time error "SSE4.1 instruction set not enabled" (#32, #33).
 18 | 
 19 | Release 1.17:
 20 | - Add interfaces for authenticated encryption (AEAD) and two implementations:
 21 |   AES-GCM and Chacha20-Poly1305.
 22 | - Use `getentropy()` for `system_rng` when available (Linux, macOS, BSD).
 23 | - Removed support for EGD (the Entropy Gathering Daemon).
 24 | - Added compile-time alerts on uses of broken or weak ciphers and hashes.
 25 |   (Can be silenced with "-alert -crypto".)
 26 | - Add the hmac_sha384 MAC (#8).
 27 | - Add the SipHash MAC.
 28 | - Set file descriptor to close-on-exec in `device_rng` (#27).
 29 | - Improve compatibility with OCaml 5.0 (#28).
 30 | - Make sure CryptokitBignum is installed like before the switch to Dune (#31).
 31 | 
 32 | Release 1.16.1:
 33 | - Make the tests faster and more robust
 34 | - Update dependencies and documentation.
 35 | 
 36 | Release 1.16:
 37 | - Use dune as the build system (contributed by Andrey Mokhov, PR #24)
 38 | - Add BLAKE2b and BLAKE2s hash and MAC functions.
 39 | 
 40 | Release 1.15:
 41 | - Added constant-time `string_equal` and `bytes_equal` comparison functions
 42 |   (execution time depends on the lengths of the strings but not on their
 43 |   contents) (issue #13, PR #14)
 44 | - Caml FFI: use caml_ long names and CAML_NAME_SPACE; get rid of Begin_roots
 45 | - OASIS files regenerated in dynamic mode for OCaml 4.09 compatibility.
 46 |   For this reason, OASIS is now a build dependency.
 47 | 
 48 | Release 1.14:
 49 | - Ensure compatibility with OCaml 4.09 and up.
 50 | - Detect early AMD Ryzen 3000 bug where the RDRAND instruction always
 51 |   generates 0xFF...FF, and, in this case, report the hardware RNG as
 52 |   unavailable.
 53 | - Fix formatting of documentation comments (issue #3, PR #5)
 54 | - Optional argument to control whether the zlib transform expects a
 55 |   zlib header (PR #12).
 56 | - Fix issue with zlib >= 1.2.9 where internal sanity check is affected
 57 |   by the stream data block being moved by OCaml's GC (issue #7, PR #17).
 58 | - DH.new_parameters: update documentation to suggest at least 2048
 59 |   bits (PR #18).
 60 | - DH.derive_key: use SHA256 instead of SHA1 (PR #19).
 61 | 
 62 | Release 1.13:
 63 | - Add the Chacha20 stream cipher.
 64 | - Add the AES-CMAC (a.k.a. AES-OMAC1) message authentication code.
 65 | - Pseudo-random number generator: replace the old AES-CBC-Fibonacci generator
 66 |   with a faster, simpler generator based on Chacha20.
 67 | - Add an alternate pseudo-random number generator based on AES in CTR mode.
 68 | - Documentation: warn about known cryptographic weaknesses in Triple DES,
 69 |   Blowfish, and ARCfour.
 70 | - Documentation: warn about problems with variable-length messages in
 71 |   MACs based on block ciphers in CBC mode.
 72 | 
 73 | Release 1.12:
 74 | - Fix x86-32 compilation error and improve detection of AES-NI for x86
 75 |   processors (Jeremie Dimino, Etienne Millon)
 76 |   (Closes: #1646)
 77 | - AES-NI: align key_schedule on a 16 byte boundary (Etienne Millon)
 78 |   (Closes: #1709)
 79 | - Add original Keccak submission to SHA-3 (Yoichi Hirai)
 80 | 
 81 | Release 1.11:
 82 | - Adapt to "safe string" mode (OCaml 4.02 and later required).
 83 |   The API should remain backward-compatible for clients compiled
 84 |   in "unsafe string" mode.  
 85 | - Update SHA-3 to the official NIST standard (different padding than
 86 |   in the Keccak submission).  (Closes: #1528)
 87 | - Fixed bounds checking in "add_substring" methods of hash functions
 88 |   and other functions that operate on a substring of a string.
 89 |   (Closes: #1480)
 90 | - Use hardware implementation of AES when available on x86 processors.
 91 |   (Faster than the software implementation and less sensitive to
 92 |    side channel attacks.)
 93 | - Use the Zarith library to implement RSA.
 94 |   (Faster than the previous implementation and less sensitive to
 95 |    side channel attacks.)
 96 | - Support the hardware random number generator present in recent
 97 |   x86 processors.
 98 | - Rebuilt generated files with Oasis 0.4.6 for OCaml 4.03 compatibility.
 99 | 
100 | Release 1.10:
101 | - Add all SHA-2 hash functions: SHA-224, SHA-384 and SHA-512 
102 |   in addition to the existing SHA-256.  (Closes: #1223)
103 | - Add support for CTR (Counter) chaining mode.
104 | - Fix compilation error with OCaml 4.03+dev.
105 | - Avoid using some obsolete OCaml stdlib functions.
106 | 
107 | Release 1.9:
108 |  - More fixes to build in Windows with zlib (mingw and msvc).
109 | 
110 | Release 1.8:
111 |  - Build .cmxs with C bindings (Closes: #1303)
112 |  - Use advapi32 on Windows (Closes: #1055)
113 |  - Allow specifying --zlib-include and --zlib-libdir if zlib is not installed in
114 |    the standard location.
115 | 
116 | Release 1.7:
117 | - Added SHA-3 hash function.
118 | 
119 | Release 1.6:
120 | - Regenerate setup.ml with oasis 0.3.0~rc6 version
121 | 
122 | Release 1.5:
123 | - Fix bug check in buffered_output#ensure_capacity (Closes: #879)
124 | - Allow padding in Base64 (Closes: #897)
125 | 
126 | Release 1.4:
127 | - Added Blowfish block cipher.
128 | - Added MAC functions based on HMAC construction applied to 
129 |   SHA-256 and RIPEMD-160.
130 | - Added OASIS and findlib support (Closes: #589)
131 | 
132 | Release 1.3:
133 | - Added hash functions SHA-256 and RIPEMD-160.
134 | - Added "flush" method to transforms.
135 | - Fixed infinite loop in decompression of incorrect data.
136 | 
137 | Release 1.2:
138 | - MS Windows port
139 | 
140 | Release 1.1:
141 | - Added Diffie-Hellman key agreement
142 | - Exported raw modular arithmetic operations (mod_power, mod_mult)
143 | 
144 | Release 1.0:
145 | - First public release
146 | 


--------------------------------------------------------------------------------
/src/poly1305-donna-64.h:
--------------------------------------------------------------------------------
  1 | /* Poly1305 implementation written by Andrew Moon,
  2 |           https://github.com/floodyberry/poly1305-donna
  3 |    License: MIT or public domain.
  4 |    Minor adaptations for Cryptokit by Xavier Leroy. */
  5 | 
  6 | /*
  7 | 	poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition
  8 | */
  9 | 
 10 | #include <stdint.h>
 11 | 
 12 | #if defined(__GNUC__)
 13 |         typedef unsigned __int128 uint128;  /* 128-bit arithmetic comes from the compiler's __int128; no alternative is defined here when __GNUC__ is not set */
 14 | 	#define MUL(out, x, y) out = ((uint128)x * y)  /* out = 128-bit product; x is widened before multiplying */
 15 | 	#define ADD(out, in) out += in
 16 | 	#define ADDLO(out, in) out += in
 17 | 	#define SHR(in, shift) (uint64_t)(in >> (shift))  /* in shifted right by `shift`, truncated to 64 bits */
 18 | 	#define LO(in) (uint64_t)(in)  /* low 64 bits of in */
 19 | 
 20 | 	#define POLY1305_NOINLINE __attribute__((noinline))
 21 | #endif
 22 | 
 23 | #define poly1305_block_size 16
 24 | 
 25 | /* 17 + sizeof(size_t) + 8*sizeof(uint64_t) */
 26 | typedef struct poly1305_state_internal_t {
 27 | 	uint64_t r[3];  /* key bytes 0..15, masked and split into three limbs by poly1305_init */
 28 | 	uint64_t h[3];  /* running accumulator, three limbs; starts at 0 */
 29 | 	uint64_t pad[2];  /* key bytes 16..31 as two 64-bit words, added to h in poly1305_finish */
 30 | 	size_t leftover;  /* number of bytes currently held in buffer */
 31 | 	unsigned char buffer[poly1305_block_size];  /* holds an incomplete block */
 32 | 	unsigned char final;  /* set to 1 by poly1305_finish for the padded last block; makes poly1305_blocks omit its high bit */
 33 | } poly1305_state_internal_t;
 34 | 
 35 | /* decode the eight bytes at p as a 64 bit unsigned integer in little endian */
 36 | static inline uint64_t
 37 | U8TO64(const unsigned char *p) {
 38 | 	uint64_t v = 0;
 39 | 	int i;
 40 | 	/* start with the most significant byte (p[7]); every further byte
 41 | 	   pushes what was gathered so far up by eight bits */
 42 | 	for (i = 7; i >= 0; i--) {
 43 | 		v <<= 8;
 44 | 		v |= (uint64_t)(p[i] & 0xff);
 45 | 	}
 46 | 	return v;
 47 | }
 48 | 
 49 | /* store v at p as eight bytes in little endian (least significant byte first) */
 50 | static inline void
 51 | U64TO8(unsigned char *p, uint64_t v) {
 52 | 	int i;
 53 | 
 54 | 	/* write out the low byte each time round, then shift the
 55 | 	   remaining bytes down so that the next one becomes the low byte */
 56 | 	for (i = 0; i < 8; i++) {
 57 | 		p[i] = v & 0xff;
 58 | 		v >>= 8;
 59 | 	}
 60 | }
 61 | /* Initialize the state in ctx from a 32-byte key: bytes 0..15 give r (masked below), bytes 16..31 give the pad */
 62 | void
 63 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
 64 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
 65 | 	uint64_t t0,t1;
 66 | 
 67 | 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 68 | 	t0 = U8TO64(&key[0]);
 69 | 	t1 = U8TO64(&key[8]);
 70 | 
 71 | 	st->r[0] = ( t0                    ) & 0xffc0fffffff;  /* limb 0: bits 0..43 of the 128-bit value */
 72 | 	st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;  /* limb 1: bits 44..87 */
 73 | 	st->r[2] = ((t1 >> 24)             ) & 0x00ffffffc0f;  /* limb 2: bits 88..127 */
 74 | 
 75 | 	/* h = 0 */
 76 | 	st->h[0] = 0;
 77 | 	st->h[1] = 0;
 78 | 	st->h[2] = 0;
 79 | 
 80 | 	/* save pad for later */
 81 | 	st->pad[0] = U8TO64(&key[16]);
 82 | 	st->pad[1] = U8TO64(&key[24]);
 83 | 
 84 | 	st->leftover = 0;  /* nothing buffered yet */
 85 | 	st->final = 0;
 86 | }
 87 | /* Process every complete 16-byte block in m[0..bytes): h = (h + block) * r with a partial reduction; a trailing partial block is ignored */
 88 | static void
 89 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
 90 | 	const uint64_t hibit = (st->final) ? 0 : ((uint64_t)1 << 40); /* 1 << 128 */
 91 | 	uint64_t r0,r1,r2;
 92 | 	uint64_t s1,s2;
 93 | 	uint64_t h0,h1,h2;
 94 | 	uint64_t c;
 95 | 	uint128 d0,d1,d2,d;
 96 | 
 97 | 	r0 = st->r[0];
 98 | 	r1 = st->r[1];
 99 | 	r2 = st->r[2];
100 | 
101 | 	h0 = st->h[0];
102 | 	h1 = st->h[1];
103 | 	h2 = st->h[2];
104 | 
105 | 	s1 = r1 * (5 << 2);  /* 20 * r1 and 20 * r2, computed once for the products below */
106 | 	s2 = r2 * (5 << 2);
107 | 
108 | 	while (bytes >= poly1305_block_size) {
109 | 		uint64_t t0,t1;
110 | 
111 | 		/* h += m[i] */
112 | 		t0 = U8TO64(&m[0]);
113 | 		t1 = U8TO64(&m[8]);
114 | 
115 | 		h0 += (( t0                    ) & 0xfffffffffff);  /* the block is split into limbs of 44, 44 and 40 bits */
116 | 		h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
117 | 		h2 += (((t1 >> 24)             ) & 0x3ffffffffff) | hibit;
118 | 
119 | 		/* h *= r */
120 | 		MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
121 | 		MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
122 | 		MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
123 | 
124 | 		/* (partial) h %= p */
125 | 		              c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
126 | 		ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
127 | 		ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
128 | 		h0  += c * 5; c = (h0 >> 44);  h0 =    h0  & 0xfffffffffff;  /* the carry out of the top limb is folded back into limb 0 multiplied by 5 */
129 | 		h1  += c;
130 | 
131 | 		m += poly1305_block_size;
132 | 		bytes -= poly1305_block_size;
133 | 	}
134 | 
135 | 	st->h[0] = h0;  /* write the updated accumulator back to the state */
136 | 	st->h[1] = h1;
137 | 	st->h[2] = h2;
138 | }
139 | 
140 | /* Process any buffered partial block, fully reduce h, add the pad, write the 16-byte tag to mac, then clear h, r and pad in the state */
141 | void
142 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
143 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
144 | 	uint64_t h0,h1,h2,c;
145 | 	uint64_t g0,g1,g2;
146 | 	uint64_t t0,t1;
147 | 
148 | 	/* process the remaining block */
149 | 	if (st->leftover) {
150 | 		size_t i = st->leftover;
151 | 		st->buffer[i] = 1;  /* a single 1 byte follows the data; the rest of the block is zero-filled */
152 | 		for (i = i + 1; i < poly1305_block_size; i++)
153 | 			st->buffer[i] = 0;
154 | 		st->final = 1;  /* makes poly1305_blocks use hibit = 0 for this padded block */
155 | 		poly1305_blocks(st, st->buffer, poly1305_block_size);
156 | 	}
157 | 
158 | 	/* fully carry h */
159 | 	h0 = st->h[0];
160 | 	h1 = st->h[1];
161 | 	h2 = st->h[2];
162 | 
163 | 	             c = (h1 >> 44); h1 &= 0xfffffffffff;
164 | 	h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
165 | 	h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
166 | 	h1 += c;     c = (h1 >> 44); h1 &= 0xfffffffffff;
167 | 	h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
168 | 	h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
169 | 	h1 += c;
170 | 
171 | 	/* compute h + -p */
172 | 	g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
173 | 	g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
174 | 	g2 = h2 + c - ((uint64_t)1 << 42);
175 | 
176 | 	/* select h if h < p, or h + -p if h >= p */
177 | 	c = (g2 >> ((sizeof(uint64_t) * 8) - 1)) - 1;  /* all ones when the top bit of g2 is clear, zero when it is set; used as a mask so no branch is needed */
178 | 	g0 &= c;
179 | 	g1 &= c;
180 | 	g2 &= c;
181 | 	c = ~c;
182 | 	h0 = (h0 & c) | g0;
183 | 	h1 = (h1 & c) | g1;
184 | 	h2 = (h2 & c) | g2;
185 | 
186 | 	/* h = (h + pad) */
187 | 	t0 = st->pad[0];
188 | 	t1 = st->pad[1];
189 | 
190 | 	h0 += (( t0                    ) & 0xfffffffffff)    ; c = (h0 >> 44); h0 &= 0xfffffffffff;
191 | 	h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
192 | 	h2 += (((t1 >> 24)             ) & 0x3ffffffffff) + c;                 h2 &= 0x3ffffffffff;
193 | 
194 | 	/* mac = h % (2^128) */
195 | 	h0 = ((h0      ) | (h1 << 44));  /* repack the three limbs into two 64-bit words */
196 | 	h1 = ((h1 >> 20) | (h2 << 24));
197 | 
198 | 	U64TO8(&mac[0], h0);
199 | 	U64TO8(&mac[8], h1);
200 | 
201 | 	/* zero out the state */
202 | 	st->h[0] = 0;
203 | 	st->h[1] = 0;
204 | 	st->h[2] = 0;
205 | 	st->r[0] = 0;
206 | 	st->r[1] = 0;
207 | 	st->r[2] = 0;
208 | 	st->pad[0] = 0;
209 | 	st->pad[1] = 0;
210 | }
211 | 
212 | 


--------------------------------------------------------------------------------
/src/blake3_portable.c:
--------------------------------------------------------------------------------
  1 | #include "blake3_impl.h"
  2 | #include <string.h>
  3 | /* Rotate the 32-bit word w right by c bits; g() below calls it with c = 16, 12, 8 and 7 */
  4 | INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
  5 |   return (w >> c) | (w << (32 - c));
  6 | }
  7 | /* Mixing step: update words a, b, c, d of state in place, folding in the message words x and y */
  8 | INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
  9 |               uint32_t x, uint32_t y) {
 10 |   state[a] += state[b] + x;                     /* first half folds in x */
 11 |   state[d] = rotr32(state[a] ^ state[d], 16);
 12 |   state[c] += state[d];
 13 |   state[b] = rotr32(state[c] ^ state[b], 12);
 14 |   state[a] += state[b] + y;                     /* second half folds in y */
 15 |   state[d] = rotr32(state[a] ^ state[d], 8);
 16 |   state[c] += state[d];
 17 |   state[b] = rotr32(state[c] ^ state[b], 7);
 18 | }
 19 | /* Apply one round to state: eight g() calls, each consuming two words of msg picked by this round's MSG_SCHEDULE row */
 20 | INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
 21 |   // Select the message schedule based on the round.
 22 |   const uint8_t *schedule = MSG_SCHEDULE[round];
 23 | 
 24 |   // Mix the columns.
 25 |   g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
 26 |   g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
 27 |   g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
 28 |   g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
 29 | 
 30 |   // Mix the rows.
 31 |   g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
 32 |   g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
 33 |   g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
 34 |   g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
 35 | }
 36 | /* Fill state[16] from cv, IV, counter, block_len and flags, then run rounds 0..6 over the block's 16 message words */
 37 | INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
 38 |                          const uint8_t block[BLAKE3_BLOCK_LEN],
 39 |                          uint8_t block_len, uint64_t counter, uint8_t flags) {
 40 |   uint32_t block_words[16];
 41 |   block_words[0] = load32(block + 4 * 0);  /* one 32-bit word per 4 block bytes; byte order is whatever load32 implements (defined outside this chunk) */
 42 |   block_words[1] = load32(block + 4 * 1);
 43 |   block_words[2] = load32(block + 4 * 2);
 44 |   block_words[3] = load32(block + 4 * 3);
 45 |   block_words[4] = load32(block + 4 * 4);
 46 |   block_words[5] = load32(block + 4 * 5);
 47 |   block_words[6] = load32(block + 4 * 6);
 48 |   block_words[7] = load32(block + 4 * 7);
 49 |   block_words[8] = load32(block + 4 * 8);
 50 |   block_words[9] = load32(block + 4 * 9);
 51 |   block_words[10] = load32(block + 4 * 10);
 52 |   block_words[11] = load32(block + 4 * 11);
 53 |   block_words[12] = load32(block + 4 * 12);
 54 |   block_words[13] = load32(block + 4 * 13);
 55 |   block_words[14] = load32(block + 4 * 14);
 56 |   block_words[15] = load32(block + 4 * 15);
 57 | 
 58 |   state[0] = cv[0];  /* words 0..7: the incoming chaining value */
 59 |   state[1] = cv[1];
 60 |   state[2] = cv[2];
 61 |   state[3] = cv[3];
 62 |   state[4] = cv[4];
 63 |   state[5] = cv[5];
 64 |   state[6] = cv[6];
 65 |   state[7] = cv[7];
 66 |   state[8] = IV[0];  /* words 8..11: the first four IV words */
 67 |   state[9] = IV[1];
 68 |   state[10] = IV[2];
 69 |   state[11] = IV[3];
 70 |   state[12] = counter_low(counter);  /* words 12..13: the counter, split by the counter_low / counter_high helpers */
 71 |   state[13] = counter_high(counter);
 72 |   state[14] = (uint32_t)block_len;
 73 |   state[15] = (uint32_t)flags;
 74 | 
 75 |   round_fn(state, &block_words[0], 0);  /* seven rounds; the round number selects the message schedule row */
 76 |   round_fn(state, &block_words[0], 1);
 77 |   round_fn(state, &block_words[0], 2);
 78 |   round_fn(state, &block_words[0], 3);
 79 |   round_fn(state, &block_words[0], 4);
 80 |   round_fn(state, &block_words[0], 5);
 81 |   round_fn(state, &block_words[0], 6);
 82 | }
 83 | 
 84 | EXPORT void blake3_compress_in_place_portable(uint32_t cv[8],
 85 |                                        const uint8_t block[BLAKE3_BLOCK_LEN],
 86 |                                        uint8_t block_len, uint64_t counter,
 87 |                                        uint8_t flags) {
 88 |   uint32_t state[16];
 89 |   compress_pre(state, cv, block, block_len, counter, flags);
 90 |   cv[0] = state[0] ^ state[8];
 91 |   cv[1] = state[1] ^ state[9];
 92 |   cv[2] = state[2] ^ state[10];
 93 |   cv[3] = state[3] ^ state[11];
 94 |   cv[4] = state[4] ^ state[12];
 95 |   cv[5] = state[5] ^ state[13];
 96 |   cv[6] = state[6] ^ state[14];
 97 |   cv[7] = state[7] ^ state[15];
 98 | }
 99 | 
100 | EXPORT void blake3_compress_xof_portable(const uint32_t cv[8],
101 |                                   const uint8_t block[BLAKE3_BLOCK_LEN],
102 |                                   uint8_t block_len, uint64_t counter,
103 |                                   uint8_t flags, uint8_t out[64]) {
104 |   uint32_t state[16];
105 |   compress_pre(state, cv, block, block_len, counter, flags);
106 | 
107 |   store32(&out[0 * 4], state[0] ^ state[8]);
108 |   store32(&out[1 * 4], state[1] ^ state[9]);
109 |   store32(&out[2 * 4], state[2] ^ state[10]);
110 |   store32(&out[3 * 4], state[3] ^ state[11]);
111 |   store32(&out[4 * 4], state[4] ^ state[12]);
112 |   store32(&out[5 * 4], state[5] ^ state[13]);
113 |   store32(&out[6 * 4], state[6] ^ state[14]);
114 |   store32(&out[7 * 4], state[7] ^ state[15]);
115 |   store32(&out[8 * 4], state[8] ^ cv[0]);
116 |   store32(&out[9 * 4], state[9] ^ cv[1]);
117 |   store32(&out[10 * 4], state[10] ^ cv[2]);
118 |   store32(&out[11 * 4], state[11] ^ cv[3]);
119 |   store32(&out[12 * 4], state[12] ^ cv[4]);
120 |   store32(&out[13 * 4], state[13] ^ cv[5]);
121 |   store32(&out[14 * 4], state[14] ^ cv[6]);
122 |   store32(&out[15 * 4], state[15] ^ cv[7]);
123 | }
124 | 
125 | INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
126 |                               const uint32_t key[8], uint64_t counter,
127 |                               uint8_t flags, uint8_t flags_start,
128 |                               uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
129 |   uint32_t cv[8];
130 |   memcpy(cv, key, BLAKE3_KEY_LEN);
131 |   uint8_t block_flags = flags | flags_start;
132 |   while (blocks > 0) {
133 |     if (blocks == 1) {
134 |       block_flags |= flags_end;
135 |     }
136 |     blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
137 |                                       block_flags);
138 |     input = &input[BLAKE3_BLOCK_LEN];
139 |     blocks -= 1;
140 |     block_flags = flags;
141 |   }
142 |   store_cv_words(out, cv);
143 | }
144 | 
145 | EXPORT void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
146 |                                size_t blocks, const uint32_t key[8],
147 |                                uint64_t counter, bool increment_counter,
148 |                                uint8_t flags, uint8_t flags_start,
149 |                                uint8_t flags_end, uint8_t *out) {
150 |   while (num_inputs > 0) {
151 |     hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
152 |                       flags_end, out);
153 |     if (increment_counter) {
154 |       counter += 1;
155 |     }
156 |     inputs += 1;
157 |     num_inputs -= 1;
158 |     out = &out[BLAKE3_OUT_LEN];
159 |   }
160 | }
161 | 


--------------------------------------------------------------------------------
/src/poly1305-donna-32.h:
--------------------------------------------------------------------------------
  1 | /* Poly1305 implementation written by Andrew Moon,
  2 |           https://github.com/floodyberry/poly1305-donna
  3 |    License: MIT or public domain.
  4 |    Minor adaptations for Cryptokit by Xavier Leroy. */
  5 | 
  6 | /*
  7 | 	poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
  8 | */
  9 | 
 10 | #include <stdint.h>
 11 | 
 12 | #define poly1305_block_size 16
 13 | 
/* Internal layout of the opaque poly1305_context for the 32-bit
   implementation.  Size:
   17 + sizeof(size_t) + 14*sizeof(uint32_t) */
typedef struct poly1305_state_internal_t {
	uint32_t r[5];      /* clamped key r (key bytes 0..15) as five 26-bit limbs */
	uint32_t h[5];      /* running accumulator, five limbs of nominally 26 bits */
	uint32_t pad[4];    /* key bytes 16..31, added to h when finishing */
	size_t leftover;    /* number of not-yet-processed bytes held in buffer */
	unsigned char buffer[poly1305_block_size]; /* partial-block staging area */
	unsigned char final; /* when non-zero, poly1305_blocks omits the 2^128 bit */
} poly1305_state_internal_t;
 23 | 
 24 | /* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */
 25 | static inline uint32_t
 26 | U8TO32(const unsigned char *p) {
 27 | 	return
 28 |           (((uint32_t)(p[0] & 0xff)      ) |
 29 |            ((uint32_t)(p[1] & 0xff) <<  8) |
 30 |            ((uint32_t)(p[2] & 0xff) << 16) |
 31 |            ((uint32_t)(p[3] & 0xff) << 24));
 32 | }
 33 | 
 34 | /* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */
 35 | static inline void
 36 | U32TO8(unsigned char *p, uint32_t v) {
 37 | 	p[0] = (v      ) & 0xff;
 38 | 	p[1] = (v >>  8) & 0xff;
 39 | 	p[2] = (v >> 16) & 0xff;
 40 | 	p[3] = (v >> 24) & 0xff;
 41 | }
 42 | 
 43 | void
 44 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
 45 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
 46 | 
 47 | 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 48 | 	st->r[0] = (U8TO32(&key[ 0])     ) & 0x3ffffff;
 49 | 	st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
 50 | 	st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
 51 | 	st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
 52 | 	st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
 53 | 
 54 | 	/* h = 0 */
 55 | 	st->h[0] = 0;
 56 | 	st->h[1] = 0;
 57 | 	st->h[2] = 0;
 58 | 	st->h[3] = 0;
 59 | 	st->h[4] = 0;
 60 | 
 61 | 	/* save pad for later */
 62 | 	st->pad[0] = U8TO32(&key[16]);
 63 | 	st->pad[1] = U8TO32(&key[20]);
 64 | 	st->pad[2] = U8TO32(&key[24]);
 65 | 	st->pad[3] = U8TO32(&key[28]);
 66 | 
 67 | 	st->leftover = 0;
 68 | 	st->final = 0;
 69 | }
 70 | 
/* Absorb as many whole 16-byte blocks as `bytes` allows from m into the
   accumulator: for each block, h = (h + block) * r, partially reduced.
   Any trailing bytes beyond the last whole block are ignored here.
   Unless st->final is set, bit 128 is added to every block. */
static void
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
	/* bit 24 of limb 4 is bit 4*26 + 24 = 128 of the block value */
	const uint32_t hibit = (st->final) ? 0 : (1UL << 24); /* 1 << 128 */
	uint32_t r0,r1,r2,r3,r4;
	uint32_t s1,s2,s3,s4;
	uint32_t h0,h1,h2,h3,h4;
	uint64_t d0,d1,d2,d3,d4;
	uint32_t c;

	r0 = st->r[0];
	r1 = st->r[1];
	r2 = st->r[2];
	r3 = st->r[3];
	r4 = st->r[4];

	/* 5 * r_i, used for the product terms that wrap around past limb 4
	   (2^130 is congruent to 5 modulo p = 2^130 - 5) */
	s1 = r1 * 5;
	s2 = r2 * 5;
	s3 = r3 * 5;
	s4 = r4 * 5;

	h0 = st->h[0];
	h1 = st->h[1];
	h2 = st->h[2];
	h3 = st->h[3];
	h4 = st->h[4];

	while (bytes >= poly1305_block_size) {
		/* h += m[i] */
		/* the block is split into 26-bit limbs using overlapping 32-bit
		   little-endian loads at byte offsets 0, 3, 6, 9 and 12 */
		h0 += (U8TO32(m+ 0)     ) & 0x3ffffff;
		h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
		h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
		h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
		h4 += (U8TO32(m+12) >> 8) | hibit;

		/* h *= r */
		/* schoolbook 5x5 limb product with 64-bit accumulation; terms
		   whose limb index sum exceeds 4 use s_i instead of r_i */
		d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1);
		d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2);
		d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3);
		d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4);
		d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0);

		/* (partial) h %= p */
		/* propagate carries limb to limb; the carry out of limb 4 is
		   multiplied by 5 and folded back into limb 0.  h1 is left
		   possibly slightly above 26 bits. */
		              c = (uint32_t)(d0 >> 26); h0 = (uint32_t)d0 & 0x3ffffff;
		d1 += c;      c = (uint32_t)(d1 >> 26); h1 = (uint32_t)d1 & 0x3ffffff;
		d2 += c;      c = (uint32_t)(d2 >> 26); h2 = (uint32_t)d2 & 0x3ffffff;
		d3 += c;      c = (uint32_t)(d3 >> 26); h3 = (uint32_t)d3 & 0x3ffffff;
		d4 += c;      c = (uint32_t)(d4 >> 26); h4 = (uint32_t)d4 & 0x3ffffff;
		h0 += c * 5;  c =                (h0 >> 26); h0 =                h0 & 0x3ffffff;
		h1 += c;

		m += poly1305_block_size;
		bytes -= poly1305_block_size;
	}

	/* write the updated accumulator back to the context */
	st->h[0] = h0;
	st->h[1] = h1;
	st->h[2] = h2;
	st->h[3] = h3;
	st->h[4] = h4;
}
131 | 
/* Finish the MAC computation: absorb any buffered partial block (padded
   with a 1 byte followed by zeroes), fully reduce h modulo p, add the pad
   modulo 2^128, write the 16-byte tag to mac in little-endian order, and
   clear r, h and pad in the context. */
void
poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
	uint32_t h0,h1,h2,h3,h4,c;
	uint32_t g0,g1,g2,g3,g4;
	uint64_t f;
	uint32_t mask;

	/* process the remaining block */
	if (st->leftover) {
		size_t i = st->leftover;
		/* explicit 1 byte right after the data; setting final makes
		   poly1305_blocks skip its implicit 2^128 bit */
		st->buffer[i++] = 1;
		for (; i < poly1305_block_size; i++)
			st->buffer[i] = 0;
		st->final = 1;
		poly1305_blocks(st, st->buffer, poly1305_block_size);
	}

	/* fully carry h */
	h0 = st->h[0];
	h1 = st->h[1];
	h2 = st->h[2];
	h3 = st->h[3];
	h4 = st->h[4];

	             c = h1 >> 26; h1 = h1 & 0x3ffffff;
	h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
	h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
	h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
	h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
	h1 +=     c;

	/* compute h + -p */
	/* g = h + 5 - 2^130; the subtraction of 2^130 is applied to limb 4 */
	g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
	g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
	g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
	g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
	g4 = h4 + c - (1UL << 26);

	/* select h if h < p, or h + -p if h >= p */
	/* branch-free select: the top bit of g4 is set when the subtraction
	   wrapped (h < p), giving mask = 0; otherwise mask = all ones */
	mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
	g0 &= mask;
	g1 &= mask;
	g2 &= mask;
	g3 &= mask;
	g4 &= mask;
	mask = ~mask;
	h0 = (h0 & mask) | g0;
	h1 = (h1 & mask) | g1;
	h2 = (h2 & mask) | g2;
	h3 = (h3 & mask) | g3;
	h4 = (h4 & mask) | g4;

	/* h = h % (2^128) */
	/* repack the five 26-bit limbs into four 32-bit words */
	h0 = ((h0      ) | (h1 << 26)) & 0xffffffff;
	h1 = ((h1 >>  6) | (h2 << 20)) & 0xffffffff;
	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
	h3 = ((h3 >> 18) | (h4 <<  8)) & 0xffffffff;

	/* mac = (h + pad) % (2^128) */
	/* 64-bit additions carry f >> 32 into the next word; the carry out
	   of the last word is dropped */
	f = (uint64_t)h0 + st->pad[0]            ; h0 = (uint32_t)f;
	f = (uint64_t)h1 + st->pad[1] + (f >> 32); h1 = (uint32_t)f;
	f = (uint64_t)h2 + st->pad[2] + (f >> 32); h2 = (uint32_t)f;
	f = (uint64_t)h3 + st->pad[3] + (f >> 32); h3 = (uint32_t)f;

	U32TO8(mac +  0, h0);
	U32TO8(mac +  4, h1);
	U32TO8(mac +  8, h2);
	U32TO8(mac + 12, h3);

	/* zero out the state */
	/* NOTE(review): buffer, leftover and final are not cleared here */
	st->h[0] = 0;
	st->h[1] = 0;
	st->h[2] = 0;
	st->h[3] = 0;
	st->h[4] = 0;
	st->r[0] = 0;
	st->r[1] = 0;
	st->r[2] = 0;
	st->r[3] = 0;
	st->r[4] = 0;
	st->pad[0] = 0;
	st->pad[1] = 0;
	st->pad[2] = 0;
	st->pad[3] = 0;
}
218 | 
219 | 


--------------------------------------------------------------------------------
/src/sha256.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
  6 | /*                                                                     */
  7 | /*  Copyright 2004 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* SHA-256 hashing */
 15 | 
 16 | #include <string.h>
 17 | #include <caml/config.h>
 18 | #include "sha256.h"
 19 | 
 20 | /* Ref: FIPS publication 180-2 */
 21 | 
 22 | #define ROTR(x,n) ((x) >> (n) | (x) << (32 - (n)))
 23 | 
 24 | #define CH(x,y,z) (z ^ (x & (y ^ z)))
 25 | #define MAJ(x,y,z) ((x & y) | (z & (x | y)))
 26 | #define SIGMA0(x) (ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22))
 27 | #define SIGMA1(x) (ROTR(x,6) ^ ROTR(x,11) ^ ROTR(x,25))
 28 | #define sigma0(x) (ROTR(x,7) ^ ROTR(x,18) ^ (x >> 3))
 29 | #define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ (x >> 10))
 30 | 
 31 | static void SHA256_copy_and_swap(void * src, void * dst, int numwords)
 32 | {
 33 | #ifdef ARCH_BIG_ENDIAN
 34 |   memcpy(dst, src, numwords * sizeof(u32));
 35 | #else
 36 |   unsigned char * s, * d;
 37 |   unsigned char a, b;
 38 |   for (s = src, d = dst; numwords > 0; s += 4, d += 4, numwords--) {
 39 |     a = s[0];
 40 |     b = s[1];
 41 |     d[0] = s[3];
 42 |     d[1] = s[2];
 43 |     d[2] = b;
 44 |     d[3] = a;
 45 |   }
 46 | #endif
 47 | }
 48 | 
 49 | static u32 SHA256_constants[64] = {
 50 |   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
 51 |   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 52 |   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
 53 |   0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 54 |   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
 55 |   0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
 56 |   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
 57 |   0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
 58 |   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
 59 |   0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
 60 |   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
 61 |   0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
 62 |   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
 63 |   0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
 64 |   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
 65 |   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 66 | };
 67 | 
 68 | static void SHA256_transform(struct SHA256Context * ctx)
 69 | {
 70 |   int i;
 71 |   register u32 a, b, c, d, e, f, g, h, t1, t2;
 72 |   u32 data[80];
 73 | 
 74 |   /* Convert buffer data to 16 big-endian integers */
 75 |   SHA256_copy_and_swap(ctx->buffer, data, 16);
 76 | 
 77 |   /* Expand into 80 integers */
 78 |   for (i = 16; i < 80; i++) {
 79 |     data[i] = sigma1(data[i-2]) + data[i-7] + sigma0(data[i-15]) + data[i-16];
 80 |   }
 81 | 
 82 |   /* Initialize working variables */
 83 |   a = ctx->state[0];
 84 |   b = ctx->state[1];
 85 |   c = ctx->state[2];
 86 |   d = ctx->state[3];
 87 |   e = ctx->state[4];
 88 |   f = ctx->state[5];
 89 |   g = ctx->state[6];
 90 |   h = ctx->state[7];
 91 | 
 92 |   /* Perform rounds */
 93 | #if 0
 94 |   for (i = 0; i < 64; i++) {
 95 |     t1 = h + SIGMA1(e) + CH(e, f, g) + SHA256_constants[i] + data[i];
 96 |     t2 = SIGMA0(a) + MAJ(a, b, c);
 97 |     h = g;  g = f;  f = e;  e = d + t1;
 98 |     d = c;  c = b;  b = a;  a = t1 + t2;
 99 |   }
100 | #else
101 | #define STEP(a,b,c,d,e,f,g,h,i) \
102 |     t1 = h + SIGMA1(e) + CH(e, f, g) + SHA256_constants[i] + data[i]; \
103 |     t2 = SIGMA0(a) + MAJ(a, b, c); \
104 |     d = d + t1; \
105 |     h = t1 + t2
106 | 
107 |   for (i = 0; i < 64; i += 8) {
108 |     STEP(a,b,c,d,e,f,g,h,i);
109 |     STEP(h,a,b,c,d,e,f,g,i+1);
110 |     STEP(g,h,a,b,c,d,e,f,i+2);
111 |     STEP(f,g,h,a,b,c,d,e,i+3);
112 |     STEP(e,f,g,h,a,b,c,d,i+4);
113 |     STEP(d,e,f,g,h,a,b,c,i+5);
114 |     STEP(c,d,e,f,g,h,a,b,i+6);
115 |     STEP(b,c,d,e,f,g,h,a,i+7);
116 |   }
117 | #endif
118 | 
119 |   /* Update chaining values */
120 |   ctx->state[0] += a;
121 |   ctx->state[1] += b;
122 |   ctx->state[2] += c;
123 |   ctx->state[3] += d;
124 |   ctx->state[4] += e;
125 |   ctx->state[5] += f;
126 |   ctx->state[6] += g;
127 |   ctx->state[7] += h;
128 | }
129 | 
130 | EXPORT void SHA256_init(struct SHA256Context * ctx, int bitsize)
131 | {
132 |   switch (bitsize) {
133 |   case 224:
134 |     ctx->state[0] = 0xc1059ed8;
135 |     ctx->state[1] = 0x367cd507;
136 |     ctx->state[2] = 0x3070dd17;
137 |     ctx->state[3] = 0xf70e5939;
138 |     ctx->state[4] = 0xffc00b31;
139 |     ctx->state[5] = 0x68581511;
140 |     ctx->state[6] = 0x64f98fa7;
141 |     ctx->state[7] = 0xbefa4fa4;
142 |     break;
143 |   case 256:
144 |     ctx->state[0] = 0x6A09E667;
145 |     ctx->state[1] = 0xBB67AE85;
146 |     ctx->state[2] = 0x3C6EF372;
147 |     ctx->state[3] = 0xA54FF53A;
148 |     ctx->state[4] = 0x510E527F;
149 |     ctx->state[5] = 0x9B05688C;
150 |     ctx->state[6] = 0x1F83D9AB;
151 |     ctx->state[7] = 0x5BE0CD19;
152 |     break;
153 |   default:
154 |     /* The bit size is wrong.  Just zero the state to produce 
155 |        incorrect hashes. */
156 |     memset(ctx->state, 0, sizeof(ctx->state));
157 |     break;
158 |   }
159 |   ctx->numbytes = 0;
160 |   ctx->length[0] = 0;
161 |   ctx->length[1] = 0;
162 | }
163 | 
164 | EXPORT void SHA256_add_data(struct SHA256Context * ctx, unsigned char * data,
165 |                    unsigned long len)
166 | {
167 |   u32 t;
168 | 
169 |   /* Update length */
170 |   t = ctx->length[1];
171 |   if ((ctx->length[1] = t + (u32) (len << 3)) < t)
172 |     ctx->length[0]++;    /* carry from low 32 bits to high 32 bits */
173 |   ctx->length[0] += (u32) (len >> 29);
174 | 
175 |   /* If data was left in buffer, pad it with fresh data and munge block */
176 |   if (ctx->numbytes != 0) {
177 |     t = 64 - ctx->numbytes;
178 |     if (len < t) {
179 |       memcpy(ctx->buffer + ctx->numbytes, data, len);
180 |       ctx->numbytes += len;
181 |       return;
182 |     }
183 |     memcpy(ctx->buffer + ctx->numbytes, data, t);
184 |     SHA256_transform(ctx);
185 |     data += t;
186 |     len -= t;
187 |   }
188 |   /* Munge data in 64-byte chunks */
189 |   while (len >= 64) {
190 |     memcpy(ctx->buffer, data, 64);
191 |     SHA256_transform(ctx);
192 |     data += 64;
193 |     len -= 64;
194 |   }
195 |   /* Save remaining data */
196 |   memcpy(ctx->buffer, data, len);
197 |   ctx->numbytes = len;
198 | }
199 | 
200 | EXPORT void SHA256_finish(struct SHA256Context * ctx, int bitsize,
201 |                    unsigned char * output)
202 | {
203 |   int i = ctx->numbytes;
204 | 
205 |   /* Set first char of padding to 0x80. There is always room. */
206 |   ctx->buffer[i++] = 0x80;
207 |   /* If we do not have room for the length (8 bytes), pad to 64 bytes
208 |      with zeroes and munge the data block */
209 |   if (i > 56) {
210 |     memset(ctx->buffer + i, 0, 64 - i);
211 |     SHA256_transform(ctx);
212 |     i = 0;
213 |   }
214 |   /* Pad to byte 56 with zeroes */
215 |   memset(ctx->buffer + i, 0, 56 - i);
216 |   /* Add length in big-endian */
217 |   SHA256_copy_and_swap(ctx->length, ctx->buffer + 56, 2);
218 |   /* Munge the final block */
219 |   SHA256_transform(ctx);
220 |   /* Final hash value is in ctx->state modulo big-endian conversion */
221 |   switch (bitsize) {
222 |   case 256:
223 |     SHA256_copy_and_swap(ctx->state, output, 8);
224 |     break;
225 |   case 224:
226 |     SHA256_copy_and_swap(ctx->state, output, 7);
227 |     break;
228 |   /* default: The bit size is wrong.  Produce no output. */
229 |   }
230 | }
231 | 


--------------------------------------------------------------------------------
/test/speedtest.ml:
--------------------------------------------------------------------------------
  1 | (***********************************************************************)
  2 | (*                                                                     *)
  3 | (*                      The Cryptokit library                          *)
  4 | (*                                                                     *)
  5 | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
  6 | (*                                                                     *)
  7 | (*  Copyright 2002 Institut National de Recherche en Informatique et   *)
  8 | (*  en Automatique.  All rights reserved.  This file is distributed    *)
  9 | (*  under the terms of the GNU Library General Public License, with    *)
 10 | (*  the special exception on linking described in file LICENSE.        *)
 11 | (*                                                                     *)
 12 | (***********************************************************************)
 13 | 
 14 | (* $Id$ *)
 15 | 
 16 | (* Performance measurement *)
 17 | 
 18 | open Cryptokit
 19 | 
 20 | let time_fn msg fn =
 21 |   let start = Sys.time() in
 22 |   let rec do_time nrun =
 23 |     let res = fn () in
 24 |     let stop = Sys.time() in
 25 |     let t = stop -. start in
 26 |     if t < 0.5 then do_time (nrun + 1) else begin
 27 |       Printf.printf "%7.3f  %s\n%!" (t /. float nrun) msg;
 28 |       res
 29 |     end
 30 |   in do_time 1
 31 | 
 32 | let rec repeat n fn () =
 33 |   if n <= 1 then fn() else (ignore(fn()); repeat (n-1) fn ())
 34 | 
 35 | let raw_block_cipher cipher niter () =
 36 |   let msg = Bytes.create cipher#blocksize in
 37 |   for i = 1 to niter do
 38 |     cipher#transform msg 0 msg 0
 39 |   done
 40 | 
 41 | let raw_stream_cipher cipher niter blocksize () =
 42 |   let msg = Bytes.create blocksize in
 43 |   for i = 1 to niter do
 44 |     cipher#transform msg 0 msg 0 blocksize
 45 |   done
 46 | 
 47 | let transform tr niter blocksize () =
 48 |   let msg = Bytes.create blocksize in
 49 |   for i = 1 to niter do
 50 |     tr#put_substring msg 0 blocksize; ignore(tr#get_substring)
 51 |   done
 52 | 
 53 | let hash h niter blocksize () =
 54 |   let msg = Bytes.create blocksize in
 55 |   for i = 1 to niter do
 56 |     h#add_substring msg 0 blocksize
 57 |   done;
 58 |   ignore(h#result)
 59 | 
 60 | let rng r niter blocksize () =
 61 |   let buf = Bytes.create blocksize in
 62 |   for i = 1 to niter do
 63 |     r#random_bytes buf 0 blocksize
 64 |   done
 65 | 
 66 | let _ =
 67 |   time_fn "Raw AES 128, 64_000_000 bytes"
 68 |     (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF") 4000000);
 69 |   time_fn "Raw AES 192, 64_000_000 bytes"
 70 |     (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF01234567") 4000000);
 71 |   time_fn "Raw AES 256, 64_000_000 bytes"
 72 |     (raw_block_cipher (new Block.aes_encrypt "0123456789ABCDEF0123456789ABCDEF")  4000000);
 73 |   time_fn "Raw DES, 16_000_000 bytes"
 74 |     (raw_block_cipher (new Block.des_encrypt "01234567") 2000000);
 75 |   time_fn "Raw 3DES, 16_000_000 bytes"
 76 |     (raw_block_cipher (new Block.triple_des_encrypt "0123456789ABCDEF") 2000000);
 77 |   time_fn "Raw ARCfour, 64_000_000 bytes, 16-byte chunks"
 78 |     (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 4000000 16);
 79 |   time_fn "Raw ARCfour, 64_000_000 bytes, 64-byte chunks"
 80 |     (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 1000000 64);
 81 |   time_fn "Raw Chacha20, 64_000_000 bytes, 16-byte chunks"
 82 |     (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 4000000 16);
 83 |   time_fn "Raw Chacha20, 64_000_000 bytes, 64-byte chunks"
 84 |     (raw_stream_cipher (new Stream.arcfour "0123456789ABCDEF") 1000000 64);
 85 |   time_fn "Raw Blowfish 128, 64_000_000 bytes"
 86 |     (raw_block_cipher (new Block.blowfish_encrypt "0123456789ABCDEF")  8000000);
 87 |   time_fn "AES-GCM, 64_000_000 bytes"
 88 |     (transform (AEAD.aes_gcm ~iv:"0123456789AB" "0123456789ABCDEF" AEAD.Encrypt) 4000000 16);
 89 |   time_fn "Chacha20-Poly1305, 64_000_000 bytes"
 90 |     (transform (AEAD.chacha20_poly1305 ~iv:"0123456789AB" "0123456789ABCDEF" AEAD.Encrypt) 4000000 16);
 91 |   time_fn "Wrapped AES 128 CBC, 64_000_000 bytes"
 92 |     (transform (Cipher.aes "0123456789ABCDEF" Cipher.Encrypt) 4000000 16);
 93 |   time_fn "Wrapped AES 192 CBC, 64_000_000 bytes"
 94 |     (transform (Cipher.aes "0123456789ABCDEF01234567" Cipher.Encrypt) 4000000 16);
 95 |   time_fn "Wrapped AES 256 CBC, 64_000_000 bytes"
 96 |     (transform (Cipher.aes "0123456789ABCDEF0123456789ABCDEF" Cipher.Encrypt) 4000000 16);
 97 |   time_fn "Wrapped DES CBC, 16_000_000 bytes"
 98 |     (transform (Cipher.des "01234567" Cipher.Encrypt) 1000000 16);
 99 |   time_fn "Wrapped 3DES CBC, 16_000_000 bytes"
100 |     (transform (Cipher.triple_des "0123456789ABCDEF" Cipher.Encrypt) 1000000 16);
101 |   time_fn "Wrapped ARCfour, 64_000_000 bytes"
102 |     (transform (Cipher.arcfour "0123456789ABCDEF" Cipher.Encrypt) 4000000 16);
103 |   time_fn "Wrapped Chacha20, 64_000_000 bytes"
104 |     (transform (Cipher.chacha20 "0123456789ABCDEF" Cipher.Encrypt) 4000000 16);
105 |   time_fn "Wrapped Blowfish 128 CBC, 64_000_000 bytes"
106 |     (transform (Cipher.blowfish "0123456789ABCDEF" Cipher.Encrypt) 4000000 16);
107 |   time_fn "SHA-1, 64_000_000 bytes, 16-byte chunks"
108 |     (hash (Hash.sha1()) 4000000 16);
109 |   time_fn "SHA-256, 64_000_000 bytes, 16-byte chunks"
110 |     (hash (Hash.sha256()) 4000000 16);
111 |   time_fn "SHA-512, 64_000_000 bytes, 16-byte chunks"
112 |     (hash (Hash.sha512()) 4000000 16);
113 |   time_fn "SHA-512/256, 64_000_000 bytes, 16-byte chunks"
114 |     (hash (Hash.sha512_256()) 4000000 16);
115 |   time_fn "SHA-512/224, 64_000_000 bytes, 16-byte chunks"
116 |     (hash (Hash.sha512_224()) 4000000 16);
117 |   time_fn "SHA-3 256, 64_000_000 bytes, 16-byte chunks"
118 |     (hash (Hash.sha3 256) 4000000 16);
119 |   time_fn "SHA-3 512, 64_000_000 bytes, 16-byte chunks"
120 |     (hash (Hash.sha3 512) 4000000 16);
121 |   time_fn "BLAKE2b 512, 64_000_000 bytes, 16-byte chunks"
122 |     (hash (Hash.blake2b 512) 4000000 16);
123 |   time_fn "BLAKE2s 256, 64_000_000 bytes, 16-byte chunks"
124 |     (hash (Hash.blake2s 256) 4000000 16);
125 |   time_fn "BLAKE3, 64_000_000 bytes, 16-byte chunks"
126 |     (hash (Hash.blake3 256) 4000000 16);
127 |   time_fn "RIPEMD-160, 64_000_000 bytes, 16-byte chunks"
128 |     (hash (Hash.ripemd160()) 4000000 16);
129 |   time_fn "MD5, 64_000_000 bytes, 16-byte chunks"
130 |     (hash (Hash.md5()) 4000000 16);
131 |   time_fn "AES CMAC, 64_000_000 bytes, 16-byte chunks"
132 |     (hash (MAC.aes_cmac "0123456789ABCDEF") 4000000 16);
133 |   time_fn "HMAC-SHA1, 64_000_000 bytes, 16-byte chunks"
134 |     (hash (MAC.hmac_sha1 "0123456789ABCDEF") 4000000 16);
135 |   time_fn "HMAC-SHA256, 64_000_000 bytes, 16-byte chunks"
136 |     (hash (MAC.hmac_sha256 "0123456789ABCDEF") 4000000 16);
137 |   time_fn "SipHash 64, 64_000_000 bytes, 16-byte chunks"
138 |     (hash (MAC.siphash "0123456789ABCDEF") 4000000 16);
139 |   time_fn "SipHash 128, 64_000_000 bytes, 16-byte chunks"
140 |     (hash (MAC.siphash128 "0123456789ABCDEF") 4000000 16);
141 |   let prng = Random.pseudo_rng "supercalifragilistusexpialidolcius" in
142 |   let (priv_key, pub_key) =
143 |   time_fn "RSA key generation (2048 bits) x 10"
144 |     (repeat 10 (fun () -> RSA.new_key ~rng:prng ~e:65537 2048)) in
145 |   let plaintext = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ" in
146 |   let ciphertext =
147 |   time_fn "RSA public-key operation (2048 bits, exponent 65537) x 1000"
148 |     (repeat 1000 (fun () -> RSA.encrypt pub_key plaintext)) in
149 |   time_fn "RSA private-key operation (2048 bits) x 100"
150 |     (repeat 100 (fun () -> ignore(RSA.decrypt priv_key ciphertext)));
151 |   time_fn "RSA private-key operation with CRT (2048 bits) x 100"
152 |     (repeat 100 (fun () -> ignore(RSA.decrypt_CRT priv_key ciphertext)));
153 |   time_fn "PRNG, 64_000_000 bytes"
154 |     (rng prng 1000000 64);
155 |   time_fn "PRNG AES CTR, 64_000_000 bytes"
156 |     (rng (Random.pseudo_rng_aes_ctr "supercalifragilistusexpialidolcius") 1000000 64);
157 |   begin try
158 |     let hr = Random.hardware_rng () in
159 |     time_fn "Hardware RNG, 64_000_000 bytes"
160 |       (rng hr 1000000 64)
161 |   with Error No_entropy_source -> ()
162 |   end;
163 |   ()
164 | 


--------------------------------------------------------------------------------
/src/stubs-zlib.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
  6 | /*                                                                     */
  7 | /*  Copyright 2002 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* Stub code to interface with Zlib */
 15 | 
 16 | #ifdef HAVE_ZLIB
 17 | #include <zlib.h>
 18 | #endif
 19 | 
 20 | #include <caml/mlvalues.h>
 21 | #include <caml/alloc.h>
 22 | #include <caml/callback.h>
 23 | #include <caml/fail.h>
 24 | #include <caml/memory.h>
 25 | #include <caml/custom.h>
 26 | 
/* Cached pointer to the OCaml value registered under the name
   "Cryptokit.Error"; looked up lazily on first use by caml_zlib_error. */
static const value * caml_zlib_error_exn = NULL;
 28 | 
 29 | #ifdef HAVE_ZLIB
 30 | 
/* Access (as an lvalue) the z_streamp stored in the data area of the
   custom block v. */
#define ZStream_val(v) (*((z_streamp *)Data_custom_val(v)))
 32 | 
/* Raise the OCaml exception registered as "Cryptokit.Error" with a pair
   (fn, msg) as argument, where fn names the failing Zlib entry point and
   msg is the message recorded in the z_stream of vzs (the empty string if
   there is none).  Raises Invalid_argument instead if the exception has
   not been registered.  This function does not return. */
static void caml_zlib_error(char * fn, value vzs)
{
  char * msg;
  CAMLparam1(vzs);
  CAMLlocal4(s1, s2, tuple, bucket);

  msg = ZStream_val(vzs)->msg;
  if (msg == NULL) msg = "";
  /* Look the exception up once and cache it for later calls */
  if (caml_zlib_error_exn == NULL) {
    caml_zlib_error_exn = caml_named_value("Cryptokit.Error");
    if (caml_zlib_error_exn == NULL)
      caml_invalid_argument("Exception Cryptokit.Error not initialized");
  }
  /* Both strings are allocated before the small blocks so that the fields
     of each small block can be filled right after its allocation */
  s1 = caml_copy_string(fn);
  s2 = caml_copy_string(msg);
  tuple = caml_alloc_small(2, 0);
  Field(tuple, 0) = s1;
  Field(tuple, 1) = s2;
  /* Exception bucket: field 0 is the exception, field 1 its argument */
  bucket = caml_alloc_small(2, 0);
  Field(bucket, 0) = *caml_zlib_error_exn;
  Field(bucket, 1) = tuple;
  /* Unregister the local roots by hand: caml_raise does not return, so
     no CAMLreturn is ever reached */
  CAMLdrop;
  caml_raise(bucket);
}
 57 | 
 58 | void caml_zlib_free_stream(value vzs)
 59 | {
 60 |   caml_stat_free(ZStream_val(vzs));
 61 |   ZStream_val(vzs) = NULL;
 62 | }
 63 | 
 64 | static struct custom_operations caml_zlib_stream_ops = {
 65 |   "caml_zlib_stream_ops", &caml_zlib_free_stream, NULL, NULL, NULL, NULL
 66 | };
 67 | 
 68 | static value caml_zlib_new_stream(void)
 69 | {
 70 |   value res = caml_alloc_custom(&caml_zlib_stream_ops, sizeof(z_streamp), 0, 1);
 71 | 
 72 |   ZStream_val(res) = caml_stat_alloc(sizeof(z_stream));
 73 |   ZStream_val(res)->zalloc = NULL;
 74 |   ZStream_val(res)->zfree = NULL;
 75 |   ZStream_val(res)->opaque = NULL;
 76 |   ZStream_val(res)->next_in = NULL;
 77 |   ZStream_val(res)->next_out = NULL;
 78 |   return res;
 79 | }
 80 | 
 81 | CAMLprim
 82 | value caml_zlib_deflateInit(value vlevel, value expect_header)
 83 | {
 84 |   value vzs = caml_zlib_new_stream();
 85 |   if (deflateInit2(ZStream_val(vzs),
 86 |                    Int_val(vlevel),
 87 |                    Z_DEFLATED,
 88 |                    Bool_val(expect_header) ? MAX_WBITS : -MAX_WBITS,
 89 |                    8,
 90 |                    Z_DEFAULT_STRATEGY) != Z_OK)
 91 |     caml_zlib_error("Zlib.deflateInit", vzs);
 92 |   return vzs;
 93 | }
 94 | 
 95 | static int caml_zlib_flush_table[] = 
 96 | { Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH };
 97 | 
 98 | CAMLprim
 99 | value caml_zlib_deflate(value vzs, value srcbuf, value srcpos, value srclen,
100 |                       value dstbuf, value dstpos, value dstlen,
101 |                       value vflush)
102 | {
103 |   z_stream * zs = ZStream_val(vzs);
104 |   int retcode;
105 |   long used_in, used_out;
106 |   value res;
107 | 
108 |   zs->next_in = &Byte_u(srcbuf, Long_val(srcpos));
109 |   zs->avail_in = Long_val(srclen);
110 |   zs->next_out = &Byte_u(dstbuf, Long_val(dstpos));
111 |   zs->avail_out = Long_val(dstlen);
112 |   retcode = deflate(zs, caml_zlib_flush_table[Int_val(vflush)]);
113 |   if (retcode < 0) caml_zlib_error("Zlib.deflate", vzs);
114 |   used_in = Long_val(srclen) - zs->avail_in;
115 |   used_out = Long_val(dstlen) - zs->avail_out;
116 |   zs->next_in = NULL;         /* not required, but cleaner */
117 |   zs->next_out = NULL;        /* (avoid dangling pointers into Caml heap) */
118 |   res = caml_alloc_small(3, 0);
119 |   Field(res, 0) = Val_bool(retcode == Z_STREAM_END);
120 |   Field(res, 1) = Val_int(used_in);
121 |   Field(res, 2) = Val_int(used_out);
122 |   return res;
123 | }
124 | 
125 | CAMLprim
126 | value caml_zlib_deflateEnd(value vzs)
127 | {
128 |   if (deflateEnd(ZStream_val(vzs)) != Z_OK)
129 |     caml_zlib_error("Zlib.deflateEnd", vzs);
130 |   return Val_unit;
131 | }
132 | 
133 | CAMLprim
134 | value caml_zlib_inflateInit(value expect_header)
135 | {
136 |   value vzs = caml_zlib_new_stream();
137 |   if (inflateInit2(ZStream_val(vzs),
138 |                    Bool_val(expect_header) ? MAX_WBITS : -MAX_WBITS) != Z_OK)
139 |     caml_zlib_error("Zlib.inflateInit", vzs);
140 |   return vzs;
141 | }
142 | 
143 | CAMLprim
144 | value caml_zlib_inflate(value vzs, value srcbuf, value srcpos, value srclen,
145 |                       value dstbuf, value dstpos, value dstlen,
146 |                       value vflush)
147 | {
148 |   z_stream * zs = ZStream_val(vzs);
149 |   int retcode;
150 |   long used_in, used_out;
151 |   value res;
152 | 
153 |   zs->next_in = &Byte_u(srcbuf, Long_val(srcpos));
154 |   zs->avail_in = Long_val(srclen);
155 |   zs->next_out = &Byte_u(dstbuf, Long_val(dstpos));
156 |   zs->avail_out = Long_val(dstlen);
157 |   retcode = inflate(zs, caml_zlib_flush_table[Int_val(vflush)]);
158 |   if (retcode < 0 || retcode == Z_NEED_DICT)
159 |     caml_zlib_error("Zlib.inflate", vzs);
160 |   used_in = Long_val(srclen) - zs->avail_in;
161 |   used_out = Long_val(dstlen) - zs->avail_out;
162 |   zs->next_in = NULL;           /* not required, but cleaner */
163 |   zs->next_out = NULL;          /* (avoid dangling pointers into Caml heap) */
164 |   res = caml_alloc_small(3, 0);
165 |   Field(res, 0) = Val_bool(retcode == Z_STREAM_END);
166 |   Field(res, 1) = Val_int(used_in);
167 |   Field(res, 2) = Val_int(used_out);
168 |   return res;
169 | }
170 | 
171 | CAMLprim
172 | value caml_zlib_inflateEnd(value vzs)
173 | {
174 |   if (inflateEnd(ZStream_val(vzs)) != Z_OK)
175 |     caml_zlib_error("Zlib.inflateEnd", vzs);
176 |   return Val_unit;
177 | }
178 | 
179 | #else
180 | 
181 | static void caml_zlib_not_supported(void)
182 | {
183 |   value bucket;
184 |   if (caml_zlib_error_exn == NULL) {
185 |     caml_zlib_error_exn = caml_named_value("Cryptokit.Error");
186 |     if (caml_zlib_error_exn == NULL)
187 |       caml_invalid_argument("Exception Cryptokit.Error not initialized");
188 |   }
189 |   bucket = caml_alloc_small(2, 0);
190 |   Field(bucket, 0) = *caml_zlib_error_exn;
191 |   Field(bucket, 1) = Val_int(12); /* Compression_not_supported */
192 |   caml_raise(bucket);
193 | }
194 | 
195 | CAMLprim
196 | value caml_zlib_deflateInit(value vlevel, value expect_header)
197 | { caml_zlib_not_supported(); return Val_unit; }
198 | 
199 | CAMLprim
200 | value caml_zlib_deflate(value vzs, value srcbuf, value srcpos, value srclen,
201 |                       value dstbuf, value dstpos, value dstlen,
202 |                       value vflush)
203 | { caml_zlib_not_supported(); return Val_unit; }
204 | 
205 | CAMLprim
206 | value caml_zlib_deflateEnd(value vzs)
207 | { caml_zlib_not_supported(); return Val_unit; }
208 | 
209 | CAMLprim
210 | value caml_zlib_inflateInit(value expect_header)
211 | { caml_zlib_not_supported(); return Val_unit; }
212 | 
213 | CAMLprim
214 | value caml_zlib_inflate(value vzs, value srcbuf, value srcpos, value srclen,
215 |                       value dstbuf, value dstpos, value dstlen,
216 |                       value vflush)
217 | { caml_zlib_not_supported(); return Val_unit; }
218 | 
219 | CAMLprim
220 | value caml_zlib_inflateEnd(value vzs)
221 | { caml_zlib_not_supported(); return Val_unit; }
222 | 
223 | #endif
224 | 
225 | CAMLprim
226 | value caml_zlib_deflate_bytecode(value * arg, int nargs)
227 | {
228 |   return caml_zlib_deflate(arg[0], arg[1], arg[2], arg[3],
229 |                          arg[4], arg[5], arg[6], arg[7]);
230 | }
231 | 
232 | CAMLprim
233 | value caml_zlib_inflate_bytecode(value * arg, int nargs)
234 | {
235 |   return caml_zlib_inflate(arg[0], arg[1], arg[2], arg[3],
236 |                          arg[4], arg[5], arg[6], arg[7]);
237 | }
238 | 
239 | 
240 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # The Cryptokit library
  2 | 
  3 | ## Overview
  4 | 
  5 | The Cryptokit library for OCaml provides a variety of cryptographic primitives that can be used to implement cryptographic protocols in security-sensitive applications.  The primitives provided include:
  6 | 
  7 | * Symmetric-key ciphers: AES, Chacha20, DES, Triple-DES, Blowfish, ARCfour, in ECB, CBC, CFB, OFB and counter modes.
  8 | * Authenticated encryption: AES-GCM, Chacha20-Poly1305.
  9 | * Public-key cryptography: RSA encryption and signature, Diffie-Hellman key agreement.
 10 | * Hash functions and MACs: SHA-3, SHA-2, BLAKE2, BLAKE3, RIPEMD-160; MACs based on AES and DES; SipHash.  (SHA-1 and MD5, despite being broken, are also provided for historical value.)
 11 | * Random number generation.
 12 | * Encodings and compression: base 64, hexadecimal, Zlib compression.
 13 | 
 14 | Additional ciphers and hashes can easily be used in conjunction with the library.  In particular, basic mechanisms such as chaining modes, output buffering, and padding are provided by generic classes that can easily be composed with user-provided ciphers.  More generally, the library promotes a "Lego"-like style of constructing and composing transformations over character streams.
 15 | 
 16 | This library is distributed under the conditions of the GNU Library General Public license version 2 or any later version, with the special OCaml exception on linking described in file LICENSE.
 17 | 
 18 | ## Requirements
 19 | 
 20 | * OCaml 4.08 or more recent.
 21 | * The Dune build system, version 2.0 or more recent.
 22 | * The Zarith library, version 1.4 or more recent.
  23 | * The Zlib C library; version 1.1.3 or later is recommended. If it is not installed on your system (look for libz.a or libz.so), get it from http://www.gzip.org/, or disable Zlib support when running `./configure` (see `./configure --help`).  If you are running Linux, BSD or macOS, your distribution provides precompiled binaries for this library.
 24 | 
 25 | ## Build, test and install
 26 | 
 27 | * To configure, run `./configure`.  There are options to disable or enable some features (run `./configure --help` for a list), but the default configuration is fine most of the time.
 28 | 
 29 | * To build, run `dune build`.
 30 | 
 31 | * To execute a test, run `dune exec test/<name>.exe` where `<name>` can be `test`,
 32 |   `prngtest` or `speedtest`, supplying additional command line arguments if needed.
 33 |   The main test file `test/test.ml` is also included into the `runtest` alias, so it
 34 |   can be executed simply by `dune test`.
 35 | 
 36 | * To install, run `dune install`.
 37 | 
 38 | ## Using the library
 39 | 
 40 | The package name is `cryptokit`.  With Dune, use `(library cryptokit)`.  With ocamlfind, do
 41 | ```
 42 |         ocamlfind ocamlopt -package cryptokit ...             # for compilation
 43 |         ocamlfind ocamlopt -package cryptokit -linkpkg ...    # for linking
 44 | ```
 45 | 
 46 | ## Documentation
 47 | 
 48 | See the extensive documentation comments in file `src/cryptokit.mli`.
 49 | 
 50 | To build HTML documentation, run `dune build @doc`. The resulting index file is
 51 | located at `_build/default/_doc/_html/cryptokit/Cryptokit/index.html`.
 52 | 
 53 | ## Warnings and disclaimers
 54 | 
 55 | Disclaimer 1: the author is not an expert in cryptography.  While reasonable care has been taken to select good, widely-used implementations of the ciphers and hashes, and follow recommended practices found in reputable applied cryptography textbooks, you are advised to review thoroughly the implementation of this module before using it in a security-critical application.
 56 | 
  57 | Disclaimer 2: some knowledge of cryptography is needed to use this library effectively.  A good introduction is the book __Serious Cryptography__ by J.-P. Aumasson (2018).  Building secure applications out of cryptographic primitives also requires a general background in computer security.
 58 | 
  59 | Disclaimer 3: in some countries, the use, distribution, import and/or export of cryptographic applications is restricted by law. The precise restrictions may depend on the strength of the cryptography used (e.g. key size), but also on its purpose (e.g. confidentiality vs. authentication).  It is up to the users of this library to comply with regulations applicable in their country.
 60 | 
 61 | ## Design notes and references
 62 | 
 63 | The library is organized around the concept of "transforms".  A transform is an object that accepts strings, sub-strings, characters and bytes as input, transforms them, and buffers the output.  While it is possible to enter all input, then fetch the output, lower memory requirements can be achieved by purging the output periodically during data input.
 64 | 
 65 | The AES implementation is the public-domain optimized reference implementation by Daemen, Rijmen and Barreto.  On x86 processors that support the AES-NI extensions, hardware implementation is used instead.
 66 | 
 67 | The Chacha20 implementation is due to D.J.Bernstein, https://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/regs/chacha.c . It is in the public domain.
 68 | 
 69 | The DES implementation is based on Outerbridge's popular "d3des" implementation.  This is not the fastest DES implementation available, but one of the cleanest.  Outerbridge's code is marked as public domain.
 70 | 
 71 | The Blowfish implementation is that of Paul Kocher with some performance improvements.  It is under the LGPL.  It passes the test vectors listed at http://www.schneier.com/code/vectors.txt
 72 | 
  73 | ARCfour ("alleged RC4") is implemented from scratch, based on the algorithm described in Schneier's _Applied Cryptography_.
 74 | 
 75 | For AES-GCM, the GHASH implementation is that of Steven M. Gibson at https://github.com/mko-x/SharedAES-GCM/blob/master/Sources/gcm.c .  On x86 processors that support the PCLMUL extension, hardware implementation is used instead.  Test vectors are taken from "The Galois/Counter Mode of Operation (GCM)" by David A. McGrew and John Viega.
 76 | 
 77 | For Chacha20-Poly1305,  Poly1305 is based on the "Donna" implementation by Andrew Moon, https://github.com/floodyberry/poly1305-donna .  Test vectors are taken from RFC 7539 and from the BoringSSL project.
 78 | 
 79 | SHA-1 is also implemented from scratch, based on the algorithm described in the _Handbook of Applied Cryptography_.   It passes the FIPS test vectors.
 80 | 
 81 | SHA-2 is implemented from scratch based on FIPS publication 180-2.  It passes the FIPS test vectors.
 82 | 
 83 | SHA-3 is based on the "readable" implementation of Keccak written by Markku-Juhani O. Saarinen <mjos@iki.fi>.
 84 | 
 85 | BLAKE2b and BLAKE2s are implemented from scratch based on RFC 7693.  The test vectors are taken from https://github.com/BLAKE2/BLAKE2/tree/master/testvectors; others were obtained with the b2sum program.
 86 | 
 87 | BLAKE3 uses the portable C implementation from https://github.com/BLAKE3-team/BLAKE3 .  The authors released the code into the public domain with CC0 1.0.  The test vectors come from the same source.
 88 | 
 89 | RIPEMD-160 is based on the reference implementation by A.Bosselaers. It passes the test vectors listed at http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html
 90 | 
 91 | MD5 uses the public-domain implementation by Colin Plumb that is also used in the OCaml runtime system for module Digest.
 92 | 
 93 | SipHash is based on the reference implementation by J.-P. Aumasson and D. J. Bernstein, https://github.com/veorq/SipHash .  It passes their test vectors.
 94 | 
  95 | RSA encryption and decryption were implemented from scratch, using the Zarith OCaml library for arbitrary-precision arithmetic, which itself uses GMP.  Modular exponentiation is the constant-time implementation provided by GMP.  The Chinese remainder theorem is exploited when possible, though.  Like all ciphers in this library, the RSA implementation is *not* protected against timing attacks.
 96 | 
 97 | RSA key generation uses GMP's `nextprime` function for probabilistic primality testing.
 98 | 
 99 | The hardware RNG uses the RDRAND instruction of recent x86 processors, if supported.  It is not available on other platforms.  A check is included to reject the broken RDRAND on AMD Ryzen 3000 processors (https://arstechnica.com/gadgets/2019/10/how-a-months-old-amd-microcode-bug-destroyed-my-weekend/).
100 | 
101 | The system RNG uses the `getentropy` function provided by Linux, macOS and the BSDs, or the `CryptGenRandom` function from the Windows cryptographic API.
102 | 
103 | The seeded PRNG is just the Chacha20 stream cipher encrypting the all-zeroes message.  The seed is used as the Chacha20 key.  An alternate seeded PRNG is provided, based on AES encryption of a 128-bit counter.  Both PRNGs pass the Dieharder statistical tests.  Still, better use the system RNG or the hardware RNG if high-quality random numbers are needed.
104 | 
105 | ## Performance
106 | 
107 | If you run `dune exec test/speedtest.exe`, a simple benchmark is performed and shows the speed of various operations from this library.
108 | 


--------------------------------------------------------------------------------
/src/blake3_dispatch.c:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include <stddef.h>
  3 | #include <stdint.h>
  4 | 
  5 | #include "blake3_impl.h"
  6 | 
  7 | #if defined(IS_X86)
  8 | #if defined(_MSC_VER)
  9 | #include <intrin.h>
 10 | #elif defined(__GNUC__)
 11 | #include <immintrin.h>
 12 | #else
 13 | #undef IS_X86 /* Unimplemented! */
 14 | #endif
 15 | #endif
 16 | 
/* Evaluate x and discard the result.  Applied below to `features`,
   presumably to avoid unused-variable warnings when every SIMD path is
   compiled out. */
#define MAYBE_UNUSED(x) (void)((x))
 18 | 
 19 | #if defined(IS_X86)
 20 | static uint64_t xgetbv(void) {
 21 | #if defined(_MSC_VER)
 22 |   return _xgetbv(0);
 23 | #else
 24 |   uint32_t eax = 0, edx = 0;
 25 |   __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
 26 |   return ((uint64_t)edx << 32) | eax;
 27 | #endif
 28 | }
 29 | 
/* Run the CPUID instruction for leaf `id`.  In the inline-asm variants
   the resulting EAX, EBX, ECX and EDX values are stored in out[0],
   out[1], out[2] and out[3] respectively; the MSVC variant delegates to
   the __cpuid intrinsic. */
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
  /* 32-bit x86: EBX is not named as an asm operand.  Its value is copied
     to operand %1 before CPUID, and the final xchgl leaves CPUID's EBX
     result in %1 while putting the saved value back into EBX.
     NOTE(review): presumably because EBX may be reserved by the
     compiler (e.g. for PIC) -- confirm. */
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#endif
}
 45 | 
/* Run the CPUID instruction for leaf `id` and sub-leaf `sid` (passed in
   ECX).  In the inline-asm variants the resulting EAX, EBX, ECX and EDX
   values are stored in out[0], out[1], out[2] and out[3] respectively;
   the MSVC variant delegates to the __cpuidex intrinsic. */
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
  /* 32-bit x86: EBX is not named as an asm operand.  Its value is copied
     to operand %1 before CPUID, and the final xchgl leaves CPUID's EBX
     result in %1 while putting the saved value back into EBX.
     NOTE(review): presumably because EBX may be reserved by the
     compiler (e.g. for PIC) -- confirm. */
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#endif
}
 61 | 
 62 | #endif
 63 | 
 64 | enum cpu_feature {
 65 |   SSE2 = 1 << 0,
 66 |   SSSE3 = 1 << 1,
 67 |   SSE41 = 1 << 2,
 68 |   AVX = 1 << 3,
 69 |   AVX2 = 1 << 4,
 70 |   AVX512F = 1 << 5,
 71 |   AVX512VL = 1 << 6,
 72 |   /* ... */
 73 |   UNDEFINED = 1 << 30
 74 | };
 75 | 
/* Cache for the feature set computed by get_cpu_features().  It holds
   UNDEFINED until get_cpu_features() stores a detected value.  The
   variable has internal linkage unless BLAKE3_TESTING is defined. */
#if !defined(BLAKE3_TESTING)
static /* Allow the variable to be controlled manually for testing */
#endif
    enum cpu_feature g_cpu_features = UNDEFINED;
 80 | 
 81 | #if !defined(BLAKE3_TESTING)
 82 | static
 83 | #endif
 84 |     enum cpu_feature
 85 |     get_cpu_features(void) {
 86 | 
 87 |   if (g_cpu_features != UNDEFINED) {
 88 |     return g_cpu_features;
 89 |   } else {
 90 | #if defined(IS_X86)
 91 |     uint32_t regs[4] = {0};
 92 |     uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
 93 |     (void)edx;
 94 |     enum cpu_feature features = 0;
 95 |     cpuid(regs, 0);
 96 |     const int max_id = *eax;
 97 |     cpuid(regs, 1);
 98 | #if defined(__amd64__) || defined(_M_X64)
 99 |     features |= SSE2;
100 | #else
101 |     if (*edx & (1UL << 26))
102 |       features |= SSE2;
103 | #endif
104 |     if (*ecx & (1UL << 0))
105 |       features |= SSSE3;
106 |     if (*ecx & (1UL << 19))
107 |       features |= SSE41;
108 | 
109 |     if (*ecx & (1UL << 27)) { // OSXSAVE
110 |       const uint64_t mask = xgetbv();
111 |       if ((mask & 6) == 6) { // SSE and AVX states
112 |         if (*ecx & (1UL << 28))
113 |           features |= AVX;
114 |         if (max_id >= 7) {
115 |           cpuidex(regs, 7, 0);
116 |           if (*ebx & (1UL << 5))
117 |             features |= AVX2;
118 |           if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
119 |             if (*ebx & (1UL << 31))
120 |               features |= AVX512VL;
121 |             if (*ebx & (1UL << 16))
122 |               features |= AVX512F;
123 |           }
124 |         }
125 |       }
126 |     }
127 |     g_cpu_features = features;
128 |     return features;
129 | #else
130 |     /* How to detect NEON? */
131 |     return 0;
132 | #endif
133 |   }
134 | }
135 | 
136 | EXPORT void blake3_compress_in_place(uint32_t cv[8],
137 |                               const uint8_t block[BLAKE3_BLOCK_LEN],
138 |                               uint8_t block_len, uint64_t counter,
139 |                               uint8_t flags) {
140 | #if defined(IS_X86)
141 |   const enum cpu_feature features = get_cpu_features();
142 |   MAYBE_UNUSED(features);
143 | #if !defined(BLAKE3_NO_AVX512)
144 |   if (features & AVX512VL) {
145 |     blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
146 |     return;
147 |   }
148 | #endif
149 | #if !defined(BLAKE3_NO_SSE41)
150 |   if (features & SSE41) {
151 |     blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
152 |     return;
153 |   }
154 | #endif
155 | #if !defined(BLAKE3_NO_SSE2)
156 |   if (features & SSE2) {
157 |     blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
158 |     return;
159 |   }
160 | #endif
161 | #endif
162 |   blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
163 | }
164 | 
165 | EXPORT void blake3_compress_xof(const uint32_t cv[8],
166 |                          const uint8_t block[BLAKE3_BLOCK_LEN],
167 |                          uint8_t block_len, uint64_t counter, uint8_t flags,
168 |                          uint8_t out[64]) {
169 | #if defined(IS_X86)
170 |   const enum cpu_feature features = get_cpu_features();
171 |   MAYBE_UNUSED(features);
172 | #if !defined(BLAKE3_NO_AVX512)
173 |   if (features & AVX512VL) {
174 |     blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
175 |     return;
176 |   }
177 | #endif
178 | #if !defined(BLAKE3_NO_SSE41)
179 |   if (features & SSE41) {
180 |     blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
181 |     return;
182 |   }
183 | #endif
184 | #if !defined(BLAKE3_NO_SSE2)
185 |   if (features & SSE2) {
186 |     blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
187 |     return;
188 |   }
189 | #endif
190 | #endif
191 |   blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
192 | }
193 | 
194 | EXPORT void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
195 |                       size_t blocks, const uint32_t key[8], uint64_t counter,
196 |                       bool increment_counter, uint8_t flags,
197 |                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
198 | #if defined(IS_X86)
199 |   const enum cpu_feature features = get_cpu_features();
200 |   MAYBE_UNUSED(features);
201 | #if !defined(BLAKE3_NO_AVX512)
202 |   if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
203 |     blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
204 |                             increment_counter, flags, flags_start, flags_end,
205 |                             out);
206 |     return;
207 |   }
208 | #endif
209 | #if !defined(BLAKE3_NO_AVX2)
210 |   if (features & AVX2) {
211 |     blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
212 |                           increment_counter, flags, flags_start, flags_end,
213 |                           out);
214 |     return;
215 |   }
216 | #endif
217 | #if !defined(BLAKE3_NO_SSE41)
218 |   if (features & SSE41) {
219 |     blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
220 |                            increment_counter, flags, flags_start, flags_end,
221 |                            out);
222 |     return;
223 |   }
224 | #endif
225 | #if !defined(BLAKE3_NO_SSE2)
226 |   if (features & SSE2) {
227 |     blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
228 |                           increment_counter, flags, flags_start, flags_end,
229 |                           out);
230 |     return;
231 |   }
232 | #endif
233 | #endif
234 | 
235 | #if BLAKE3_USE_NEON == 1
236 |   blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
237 |                         increment_counter, flags, flags_start, flags_end, out);
238 |   return;
239 | #endif
240 | 
241 |   blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
242 |                             increment_counter, flags, flags_start, flags_end,
243 |                             out);
244 | }
245 | 
246 | // The dynamically detected SIMD degree of the current platform.
247 | EXPORT size_t blake3_simd_degree(void) {
248 | #if defined(IS_X86)
249 |   const enum cpu_feature features = get_cpu_features();
250 |   MAYBE_UNUSED(features);
251 | #if !defined(BLAKE3_NO_AVX512)
252 |   if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
253 |     return 16;
254 |   }
255 | #endif
256 | #if !defined(BLAKE3_NO_AVX2)
257 |   if (features & AVX2) {
258 |     return 8;
259 |   }
260 | #endif
261 | #if !defined(BLAKE3_NO_SSE41)
262 |   if (features & SSE41) {
263 |     return 4;
264 |   }
265 | #endif
266 | #if !defined(BLAKE3_NO_SSE2)
267 |   if (features & SSE2) {
268 |     return 4;
269 |   }
270 | #endif
271 | #endif
272 | #if BLAKE3_USE_NEON == 1
273 |   return 4;
274 | #endif
275 |   return 1;
276 | }
277 | 


--------------------------------------------------------------------------------
/src/sha512.c:
--------------------------------------------------------------------------------
  1 | /***********************************************************************/
  2 | /*                                                                     */
  3 | /*                      The Cryptokit library                          */
  4 | /*                                                                     */
  5 | /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
  6 | /*                                                                     */
  7 | /*  Copyright 2015 Institut National de Recherche en Informatique et   */
  8 | /*  en Automatique.  All rights reserved.  This file is distributed    */
  9 | /*  under the terms of the GNU Library General Public License, with    */
 10 | /*  the special exception on linking described in file LICENSE.        */
 11 | /*                                                                     */
 12 | /***********************************************************************/
 13 | 
 14 | /* SHA-512 hashing */
 15 | 
 16 | #include <string.h>
 17 | #include <caml/config.h>
 18 | #include "sha512.h"
 19 | 
 20 | /* Ref: FIPS publication 180-2 */
 21 | 
 22 | #define ROTR(x,n) ((x) >> (n) | (x) << (64 - (n)))
 23 | 
 24 | #define CH(x,y,z) (z ^ (x & (y ^ z)))
 25 | #define MAJ(x,y,z) ((x & y) | (z & (x | y)))
 26 | #define SIGMA0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
 27 | #define SIGMA1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
 28 | #define sigma0(x) (ROTR(x,1) ^ ROTR(x,8) ^ (x >> 7))
 29 | #define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6))
 30 | 
 31 | static void SHA512_copy_and_swap(void * src, void * dst, int numwords)
 32 | {
 33 | #ifdef ARCH_BIG_ENDIAN
 34 |   memcpy(dst, src, numwords * 8);
 35 | #else
 36 |   unsigned char * s, * d;
 37 |   unsigned char a, b;
 38 |   for (s = src, d = dst; numwords > 0; s += 8, d += 8, numwords--) {
 39 |     a = s[0];
 40 |     b = s[1];
 41 |     d[0] = s[7];
 42 |     d[1] = s[6];
 43 |     d[6] = b;
 44 |     d[7] = a;
 45 |     a = s[2];
 46 |     b = s[3];
 47 |     d[2] = s[5];
 48 |     d[3] = s[4];
 49 |     d[4] = b;
 50 |     d[5] = a;
 51 |   }
 52 | #endif
 53 | }
 54 | 
 55 | static u64 SHA512_constants[80] = {
 56 |   UINT64_C(0x428a2f98d728ae22),
 57 |   UINT64_C(0x7137449123ef65cd),
 58 |   UINT64_C(0xb5c0fbcfec4d3b2f),
 59 |   UINT64_C(0xe9b5dba58189dbbc),
 60 |   UINT64_C(0x3956c25bf348b538),
 61 |   UINT64_C(0x59f111f1b605d019),
 62 |   UINT64_C(0x923f82a4af194f9b),
 63 |   UINT64_C(0xab1c5ed5da6d8118),
 64 |   UINT64_C(0xd807aa98a3030242),
 65 |   UINT64_C(0x12835b0145706fbe),
 66 |   UINT64_C(0x243185be4ee4b28c),
 67 |   UINT64_C(0x550c7dc3d5ffb4e2),
 68 |   UINT64_C(0x72be5d74f27b896f),
 69 |   UINT64_C(0x80deb1fe3b1696b1),
 70 |   UINT64_C(0x9bdc06a725c71235),
 71 |   UINT64_C(0xc19bf174cf692694),
 72 |   UINT64_C(0xe49b69c19ef14ad2),
 73 |   UINT64_C(0xefbe4786384f25e3),
 74 |   UINT64_C(0x0fc19dc68b8cd5b5),
 75 |   UINT64_C(0x240ca1cc77ac9c65),
 76 |   UINT64_C(0x2de92c6f592b0275),
 77 |   UINT64_C(0x4a7484aa6ea6e483),
 78 |   UINT64_C(0x5cb0a9dcbd41fbd4),
 79 |   UINT64_C(0x76f988da831153b5),
 80 |   UINT64_C(0x983e5152ee66dfab),
 81 |   UINT64_C(0xa831c66d2db43210),
 82 |   UINT64_C(0xb00327c898fb213f),
 83 |   UINT64_C(0xbf597fc7beef0ee4),
 84 |   UINT64_C(0xc6e00bf33da88fc2),
 85 |   UINT64_C(0xd5a79147930aa725),
 86 |   UINT64_C(0x06ca6351e003826f),
 87 |   UINT64_C(0x142929670a0e6e70),
 88 |   UINT64_C(0x27b70a8546d22ffc),
 89 |   UINT64_C(0x2e1b21385c26c926),
 90 |   UINT64_C(0x4d2c6dfc5ac42aed),
 91 |   UINT64_C(0x53380d139d95b3df),
 92 |   UINT64_C(0x650a73548baf63de),
 93 |   UINT64_C(0x766a0abb3c77b2a8),
 94 |   UINT64_C(0x81c2c92e47edaee6),
 95 |   UINT64_C(0x92722c851482353b),
 96 |   UINT64_C(0xa2bfe8a14cf10364),
 97 |   UINT64_C(0xa81a664bbc423001),
 98 |   UINT64_C(0xc24b8b70d0f89791),
 99 |   UINT64_C(0xc76c51a30654be30),
100 |   UINT64_C(0xd192e819d6ef5218),
101 |   UINT64_C(0xd69906245565a910),
102 |   UINT64_C(0xf40e35855771202a),
103 |   UINT64_C(0x106aa07032bbd1b8),
104 |   UINT64_C(0x19a4c116b8d2d0c8),
105 |   UINT64_C(0x1e376c085141ab53),
106 |   UINT64_C(0x2748774cdf8eeb99),
107 |   UINT64_C(0x34b0bcb5e19b48a8),
108 |   UINT64_C(0x391c0cb3c5c95a63),
109 |   UINT64_C(0x4ed8aa4ae3418acb),
110 |   UINT64_C(0x5b9cca4f7763e373),
111 |   UINT64_C(0x682e6ff3d6b2b8a3),
112 |   UINT64_C(0x748f82ee5defb2fc),
113 |   UINT64_C(0x78a5636f43172f60),
114 |   UINT64_C(0x84c87814a1f0ab72),
115 |   UINT64_C(0x8cc702081a6439ec),
116 |   UINT64_C(0x90befffa23631e28),
117 |   UINT64_C(0xa4506cebde82bde9),
118 |   UINT64_C(0xbef9a3f7b2c67915),
119 |   UINT64_C(0xc67178f2e372532b),
120 |   UINT64_C(0xca273eceea26619c),
121 |   UINT64_C(0xd186b8c721c0c207),
122 |   UINT64_C(0xeada7dd6cde0eb1e),
123 |   UINT64_C(0xf57d4f7fee6ed178),
124 |   UINT64_C(0x06f067aa72176fba),
125 |   UINT64_C(0x0a637dc5a2c898a6),
126 |   UINT64_C(0x113f9804bef90dae),
127 |   UINT64_C(0x1b710b35131c471b),
128 |   UINT64_C(0x28db77f523047d84),
129 |   UINT64_C(0x32caab7b40c72493),
130 |   UINT64_C(0x3c9ebe0a15c9bebc),
131 |   UINT64_C(0x431d67c49c100d4c),
132 |   UINT64_C(0x4cc5d4becb3e42b6),
133 |   UINT64_C(0x597f299cfc657e2a),
134 |   UINT64_C(0x5fcb6fab3ad6faec),
135 |   UINT64_C(0x6c44198c4a475817)
136 | };
137 | 
138 | static void SHA512_transform(struct SHA512Context * ctx)
139 | {
140 |   int i;
141 |   register u64 a, b, c, d, e, f, g, h, t1, t2;
142 |   u64 data[80];
143 | 
144 |   /* Convert buffer data to 16 big-endian integers */
145 |   SHA512_copy_and_swap(ctx->buffer, data, 16);
146 | 
147 |   /* Expand into 80 integers */
148 |   for (i = 16; i < 80; i++) {
149 |     data[i] = sigma1(data[i-2]) + data[i-7] + sigma0(data[i-15]) + data[i-16];
150 |   }
151 | 
152 |   /* Initialize working variables */
153 |   a = ctx->state[0];
154 |   b = ctx->state[1];
155 |   c = ctx->state[2];
156 |   d = ctx->state[3];
157 |   e = ctx->state[4];
158 |   f = ctx->state[5];
159 |   g = ctx->state[6];
160 |   h = ctx->state[7];
161 | 
162 |   /* Perform rounds */
163 | #if 0
164 |   for (i = 0; i < 80; i++) {
165 |     t1 = h + SIGMA1(e) + CH(e, f, g) + SHA512_constants[i] + data[i];
166 |     t2 = SIGMA0(a) + MAJ(a, b, c);
167 |     h = g;  g = f;  f = e;  e = d + t1;
168 |     d = c;  c = b;  b = a;  a = t1 + t2;
169 |   }
170 | #else
171 | #define STEP(a,b,c,d,e,f,g,h,i) \
172 |     t1 = h + SIGMA1(e) + CH(e, f, g) + SHA512_constants[i] + data[i]; \
173 |     t2 = SIGMA0(a) + MAJ(a, b, c); \
174 |     d = d + t1; \
175 |     h = t1 + t2
176 | 
177 |   for (i = 0; i < 80; i += 8) {
178 |     STEP(a,b,c,d,e,f,g,h,i);
179 |     STEP(h,a,b,c,d,e,f,g,i+1);
180 |     STEP(g,h,a,b,c,d,e,f,i+2);
181 |     STEP(f,g,h,a,b,c,d,e,i+3);
182 |     STEP(e,f,g,h,a,b,c,d,i+4);
183 |     STEP(d,e,f,g,h,a,b,c,i+5);
184 |     STEP(c,d,e,f,g,h,a,b,i+6);
185 |     STEP(b,c,d,e,f,g,h,a,i+7);
186 |   }
187 | #endif
188 | 
189 |   /* Update chaining values */
190 |   ctx->state[0] += a;
191 |   ctx->state[1] += b;
192 |   ctx->state[2] += c;
193 |   ctx->state[3] += d;
194 |   ctx->state[4] += e;
195 |   ctx->state[5] += f;
196 |   ctx->state[6] += g;
197 |   ctx->state[7] += h;
198 | }
199 | 
200 | EXPORT void SHA512_init(struct SHA512Context * ctx, int bitsize)
201 | {
202 |   switch (bitsize) {
203 |   case 512:
204 |     ctx->state[0] = UINT64_C(0x6a09e667f3bcc908);
205 |     ctx->state[1] = UINT64_C(0xbb67ae8584caa73b);
206 |     ctx->state[2] = UINT64_C(0x3c6ef372fe94f82b);
207 |     ctx->state[3] = UINT64_C(0xa54ff53a5f1d36f1 );
208 |     ctx->state[4] = UINT64_C(0x510e527fade682d1);
209 |     ctx->state[5] = UINT64_C(0x9b05688c2b3e6c1f);
210 |     ctx->state[6] = UINT64_C(0x1f83d9abfb41bd6b);
211 |     ctx->state[7] = UINT64_C(0x5be0cd19137e2179);
212 |     break;
213 |   case 384:
214 |     ctx->state[0] = UINT64_C(0xcbbb9d5dc1059ed8);
215 |     ctx->state[1] = UINT64_C(0x629a292a367cd507);
216 |     ctx->state[2] = UINT64_C(0x9159015a3070dd17);
217 |     ctx->state[3] = UINT64_C(0x152fecd8f70e5939 );
218 |     ctx->state[4] = UINT64_C(0x67332667ffc00b31);
219 |     ctx->state[5] = UINT64_C(0x8eb44a8768581511);
220 |     ctx->state[6] = UINT64_C(0xdb0c2e0d64f98fa7);
221 |     ctx->state[7] = UINT64_C(0x47b5481dbefa4fa4);
222 |     break;
223 |   case 256:
224 |     ctx->state[0] = UINT64_C(0x22312194fc2bf72c);
225 |     ctx->state[1] = UINT64_C(0x9f555fa3c84c64c2);
226 |     ctx->state[2] = UINT64_C(0x2393b86b6f53b151);
227 |     ctx->state[3] = UINT64_C(0x963877195940eabd );
228 |     ctx->state[4] = UINT64_C(0x96283ee2a88effe3);
229 |     ctx->state[5] = UINT64_C(0xbe5e1e2553863992);
230 |     ctx->state[6] = UINT64_C(0x2b0199fc2c85b8aa);
231 |     ctx->state[7] = UINT64_C(0x0eb72ddC81c52ca2);
232 |     break;
233 |   case 224:
234 |     ctx->state[0] = UINT64_C(0x8c3d37c819544da2);
235 |     ctx->state[1] = UINT64_C(0x73e1996689dcd4d6);
236 |     ctx->state[2] = UINT64_C(0x1dfab7ae32ff9c82);
237 |     ctx->state[3] = UINT64_C(0x679dd514582f9fcf );
238 |     ctx->state[4] = UINT64_C(0x0f6d2b697bd44da8);
239 |     ctx->state[5] = UINT64_C(0x77e36f7304C48942);
240 |     ctx->state[6] = UINT64_C(0x3f9d85a86a1d36C8);
241 |     ctx->state[7] = UINT64_C(0x1112e6ad91d692a1);
242 |     break;
243 |   default:
244 |     /* The bit size is wrong.  Just zero the state to produce 
245 |        incorrect hashes. */
246 |     memset(ctx->state, 0, sizeof(ctx->state));
247 |     break;
248 |   }
249 |   ctx->numbytes = 0;
250 |   ctx->length[0] = 0;
251 |   ctx->length[1] = 0;
252 | }
253 | 
254 | EXPORT void SHA512_add_data(struct SHA512Context * ctx, unsigned char * data,
255 |                      unsigned long len)
256 | {
257 |   u64 t;
258 | 
259 |   /* Update length */
260 |   t = ctx->length[1];
261 |   if ((ctx->length[1] = t + (u64) (len << 3)) < t)
262 |     ctx->length[0]++;    /* carry from low 64 bits to high 64 bits */
263 |   ctx->length[0] += (u64) len >> 61;
264 | 
265 |   /* If data was left in buffer, pad it with fresh data and munge block */
266 |   if (ctx->numbytes != 0) {
267 |     unsigned long l = 128 - ctx->numbytes;
268 |     if (len < l) {
269 |       memcpy(ctx->buffer + ctx->numbytes, data, len);
270 |       ctx->numbytes += len;
271 |       return;
272 |     }
273 |     memcpy(ctx->buffer + ctx->numbytes, data, l);
274 |     SHA512_transform(ctx);
275 |     data += l;
276 |     len -= l;
277 |   }
278 |   /* Munge data in 128-byte chunks */
279 |   while (len >= 128) {
280 |     memcpy(ctx->buffer, data, 128);
281 |     SHA512_transform(ctx);
282 |     data += 128;
283 |     len -= 128;
284 |   }
285 |   /* Save remaining data */
286 |   memcpy(ctx->buffer, data, len);
287 |   ctx->numbytes = len;
288 | }
289 | 
290 | EXPORT void SHA512_finish(struct SHA512Context * ctx, int bitsize,
291 |                    unsigned char * output)
292 | {
293 |   int i = ctx->numbytes;
294 | 
295 |   /* Set first char of padding to 0x80. There is always room. */
296 |   ctx->buffer[i++] = 0x80;
297 |   /* If we do not have room for the length (8 bytes), pad to 64 bytes
298 |      with zeroes and munge the data block */
299 |   if (i > 112) {
300 |     memset(ctx->buffer + i, 0, 128 - i);
301 |     SHA512_transform(ctx);
302 |     i = 0;
303 |   }
304 |   /* Pad to byte 112 with zeroes */
305 |   memset(ctx->buffer + i, 0, 112 - i);
306 |   /* Add length in big-endian */
307 |   SHA512_copy_and_swap(ctx->length, ctx->buffer + 112, 2);
308 |   /* Munge the final block */
309 |   SHA512_transform(ctx);
310 |   /* Final hash value is in ctx->state modulo big-endian conversion */
311 |   switch (bitsize) {
312 |   case 512:
313 |     SHA512_copy_and_swap(ctx->state, output, 8);
314 |     break;
315 |   case 384:
316 |     SHA512_copy_and_swap(ctx->state, output, 6);
317 |     break;
318 |   case 256:
319 |     SHA512_copy_and_swap(ctx->state, output, 4);
320 |     break;
321 |   case 224:
322 |     SHA512_copy_and_swap(ctx->state, output, 3);
323 | #ifdef ARCH_BIG_ENDIAN
324 |     memcpy(&ctx->state[24], &output[24], 4);
325 | #else
326 |     output[24] = (ctx->state[3] >> (8*7)) & 0xff;
327 |     output[25] = (ctx->state[3] >> (8*6)) & 0xff;
328 |     output[26] = (ctx->state[3] >> (8*5)) & 0xff;
329 |     output[27] = (ctx->state[3] >> (8*4)) & 0xff;
330 | #endif
331 |     break;
332 |   /* default: The bit size is wrong.  Produce no output. */
333 |   }
334 | }
335 | 


--------------------------------------------------------------------------------
/src/blake3_impl.h:
--------------------------------------------------------------------------------
  1 | #ifndef BLAKE3_IMPL_H
  2 | #define BLAKE3_IMPL_H
  3 | 
  4 | #include <assert.h>
  5 | #include <stdbool.h>
  6 | #include <stddef.h>
  7 | #include <stdint.h>
  8 | #include <string.h>
  9 | 
 10 | #include "blake3.h"
 11 | 
 12 | // internal flags
 13 | enum blake3_flags {
 14 |   CHUNK_START         = 1 << 0,
 15 |   CHUNK_END           = 1 << 1,
 16 |   PARENT              = 1 << 2,
 17 |   ROOT                = 1 << 3,
 18 |   KEYED_HASH          = 1 << 4,
 19 |   DERIVE_KEY_CONTEXT  = 1 << 5,
 20 |   DERIVE_KEY_MATERIAL = 1 << 6,
 21 | };
 22 | 
 23 | // This C implementation tries to support recent versions of GCC, Clang, and
 24 | // MSVC.
 25 | #if defined(_MSC_VER)
 26 | #define INLINE static __forceinline
 27 | #else
 28 | #define INLINE static inline __attribute__((always_inline))
 29 | #endif
 30 | 
 31 | #if defined(__x86_64__) || defined(_M_X64) 
 32 | #define IS_X86
 33 | #define IS_X86_64
 34 | #endif
 35 | 
 36 | #if defined(__i386__) || defined(_M_IX86)
 37 | #define IS_X86
 38 | #define IS_X86_32
 39 | #endif
 40 | 
 41 | #if defined(__aarch64__) || defined(_M_ARM64)
 42 | #define IS_AARCH64
 43 | #endif
 44 | 
 45 | #if defined(IS_X86)
 46 | #if defined(_MSC_VER)
 47 | #include <intrin.h>
 48 | #endif
 49 | #endif
 50 | 
 51 | #if !defined(BLAKE3_USE_NEON) 
 52 |   // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
 53 |   #if defined(IS_AARCH64)
 54 |     #define BLAKE3_USE_NEON 1
 55 |   #else
 56 |     #define BLAKE3_USE_NEON 0
 57 |   #endif
 58 | #endif
 59 | 
 60 | #if defined(IS_X86)
 61 | #define MAX_SIMD_DEGREE 16
 62 | #elif BLAKE3_USE_NEON == 1
 63 | #define MAX_SIMD_DEGREE 4
 64 | #else
 65 | #define MAX_SIMD_DEGREE 1
 66 | #endif
 67 | 
 68 | // There are some places where we want a static size that's equal to the
 69 | // MAX_SIMD_DEGREE, but also at least 2.
 70 | #define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
 71 | 
 72 | static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
 73 |                                0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
 74 |                                0x1F83D9ABUL, 0x5BE0CD19UL};
 75 | 
 76 | static const uint8_t MSG_SCHEDULE[7][16] = {
 77 |     {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
 78 |     {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
 79 |     {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
 80 |     {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
 81 |     {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
 82 |     {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
 83 |     {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
 84 | };
 85 | 
 86 | /* Find index of the highest set bit */
 87 | /* x is assumed to be nonzero.       */
 88 | static unsigned int highest_one(uint64_t x) {
 89 | #if defined(__GNUC__) || defined(__clang__)
 90 |   return 63 ^ __builtin_clzll(x);
 91 | #elif defined(_MSC_VER) && defined(IS_X86_64)
 92 |   unsigned long index;
 93 |   _BitScanReverse64(&index, x);
 94 |   return index;
 95 | #elif defined(_MSC_VER) && defined(IS_X86_32)
 96 |   if(x >> 32) {
 97 |     unsigned long index;
 98 |     _BitScanReverse(&index, (unsigned long)(x >> 32));
 99 |     return 32 + index;
100 |   } else {
101 |     unsigned long index;
102 |     _BitScanReverse(&index, (unsigned long)x);
103 |     return index;
104 |   }
105 | #else
106 |   unsigned int c = 0;
107 |   if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
108 |   if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
109 |   if(x & 0x000000000000ff00ULL) { x >>=  8; c +=  8; }
110 |   if(x & 0x00000000000000f0ULL) { x >>=  4; c +=  4; }
111 |   if(x & 0x000000000000000cULL) { x >>=  2; c +=  2; }
112 |   if(x & 0x0000000000000002ULL) {           c +=  1; }
113 |   return c;
114 | #endif
115 | }
116 | 
117 | // Count the number of 1 bits.
118 | INLINE unsigned int popcnt(uint64_t x) {
119 | #if defined(__GNUC__) || defined(__clang__)
120 |   return __builtin_popcountll(x);
121 | #else
122 |   unsigned int count = 0;
123 |   while (x != 0) {
124 |     count += 1;
125 |     x &= x - 1;
126 |   }
127 |   return count;
128 | #endif
129 | }
130 | 
131 | // Largest power of two less than or equal to x. As a special case, returns 1
132 | // when x is 0. 
133 | INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
134 |   return 1ULL << highest_one(x | 1);
135 | }
136 | 
137 | INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
138 | 
139 | INLINE uint32_t counter_high(uint64_t counter) {
140 |   return (uint32_t)(counter >> 32);
141 | }
142 | 
143 | INLINE uint32_t load32(const void *src) {
144 |   const uint8_t *p = (const uint8_t *)src;
145 |   return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
146 |          ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
147 | }
148 | 
149 | INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
150 |                            uint32_t key_words[8]) {
151 |   key_words[0] = load32(&key[0 * 4]);
152 |   key_words[1] = load32(&key[1 * 4]);
153 |   key_words[2] = load32(&key[2 * 4]);
154 |   key_words[3] = load32(&key[3 * 4]);
155 |   key_words[4] = load32(&key[4 * 4]);
156 |   key_words[5] = load32(&key[5 * 4]);
157 |   key_words[6] = load32(&key[6 * 4]);
158 |   key_words[7] = load32(&key[7 * 4]);
159 | }
160 | 
161 | INLINE void store32(void *dst, uint32_t w) {
162 |   uint8_t *p = (uint8_t *)dst;
163 |   p[0] = (uint8_t)(w >> 0);
164 |   p[1] = (uint8_t)(w >> 8);
165 |   p[2] = (uint8_t)(w >> 16);
166 |   p[3] = (uint8_t)(w >> 24);
167 | }
168 | 
169 | INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
170 |   store32(&bytes_out[0 * 4], cv_words[0]);
171 |   store32(&bytes_out[1 * 4], cv_words[1]);
172 |   store32(&bytes_out[2 * 4], cv_words[2]);
173 |   store32(&bytes_out[3 * 4], cv_words[3]);
174 |   store32(&bytes_out[4 * 4], cv_words[4]);
175 |   store32(&bytes_out[5 * 4], cv_words[5]);
176 |   store32(&bytes_out[6 * 4], cv_words[6]);
177 |   store32(&bytes_out[7 * 4], cv_words[7]);
178 | }
179 | 
180 | EXPORT void blake3_compress_in_place(uint32_t cv[8],
181 |                                      const uint8_t block[BLAKE3_BLOCK_LEN],
182 |                                      uint8_t block_len, uint64_t counter,
183 |                                      uint8_t flags);
184 | 
185 | EXPORT void blake3_compress_xof(const uint32_t cv[8],
186 |                                 const uint8_t block[BLAKE3_BLOCK_LEN],
187 |                                 uint8_t block_len, uint64_t counter, uint8_t flags,
188 |                                 uint8_t out[64]);
189 | 
190 | EXPORT void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
191 |                              size_t blocks, const uint32_t key[8], uint64_t counter,
192 |                              bool increment_counter, uint8_t flags,
193 |                              uint8_t flags_start, uint8_t flags_end, uint8_t *out);
194 | 
195 | EXPORT size_t blake3_simd_degree(void);
196 | 
197 | 
198 | // Declarations for implementation-specific functions.
199 | EXPORT void blake3_compress_in_place_portable(uint32_t cv[8],
200 |                                               const uint8_t block[BLAKE3_BLOCK_LEN],
201 |                                               uint8_t block_len, uint64_t counter,
202 |                                               uint8_t flags);
203 | 
204 | EXPORT void blake3_compress_xof_portable(const uint32_t cv[8],
205 |                                          const uint8_t block[BLAKE3_BLOCK_LEN],
206 |                                          uint8_t block_len, uint64_t counter,
207 |                                          uint8_t flags, uint8_t out[64]);
208 | 
209 | EXPORT void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
210 |                                       size_t blocks, const uint32_t key[8],
211 |                                       uint64_t counter, bool increment_counter,
212 |                                       uint8_t flags, uint8_t flags_start,
213 |                                       uint8_t flags_end, uint8_t *out);
214 | 
215 | #if defined(IS_X86)
216 | #if !defined(BLAKE3_NO_SSE2)
217 | EXPORT void blake3_compress_in_place_sse2(uint32_t cv[8],
218 |                                           const uint8_t block[BLAKE3_BLOCK_LEN],
219 |                                           uint8_t block_len, uint64_t counter,
220 |                                           uint8_t flags);
221 | EXPORT void blake3_compress_xof_sse2(const uint32_t cv[8],
222 |                                      const uint8_t block[BLAKE3_BLOCK_LEN],
223 |                                      uint8_t block_len, uint64_t counter,
224 |                                      uint8_t flags, uint8_t out[64]);
225 | EXPORT void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
226 |                                   size_t blocks, const uint32_t key[8],
227 |                                   uint64_t counter, bool increment_counter,
228 |                                   uint8_t flags, uint8_t flags_start,
229 |                                   uint8_t flags_end, uint8_t *out);
230 | #endif
231 | #if !defined(BLAKE3_NO_SSE41)
232 | EXPORT void blake3_compress_in_place_sse41(uint32_t cv[8],
233 |                                            const uint8_t block[BLAKE3_BLOCK_LEN],
234 |                                            uint8_t block_len, uint64_t counter,
235 |                                            uint8_t flags);
236 | EXPORT void blake3_compress_xof_sse41(const uint32_t cv[8],
237 |                                       const uint8_t block[BLAKE3_BLOCK_LEN],
238 |                                       uint8_t block_len, uint64_t counter,
239 |                                       uint8_t flags, uint8_t out[64]);
240 | EXPORT void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
241 |                                    size_t blocks, const uint32_t key[8],
242 |                                    uint64_t counter, bool increment_counter,
243 |                                    uint8_t flags, uint8_t flags_start,
244 |                                    uint8_t flags_end, uint8_t *out);
245 | #endif
246 | #if !defined(BLAKE3_NO_AVX2)
247 | EXPORT void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
248 |                                   size_t blocks, const uint32_t key[8],
249 |                                   uint64_t counter, bool increment_counter,
250 |                                   uint8_t flags, uint8_t flags_start,
251 |                                   uint8_t flags_end, uint8_t *out);
252 | #endif
253 | #if !defined(BLAKE3_NO_AVX512)
254 | EXPORT void blake3_compress_in_place_avx512(uint32_t cv[8],
255 |                                             const uint8_t block[BLAKE3_BLOCK_LEN],
256 |                                             uint8_t block_len, uint64_t counter,
257 |                                             uint8_t flags);
258 | 
259 | EXPORT void blake3_compress_xof_avx512(const uint32_t cv[8],
260 |                                        const uint8_t block[BLAKE3_BLOCK_LEN],
261 |                                        uint8_t block_len, uint64_t counter,
262 |                                        uint8_t flags, uint8_t out[64]);
263 | 
264 | EXPORT void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
265 |                                     size_t blocks, const uint32_t key[8],
266 |                                     uint64_t counter, bool increment_counter,
267 |                                     uint8_t flags, uint8_t flags_start,
268 |                                     uint8_t flags_end, uint8_t *out);
269 | #endif
270 | #endif
271 | 
272 | #if BLAKE3_USE_NEON == 1
273 | EXPORT void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
274 |                                   size_t blocks, const uint32_t key[8],
275 |                                   uint64_t counter, bool increment_counter,
276 |                                   uint8_t flags, uint8_t flags_start,
277 |                                   uint8_t flags_end, uint8_t *out);
278 | #endif
279 | 
280 | 
281 | #endif /* BLAKE3_IMPL_H */
282 | 


--------------------------------------------------------------------------------