├── LICENSE
├── MANIFEST
├── MANIFEST.in
├── README.md
├── README.txt
├── setup.py
├── shacommon
    ├── common.h
    ├── sha256_m.h
    ├── sph_skein.h
    └── sph_types.h
├── skein.c
├── skein.h
├── skeinmodule.c
└── test.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2019 nakkie
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
 1 | # file GENERATED by distutils, do NOT edit
 2 | setup.py
 3 | skein.c
 4 | skein.h
 5 | skeinmodule.c
 6 | shacommon/common.h
 7 | shacommon/sha256_m.h
 8 | shacommon/sph_skein.h
 9 | shacommon/sph_types.h
10 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-include *.c *.h
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | skein-hash-python
 2 | =====================
 3 | 
 4 | Python module providing the Skein proof-of-work hash used by chipocoin (CHIPO), for use by other tools.
 5 | 
 6 | Install
 7 | -------
 8 | 
 9 | Python 2.7 is required as well as gcc.
10 | 
11 |     $ python setup.py install
12 | 
13 | 
14 | Python 3 is also supported.
15 | 
16 |     $ python3 setup.py install
17 | 
18 | 
19 | Install (via pip)
20 | ------------------
21 | 
22 | PyPI package is available via `pip` or `pip2`.
23 | 
24 |     $ sudo pip2 install skein_hash
25 | 
26 | 
27 | Python 3 is also supported.
28 | 
29 |     $ sudo pip3 install skein_hash
30 | 
31 | 
32 | Test
33 | -------
34 | 
35 | After installation, run the test script to verify the hash.
36 | 
37 |     $ python test.py
38 |     
39 | or:
40 | 
41 |     $ python3 test.py
42 |     
43 | Credits
44 | -------
45 | 
46 | * Module maintained by @nakkie https://github.com/CHIPO-Project/skein-hash-python
47 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | skein-hash-python
 2 | =====================
 3 | 
 4 | Python module providing the Skein proof-of-work hash used by chipocoin (CHIPO), for use by other tools.
 5 | 
 6 | Install
 7 | -------
 8 | 
 9 | Python 2.7 is required as well as gcc.
10 | 
11 |     $ python setup.py install
12 | 
13 | 
14 | Python 3 is also supported.
15 | 
16 |     $ python3 setup.py install
17 | 
18 | 
19 | Install (via pip)
20 | ------------------
21 | 
22 | PyPI package is available via `pip` or `pip2`.
23 | 
24 |     $ sudo pip2 install skein_hash
25 | 
26 | 
27 | Python 3 is also supported.
28 | 
29 |     $ sudo pip3 install skein_hash
30 | 
31 | 
32 | Test
33 | -------
34 | 
35 | After installation, run the test script to verify the hash.
36 | 
37 |     $ python test.py
38 |     
39 | or:
40 | 
41 |     $ python3 test.py
42 |     
43 | Credits
44 | -------
45 | 
46 | * Module maintained by @nakkie https://github.com/CHIPO-Project/skein-hash-python
47 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | from distutils.core import setup, Extension
 3 | 
 4 | skein_hash_module = Extension('skein_hash',
 5 |                                sources = ['skeinmodule.c',
 6 |                                           'skein.c'],
 7 |                                include_dirs=['.', './shacommon'])
 8 | 
 9 | setup (name = 'coin_skein_hash',
10 |        version = '1.1',
11 |        package_data = {
12 |         '': ['*.h']
13 |         },
14 |        license="MIT",
15 |        author = 'nakkie',
16 |        author_email = 'nakkie@linux.jpn.com',
17 |        maintainer='nakkie',
18 |        maintainer_email='nakkie@linux.jpn.com',
19 |        description = 'Binding for CHIPO skein proof of work hashing.',
20 |        ext_modules = [skein_hash_module],
21 |        url = 'https://github.com/CHIPO-Project/skein-hash-python',
22 |        download_url = 'https://github.com/CHIPO-Project/skein-hash-python/archive/v1.0.tar.gz'
23 |        )
24 | 


--------------------------------------------------------------------------------
/shacommon/common.h:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 The Bitcoin developers
  2 | // Distributed under the MIT software license, see the accompanying
  3 | // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4 | 
  5 | #ifndef BITCOIN_CRYPTO_COMMON_H
  6 | #define BITCOIN_CRYPTO_COMMON_H
  7 | 
  8 | #include <stdint.h>
  9 | 
 10 | #if defined(HAVE_ENDIAN_H)
 11 | #include <endian.h>
 12 | #endif
 13 | 
 14 | uint32_t static inline ReadLE32(const unsigned char* ptr)
 15 | {
 16 | #if HAVE_DECL_LE32TOH == 1
 17 |     return le32toh(*((uint32_t*)ptr));
 18 | #elif !defined(WORDS_BIGENDIAN)
 19 |     return *((uint32_t*)ptr);
 20 | #else
 21 |     return ((uint32_t)ptr[3] << 24 | (uint32_t)ptr[2] << 16 | (uint32_t)ptr[1] << 8 | (uint32_t)ptr[0]);
 22 | #endif
 23 | }
 24 | 
 25 | uint64_t static inline ReadLE64(const unsigned char* ptr)
 26 | {
 27 | #if HAVE_DECL_LE64TOH == 1
 28 |     return le64toh(*((uint64_t*)ptr));
 29 | #elif !defined(WORDS_BIGENDIAN)
 30 |     return *((uint64_t*)ptr);
 31 | #else
 32 |     return ((uint64_t)ptr[7] << 56 | (uint64_t)ptr[6] << 48 | (uint64_t)ptr[5] << 40 | (uint64_t)ptr[4] << 32 |
 33 |             (uint64_t)ptr[3] << 24 | (uint64_t)ptr[2] << 16 | (uint64_t)ptr[1] << 8 | (uint64_t)ptr[0]);
 34 | #endif
 35 | }
 36 | 
 37 | void static inline WriteLE32(unsigned char* ptr, uint32_t x)
 38 | {
 39 | #if HAVE_DECL_HTOLE32 == 1
 40 |     *((uint32_t*)ptr) = htole32(x);
 41 | #elif !defined(WORDS_BIGENDIAN)
 42 |     *((uint32_t*)ptr) = x;
 43 | #else
 44 |     ptr[3] = x >> 24;
 45 |     ptr[2] = x >> 16;
 46 |     ptr[1] = x >> 8;
 47 |     ptr[0] = x;
 48 | #endif
 49 | }
 50 | 
 51 | void static inline WriteLE64(unsigned char* ptr, uint64_t x)
 52 | {
 53 | #if HAVE_DECL_HTOLE64 == 1
 54 |     *((uint64_t*)ptr) = htole64(x);
 55 | #elif !defined(WORDS_BIGENDIAN)
 56 |     *((uint64_t*)ptr) = x;
 57 | #else
 58 |     ptr[7] = x >> 56;
 59 |     ptr[6] = x >> 48;
 60 |     ptr[5] = x >> 40;
 61 |     ptr[4] = x >> 32;
 62 |     ptr[3] = x >> 24;
 63 |     ptr[2] = x >> 16;
 64 |     ptr[1] = x >> 8;
 65 |     ptr[0] = x;
 66 | #endif
 67 | }
 68 | 
 69 | uint32_t static inline ReadBE32(const unsigned char* ptr)
 70 | {
 71 | #if HAVE_DECL_BE32TOH == 1
 72 |     return be32toh(*((uint32_t*)ptr));
 73 | #else
 74 |     return ((uint32_t)ptr[0] << 24 | (uint32_t)ptr[1] << 16 | (uint32_t)ptr[2] << 8 | (uint32_t)ptr[3]);
 75 | #endif
 76 | }
 77 | 
 78 | uint64_t static inline ReadBE64(const unsigned char* ptr)
 79 | {
 80 | #if HAVE_DECL_BE64TOH == 1
 81 |     return be64toh(*((uint64_t*)ptr));
 82 | #else
 83 |     return ((uint64_t)ptr[0] << 56 | (uint64_t)ptr[1] << 48 | (uint64_t)ptr[2] << 40 | (uint64_t)ptr[3] << 32 |
 84 |             (uint64_t)ptr[4] << 24 | (uint64_t)ptr[5] << 16 | (uint64_t)ptr[6] << 8 | (uint64_t)ptr[7]);
 85 | #endif
 86 | }
 87 | 
 88 | void static inline WriteBE32(unsigned char* ptr, uint32_t x)
 89 | {
 90 | #if HAVE_DECL_HTOBE32 == 1
 91 |     *((uint32_t*)ptr) = htobe32(x);
 92 | #else
 93 |     ptr[0] = x >> 24;
 94 |     ptr[1] = x >> 16;
 95 |     ptr[2] = x >> 8;
 96 |     ptr[3] = x;
 97 | #endif
 98 | }
 99 | 
// Store x at ptr as 8 bytes in big-endian order (most significant first).
//
// Written byte by byte so that ptr may be unaligned and the output does not
// depend on host byte order.
static inline void WriteBE64(unsigned char* ptr, uint64_t x)
{
    ptr[0] = (unsigned char)(x >> 56);
    ptr[1] = (unsigned char)(x >> 48);
    ptr[2] = (unsigned char)(x >> 40);
    ptr[3] = (unsigned char)(x >> 32);
    ptr[4] = (unsigned char)(x >> 24);
    ptr[5] = (unsigned char)(x >> 16);
    ptr[6] = (unsigned char)(x >> 8);
    ptr[7] = (unsigned char)(x);
}
115 | 
116 | #endif // BITCOIN_CRYPTO_COMMON_H
117 | 


--------------------------------------------------------------------------------
/shacommon/sha256_m.h:
--------------------------------------------------------------------------------
  1 | #ifndef SHA256_H
  2 | #define SHA256_H
  3 | 
  4 | #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
  5 | #include "stdint.h"
  6 | #else
  7 | #include <stdint.h>
  8 | #endif
  9 | 
 10 | #include <string.h>
 11 | 
 12 | static __inline uint32_t
 13 | be32dec(const void *pp)
 14 | {
 15 | 	const uint8_t *p = (uint8_t const *)pp;
 16 | 
 17 | 	return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
 18 | 	    ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
 19 | }
 20 | 
 21 | static __inline void
 22 | be32enc(void *pp, uint32_t x)
 23 | {
 24 | 	uint8_t * p = (uint8_t *)pp;
 25 | 
 26 | 	p[3] = x & 0xff;
 27 | 	p[2] = (x >> 8) & 0xff;
 28 | 	p[1] = (x >> 16) & 0xff;
 29 | 	p[0] = (x >> 24) & 0xff;
 30 | }
 31 | 
 32 | static __inline uint32_t
 33 | le32dec(const void *pp)
 34 | {
 35 | 	const uint8_t *p = (uint8_t const *)pp;
 36 | 
 37 | 	return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
 38 | 	    ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
 39 | }
 40 | 
 41 | static __inline void
 42 | le32enc(void *pp, uint32_t x)
 43 | {
 44 | 	uint8_t * p = (uint8_t *)pp;
 45 | 
 46 | 	p[0] = x & 0xff;
 47 | 	p[1] = (x >> 8) & 0xff;
 48 | 	p[2] = (x >> 16) & 0xff;
 49 | 	p[3] = (x >> 24) & 0xff;
 50 | }
 51 | 
 52 | 
/*
 * Running state of one SHA-256 computation, shared by SHA256_Init,
 * SHA256_Update, SHA256_Pad and SHA256_Final.
 */
typedef struct SHA256Context {
	uint32_t state[8];	/* hash words; set by SHA256_Init, mixed by SHA256_Transform */
	uint32_t count[2];	/* bits processed: count[0] = high 32 bits, count[1] = low 32 bits */
	unsigned char buf[64];	/* input bytes of the current, not yet complete, 64-byte block */
} SHA256_CTX;
 58 | 
 59 | /*
 60 |  * Encode a length len/4 vector of (uint32_t) into a length len vector of
 61 |  * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
 62 |  */
 63 | static void
 64 | be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
 65 | {
 66 | 	size_t i;
 67 | 
 68 | 	for (i = 0; i < len / 4; i++)
 69 | 		be32enc(dst + i * 4, src[i]);
 70 | }
 71 | 
 72 | /*
 73 |  * Decode a big-endian length len vector of (unsigned char) into a length
 74 |  * len/4 vector of (uint32_t).  Assumes len is a multiple of 4.
 75 |  */
 76 | static void
 77 | be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
 78 | {
 79 | 	size_t i;
 80 | 
 81 | 	for (i = 0; i < len / 4; i++)
 82 | 		dst[i] = be32dec(src + i * 4);
 83 | }
 84 | 
 85 | /* Elementary functions used by SHA256 */
 86 | #define sha_Ch(x, y, z)	((x & (y ^ z)) ^ z)
 87 | #define Maj(x, y, z)	((x & (y | z)) | (y & z))
 88 | #define SHR(x, n)	(x >> n)
 89 | #define ROTR(x, n)	((x >> n) | (x << (32 - n)))
 90 | #define sha_S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
 91 | #define sha_S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
 92 | #define sha_s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
 93 | #define sha_s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
 94 | 
 95 | /* SHA256 round function */
 96 | #define RND(a, b, c, d, e, f, g, h, k)			\
 97 | 	t0 = h + sha_S1(e) + sha_Ch(e, f, g) + k;		\
 98 | 	t1 = sha_S0(a) + Maj(a, b, c);			\
 99 | 	d += t0;					\
100 | 	h  = t0 + t1;
101 | 
102 | /* Adjusted round function for rotating state */
103 | #define RNDr(S, W, i, k)			\
104 | 	RND(S[(64 - i) % 8], S[(65 - i) % 8],	\
105 | 	    S[(66 - i) % 8], S[(67 - i) % 8],	\
106 | 	    S[(68 - i) % 8], S[(69 - i) % 8],	\
107 | 	    S[(70 - i) % 8], S[(71 - i) % 8],	\
108 | 	    W[i] + k)
109 | 
/*
 * SHA256 block compression function.  The 256-bit state is transformed via
 * the 512-bit input block to produce a new state.
 *
 * state: the 8 hash words, updated in place.
 * block: the 64 input bytes, read as 16 big-endian 32-bit words.
 */
static void
SHA256_Transform(uint32_t * state, const unsigned char block[64])
{
	uint32_t W[64];		/* message schedule */
	uint32_t S[8];		/* working copy of the state */
	uint32_t t0, t1;	/* scratch variables used by the RND macro */
	int i;

	/*
	 * 1. Prepare message schedule W: the first 16 words come straight
	 * from the block, the remaining 48 are derived from earlier words.
	 */
	be32dec_vect(W, block, 64);
	for (i = 16; i < 64; i++)
		W[i] = sha_s1(W[i - 2]) + W[i - 7] + sha_s0(W[i - 15]) + W[i - 16];

	/* 2. Initialize working variables (8 words = 32 bytes). */
	memcpy(S, state, 32);

	/*
	 * 3. Mix.  One RNDr per round; the literal is that round's additive
	 * constant, and RNDr rotates the indices into S by the round number.
	 */
	RNDr(S, W, 0, 0x428a2f98);
	RNDr(S, W, 1, 0x71374491);
	RNDr(S, W, 2, 0xb5c0fbcf);
	RNDr(S, W, 3, 0xe9b5dba5);
	RNDr(S, W, 4, 0x3956c25b);
	RNDr(S, W, 5, 0x59f111f1);
	RNDr(S, W, 6, 0x923f82a4);
	RNDr(S, W, 7, 0xab1c5ed5);
	RNDr(S, W, 8, 0xd807aa98);
	RNDr(S, W, 9, 0x12835b01);
	RNDr(S, W, 10, 0x243185be);
	RNDr(S, W, 11, 0x550c7dc3);
	RNDr(S, W, 12, 0x72be5d74);
	RNDr(S, W, 13, 0x80deb1fe);
	RNDr(S, W, 14, 0x9bdc06a7);
	RNDr(S, W, 15, 0xc19bf174);
	RNDr(S, W, 16, 0xe49b69c1);
	RNDr(S, W, 17, 0xefbe4786);
	RNDr(S, W, 18, 0x0fc19dc6);
	RNDr(S, W, 19, 0x240ca1cc);
	RNDr(S, W, 20, 0x2de92c6f);
	RNDr(S, W, 21, 0x4a7484aa);
	RNDr(S, W, 22, 0x5cb0a9dc);
	RNDr(S, W, 23, 0x76f988da);
	RNDr(S, W, 24, 0x983e5152);
	RNDr(S, W, 25, 0xa831c66d);
	RNDr(S, W, 26, 0xb00327c8);
	RNDr(S, W, 27, 0xbf597fc7);
	RNDr(S, W, 28, 0xc6e00bf3);
	RNDr(S, W, 29, 0xd5a79147);
	RNDr(S, W, 30, 0x06ca6351);
	RNDr(S, W, 31, 0x14292967);
	RNDr(S, W, 32, 0x27b70a85);
	RNDr(S, W, 33, 0x2e1b2138);
	RNDr(S, W, 34, 0x4d2c6dfc);
	RNDr(S, W, 35, 0x53380d13);
	RNDr(S, W, 36, 0x650a7354);
	RNDr(S, W, 37, 0x766a0abb);
	RNDr(S, W, 38, 0x81c2c92e);
	RNDr(S, W, 39, 0x92722c85);
	RNDr(S, W, 40, 0xa2bfe8a1);
	RNDr(S, W, 41, 0xa81a664b);
	RNDr(S, W, 42, 0xc24b8b70);
	RNDr(S, W, 43, 0xc76c51a3);
	RNDr(S, W, 44, 0xd192e819);
	RNDr(S, W, 45, 0xd6990624);
	RNDr(S, W, 46, 0xf40e3585);
	RNDr(S, W, 47, 0x106aa070);
	RNDr(S, W, 48, 0x19a4c116);
	RNDr(S, W, 49, 0x1e376c08);
	RNDr(S, W, 50, 0x2748774c);
	RNDr(S, W, 51, 0x34b0bcb5);
	RNDr(S, W, 52, 0x391c0cb3);
	RNDr(S, W, 53, 0x4ed8aa4a);
	RNDr(S, W, 54, 0x5b9cca4f);
	RNDr(S, W, 55, 0x682e6ff3);
	RNDr(S, W, 56, 0x748f82ee);
	RNDr(S, W, 57, 0x78a5636f);
	RNDr(S, W, 58, 0x84c87814);
	RNDr(S, W, 59, 0x8cc70208);
	RNDr(S, W, 60, 0x90befffa);
	RNDr(S, W, 61, 0xa4506ceb);
	RNDr(S, W, 62, 0xbef9a3f7);
	RNDr(S, W, 63, 0xc67178f2);

	/* 4. Mix local working variables into global state */
	for (i = 0; i < 8; i++)
		state[i] += S[i];

	/*
	 * Clean the stack: 256 and 32 are the byte sizes of W and S.
	 * NOTE(review): these stores are never read back, so an optimizing
	 * compiler may be free to drop them -- confirm if wiping matters.
	 */
	memset(W, 0, 256);
	memset(S, 0, 32);
	t0 = t1 = 0;
}
205 | 
/*
 * Padding source for SHA256_Pad: a single 0x80 byte followed by zeros.
 * Elements without an explicit initializer are zero-initialized.
 */
static unsigned char PAD[64] = { 0x80 };
212 | 
213 | /* SHA-256 initialization.  Begins a SHA-256 operation. */
214 | static void
215 | SHA256_Init(SHA256_CTX * ctx)
216 | {
217 | 
218 | 	/* Zero bits processed so far */
219 | 	ctx->count[0] = ctx->count[1] = 0;
220 | 
221 | 	/* Magic initialization constants */
222 | 	ctx->state[0] = 0x6A09E667;
223 | 	ctx->state[1] = 0xBB67AE85;
224 | 	ctx->state[2] = 0x3C6EF372;
225 | 	ctx->state[3] = 0xA54FF53A;
226 | 	ctx->state[4] = 0x510E527F;
227 | 	ctx->state[5] = 0x9B05688C;
228 | 	ctx->state[6] = 0x1F83D9AB;
229 | 	ctx->state[7] = 0x5BE0CD19;
230 | }
231 | 
/*
 * Add bytes into the hash.
 *
 * ctx: running context; its bit counter, buffer and state are updated.
 * in:  input bytes.
 * len: number of input bytes.
 *
 * Input is accumulated in ctx->buf until a full 64-byte block is available;
 * complete blocks are fed to SHA256_Transform and the tail stays buffered.
 */
static void
SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
{
	uint32_t bitlen[2];
	uint32_t r;
	const unsigned char *src = (const unsigned char *)in;

	/* Number of bytes left in the buffer from previous updates */
	r = (ctx->count[1] >> 3) & 0x3f;

	/*
	 * Convert the length into a number of bits, split into a low word
	 * (bitlen[1]) and a high word (bitlen[0]).
	 */
	bitlen[1] = ((uint32_t)len) << 3;
	bitlen[0] = (uint32_t)(len >> 29);

	/*
	 * Update number of bits; if the low word wrapped around, carry one
	 * into the high word.
	 */
	if ((ctx->count[1] += bitlen[1]) < bitlen[1])
		ctx->count[0]++;
	ctx->count[0] += bitlen[0];

	/* Handle the case where we don't need to perform any transforms */
	if (len < 64 - r) {
		memcpy(&ctx->buf[r], src, len);
		return;
	}

	/* Finish the current block */
	memcpy(&ctx->buf[r], src, 64 - r);
	SHA256_Transform(ctx->state, ctx->buf);
	src += 64 - r;
	len -= 64 - r;

	/* Perform complete blocks directly from the input, without copying */
	while (len >= 64) {
		SHA256_Transform(ctx->state, src);
		src += 64;
		len -= 64;
	}

	/* Copy left over data (fewer than 64 bytes) into buffer */
	memcpy(ctx->buf, src, len);
}
274 | 
275 | /* Add padding and terminating bit-count. */
276 | static void
277 | SHA256_Pad(SHA256_CTX * ctx)
278 | {
279 | 	unsigned char len[8];
280 | 	uint32_t r, plen;
281 | 
282 | 	/*
283 | 	 * Convert length to a vector of bytes -- we do this now rather
284 | 	 * than later because the length will change after we pad.
285 | 	 */
286 | 	be32enc_vect(len, ctx->count, 8);
287 | 
288 | 	/* Add 1--64 bytes so that the resulting length is 56 mod 64 */
289 | 	r = (ctx->count[1] >> 3) & 0x3f;
290 | 	plen = (r < 56) ? (56 - r) : (120 - r);
291 | 	SHA256_Update(ctx, PAD, (size_t)plen);
292 | 
293 | 	/* Add the terminating bit-count */
294 | 	SHA256_Update(ctx, len, 8);
295 | }
296 | 
297 | /*
298 |  * SHA-256 finalization.  Pads the input data, exports the hash value,
299 |  * and clears the context state.
300 |  */
301 | static void
302 | SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx)
303 | {
304 | 
305 | 	/* Add padding */
306 | 	SHA256_Pad(ctx);
307 | 
308 | 	/* Write the hash */
309 | 	be32enc_vect(digest, ctx->state, 32);
310 | 
311 | 	/* Clear the context state */
312 | 	memset((void *)ctx, 0, sizeof(*ctx));
313 | }
314 | 
315 |  // namespace sha256_y
316 | #endif
317 | 


--------------------------------------------------------------------------------
/shacommon/sph_skein.h:
--------------------------------------------------------------------------------
  1 | /* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
  2 | /**
  3 |  * Skein interface. The Skein specification defines three main
  4 |  * functions, called Skein-256, Skein-512 and Skein-1024, which can be
  5 |  * further parameterized with an output length. For the SHA-3
  6 |  * competition, Skein-512 is used for output sizes of 224, 256, 384 and
  7 |  * 512 bits; this is what this code implements. Thus, we hereafter call
  8 |  * Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
  9 |  * specification defines as Skein-512-224, Skein-512-256, Skein-512-384
 10 |  * and Skein-512-512, respectively.
 11 |  *
 12 |  * ==========================(LICENSE BEGIN)============================
 13 |  *
 14 |  * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 15 |  * 
 16 |  * Permission is hereby granted, free of charge, to any person obtaining
 17 |  * a copy of this software and associated documentation files (the
 18 |  * "Software"), to deal in the Software without restriction, including
 19 |  * without limitation the rights to use, copy, modify, merge, publish,
 20 |  * distribute, sublicense, and/or sell copies of the Software, and to
 21 |  * permit persons to whom the Software is furnished to do so, subject to
 22 |  * the following conditions:
 23 |  * 
 24 |  * The above copyright notice and this permission notice shall be
 25 |  * included in all copies or substantial portions of the Software.
 26 |  * 
 27 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 28 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 29 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 30 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 31 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 32 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 33 |  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 34 |  *
 35 |  * ===========================(LICENSE END)=============================
 36 |  *
 37 |  * @file     sph_skein.h
 38 |  * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 39 |  */
 40 | 
 41 | #ifndef SPH_SKEIN_H__
 42 | #define SPH_SKEIN_H__
 43 | 
 44 | #ifdef __cplusplus
 45 | extern "C"{
 46 | #endif
 47 | 
 48 | #include <stddef.h>
 49 | #include "sph_types.h"
 50 | 
 51 | #if SPH_64
 52 | 
 53 | /**
 54 |  * Output size (in bits) for Skein-224.
 55 |  */
 56 | #define SPH_SIZE_skein224   224
 57 | 
 58 | /**
 59 |  * Output size (in bits) for Skein-256.
 60 |  */
 61 | #define SPH_SIZE_skein256   256
 62 | 
 63 | /**
 64 |  * Output size (in bits) for Skein-384.
 65 |  */
 66 | #define SPH_SIZE_skein384   384
 67 | 
 68 | /**
 69 |  * Output size (in bits) for Skein-512.
 70 |  */
 71 | #define SPH_SIZE_skein512   512
 72 | 
/**
 * This structure is a context for Skein computations (with a 384- or
 * 512-bit output): it contains the intermediate values and some data
 * from the last entered block. Once a Skein computation has been
 * performed, the context can be reused for another computation.
 *
 * The contents of this structure are private. A running Skein computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 *
 * This header also uses the same structure, through typedefs, as the
 * context type for Skein-224 and Skein-256.
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	size_t ptr;               /* presumably the number of bytes held in buf -- TODO confirm against skein.c */
	sph_u64 h0, h1, h2, h3, h4, h5, h6, h7;  /* presumably the eight 64-bit chaining words -- TODO confirm */
	sph_u64 bcount;           /* presumably a count of processed blocks -- TODO confirm */
#endif
} sph_skein_big_context;
 91 | 
 92 | /**
 93 |  * Type for a Skein-224 context (identical to the common "big" context).
 94 |  */
 95 | typedef sph_skein_big_context sph_skein224_context;
 96 | 
 97 | /**
 98 |  * Type for a Skein-256 context (identical to the common "big" context).
 99 |  */
100 | typedef sph_skein_big_context sph_skein256_context;
101 | 
102 | /**
103 |  * Type for a Skein-384 context (identical to the common "big" context).
104 |  */
105 | typedef sph_skein_big_context sph_skein384_context;
106 | 
107 | /**
108 |  * Type for a Skein-512 context (identical to the common "big" context).
109 |  */
110 | typedef sph_skein_big_context sph_skein512_context;
111 | 
112 | /**
113 |  * Initialize a Skein-224 context. This process performs no memory allocation.
114 |  *
115 |  * @param cc   the Skein-224 context (pointer to a
116 |  *             <code>sph_skein224_context</code>)
117 |  */
118 | void sph_skein224_init(void *cc);
119 | 
120 | /**
121 |  * Process some data bytes. It is acceptable that <code>len</code> is zero
122 |  * (in which case this function does nothing).
123 |  *
124 |  * @param cc     the Skein-224 context
125 |  * @param data   the input data
126 |  * @param len    the input data length (in bytes)
127 |  */
128 | void sph_skein224(void *cc, const void *data, size_t len);
129 | 
130 | /**
131 |  * Terminate the current Skein-224 computation and output the result into
132 |  * the provided buffer. The destination buffer must be wide enough to
133 |  * accommodate the result (28 bytes). The context is automatically
134 |  * reinitialized.
135 |  *
136 |  * @param cc    the Skein-224 context
137 |  * @param dst   the destination buffer
138 |  */
139 | void sph_skein224_close(void *cc, void *dst);
140 | 
141 | /**
142 |  * Add a few additional bits (0 to 7) to the current computation, then
143 |  * terminate it and output the result in the provided buffer, which must
144 |  * be wide enough to accommodate the result (28 bytes). If bit number i
145 |  * in <code>ub</code> has value 2^i, then the extra bits are those
146 |  * numbered 7 downto 8-n (this is the big-endian convention at the byte
147 |  * level). The context is automatically reinitialized.
148 |  *
149 |  * @param cc    the Skein-224 context
150 |  * @param ub    the extra bits
151 |  * @param n     the number of extra bits (0 to 7)
152 |  * @param dst   the destination buffer
153 |  */
154 | void sph_skein224_addbits_and_close(
155 | 	void *cc, unsigned ub, unsigned n, void *dst);
156 | 
157 | /**
158 |  * Initialize a Skein-256 context. This process performs no memory allocation.
159 |  *
160 |  * @param cc   the Skein-256 context (pointer to a
161 |  *             <code>sph_skein256_context</code>)
162 |  */
163 | void sph_skein256_init(void *cc);
164 | 
165 | /**
166 |  * Process some data bytes. It is acceptable that <code>len</code> is zero
167 |  * (in which case this function does nothing).
168 |  *
169 |  * @param cc     the Skein-256 context
170 |  * @param data   the input data
171 |  * @param len    the input data length (in bytes)
172 |  */
173 | void sph_skein256(void *cc, const void *data, size_t len);
174 | 
175 | /**
176 |  * Terminate the current Skein-256 computation and output the result into
177 |  * the provided buffer. The destination buffer must be wide enough to
178 |  * accommodate the result (32 bytes). The context is automatically
179 |  * reinitialized.
180 |  *
181 |  * @param cc    the Skein-256 context
182 |  * @param dst   the destination buffer
183 |  */
184 | void sph_skein256_close(void *cc, void *dst);
185 | 
186 | /**
187 |  * Add a few additional bits (0 to 7) to the current computation, then
188 |  * terminate it and output the result in the provided buffer, which must
189 |  * be wide enough to accommodate the result (32 bytes). If bit number i
190 |  * in <code>ub</code> has value 2^i, then the extra bits are those
191 |  * numbered 7 downto 8-n (this is the big-endian convention at the byte
192 |  * level). The context is automatically reinitialized.
193 |  *
194 |  * @param cc    the Skein-256 context
195 |  * @param ub    the extra bits
196 |  * @param n     the number of extra bits (0 to 7)
197 |  * @param dst   the destination buffer
198 |  */
199 | void sph_skein256_addbits_and_close(
200 | 	void *cc, unsigned ub, unsigned n, void *dst);
201 | 
202 | /**
203 |  * Initialize a Skein-384 context. This process performs no memory allocation.
204 |  *
205 |  * @param cc   the Skein-384 context (pointer to a
206 |  *             <code>sph_skein384_context</code>)
207 |  */
208 | void sph_skein384_init(void *cc);
209 | 
210 | /**
211 |  * Process some data bytes. It is acceptable that <code>len</code> is zero
212 |  * (in which case this function does nothing).
213 |  *
214 |  * @param cc     the Skein-384 context
215 |  * @param data   the input data
216 |  * @param len    the input data length (in bytes)
217 |  */
218 | void sph_skein384(void *cc, const void *data, size_t len);
219 | 
220 | /**
221 |  * Terminate the current Skein-384 computation and output the result into
222 |  * the provided buffer. The destination buffer must be wide enough to
223 |  * accommodate the result (48 bytes). The context is automatically
224 |  * reinitialized.
225 |  *
226 |  * @param cc    the Skein-384 context
227 |  * @param dst   the destination buffer
228 |  */
229 | void sph_skein384_close(void *cc, void *dst);
230 | 
231 | /**
232 |  * Add a few additional bits (0 to 7) to the current computation, then
233 |  * terminate it and output the result in the provided buffer, which must
234 |  * be wide enough to accommodate the result (48 bytes). If bit number i
235 |  * in <code>ub</code> has value 2^i, then the extra bits are those
236 |  * numbered 7 downto 8-n (this is the big-endian convention at the byte
237 |  * level). The context is automatically reinitialized.
238 |  *
239 |  * @param cc    the Skein-384 context
240 |  * @param ub    the extra bits
241 |  * @param n     the number of extra bits (0 to 7)
242 |  * @param dst   the destination buffer
243 |  */
244 | void sph_skein384_addbits_and_close(
245 | 	void *cc, unsigned ub, unsigned n, void *dst);
246 | 
247 | /**
248 |  * Initialize a Skein-512 context. This process performs no memory allocation.
249 |  *
250 |  * @param cc   the Skein-512 context (pointer to a
251 |  *             <code>sph_skein512_context</code>)
252 |  */
253 | void sph_skein512_init(void *cc);
254 | 
255 | /**
256 |  * Process some data bytes. It is acceptable that <code>len</code> is zero
257 |  * (in which case this function does nothing).
258 |  *
259 |  * @param cc     the Skein-512 context
260 |  * @param data   the input data
261 |  * @param len    the input data length (in bytes)
262 |  */
263 | void sph_skein512(void *cc, const void *data, size_t len);
264 | 
265 | /**
266 |  * Terminate the current Skein-512 computation and output the result into
267 |  * the provided buffer. The destination buffer must be wide enough to
268 |  * accommodate the result (64 bytes). The context is automatically
269 |  * reinitialized.
270 |  *
271 |  * @param cc    the Skein-512 context
272 |  * @param dst   the destination buffer
273 |  */
274 | void sph_skein512_close(void *cc, void *dst);
275 | 
276 | /**
277 |  * Add a few additional bits (0 to 7) to the current computation, then
278 |  * terminate it and output the result in the provided buffer, which must
279 |  * be wide enough to accommodate the result (64 bytes). If bit number i
280 |  * in <code>ub</code> has value 2^i, then the extra bits are those
281 |  * numbered 7 downto 8-n (this is the big-endian convention at the byte
282 |  * level). The context is automatically reinitialized.
283 |  *
284 |  * @param cc    the Skein-512 context
285 |  * @param ub    the extra bits
286 |  * @param n     the number of extra bits (0 to 7)
287 |  * @param dst   the destination buffer
288 |  */
289 | void sph_skein512_addbits_and_close(
290 | 	void *cc, unsigned ub, unsigned n, void *dst);
291 | 
292 | #endif
293 | 
294 | #ifdef __cplusplus
295 | }
296 | #endif
297 | 
298 | #endif
299 | 


--------------------------------------------------------------------------------
/shacommon/sph_types.h:
--------------------------------------------------------------------------------
   1 | /* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
   2 | /**
   3 |  * Basic type definitions.
   4 |  *
   5 |  * This header file defines the generic integer types that will be used
   6 |  * for the implementation of hash functions; it also contains helper
   7 |  * functions which encode and decode multi-byte integer values, using
   8 |  * either little-endian or big-endian conventions.
   9 |  *
  10 |  * This file contains a compile-time test on the size of a byte
  11 |  * (the <code>unsigned char</code> C type). If bytes are not octets,
  12 |  * i.e. if they do not have a size of exactly 8 bits, then compilation
  13 |  * is aborted. Architectures where bytes are not octets are relatively
  14 |  * rare, even in the embedded devices market. We forbid non-octet bytes
  15 |  * because there is no clear convention on how octet streams are encoded
  16 |  * on such systems.
  17 |  *
  18 |  * ==========================(LICENSE BEGIN)============================
  19 |  *
  20 |  * Copyright (c) 2007-2010  Projet RNRT SAPHIR
  21 |  * 
  22 |  * Permission is hereby granted, free of charge, to any person obtaining
  23 |  * a copy of this software and associated documentation files (the
  24 |  * "Software"), to deal in the Software without restriction, including
  25 |  * without limitation the rights to use, copy, modify, merge, publish,
  26 |  * distribute, sublicense, and/or sell copies of the Software, and to
  27 |  * permit persons to whom the Software is furnished to do so, subject to
  28 |  * the following conditions:
  29 |  * 
  30 |  * The above copyright notice and this permission notice shall be
  31 |  * included in all copies or substantial portions of the Software.
  32 |  * 
  33 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  34 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  35 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  36 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  37 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  38 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  39 |  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  40 |  *
  41 |  * ===========================(LICENSE END)=============================
  42 |  *
  43 |  * @file     sph_types.h
  44 |  * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
  45 |  */
  46 | 
  47 | #ifndef SPH_TYPES_H__
  48 | #define SPH_TYPES_H__
  49 | 
  50 | #include <limits.h>
  51 | 
  52 | /*
  53 |  * All our I/O functions are defined over octet streams. We do not know
  54 |  * how to handle input data if bytes are not octets.
  55 |  */
  56 | #if CHAR_BIT != 8
  57 | #error This code requires 8-bit bytes
  58 | #endif
  59 | 
  60 | /* ============= BEGIN documentation block for Doxygen ============ */
  61 | 
  62 | #ifdef DOXYGEN_IGNORE
  63 | 
  64 | /** @mainpage sphlib C code documentation
  65 |  *
  66 |  * @section overview Overview
  67 |  *
  68 |  * <code>sphlib</code> is a library which contains implementations of
  69 |  * various cryptographic hash functions. These pages have been generated
  70 |  * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
  71 |  * document the API for the C implementations.
  72 |  *
  73 |  * The API is described in appropriate header files, which are available
  74 |  * in the "Files" section. Each hash function family has its own header,
  75 |  * whose name begins with <code>"sph_"</code> and contains the family
  76 |  * name. For instance, the API for the RIPEMD hash functions is available
  77 |  * in the header file <code>sph_ripemd.h</code>.
  78 |  *
  79 |  * @section principles API structure and conventions
  80 |  *
  81 |  * @subsection io Input/output conventions
  82 |  *
  83 |  * In all generality, hash functions operate over strings of bits.
  84 |  * Individual bits are rarely encountered in C programming or actual
  85 |  * communication protocols; most protocols converge on the ubiquitous
  86 |  * "octet" which is a group of eight bits. Data is thus expressed as a
  87 |  * stream of octets. The C programming language contains the notion of a
  88 |  * "byte", which is a data unit managed under the type <code>"unsigned
  89 |  * char"</code>. The C standard prescribes that a byte should hold at
  90 |  * least eight bits, but possibly more. Most modern architectures, even
  91 |  * in the embedded world, feature eight-bit bytes, i.e. map bytes to
  92 |  * octets.
  93 |  *
  94 |  * Nevertheless, for some of the implemented hash functions, an extra
  95 |  * API has been added, which allows the input of arbitrary sequences of
  96 |  * bits: when the computation is about to be closed, 1 to 7 extra bits
  97 |  * can be added. The functions for which this API is implemented include
  98 |  * the SHA-2 functions and all SHA-3 candidates.
  99 |  *
 100 |  * <code>sphlib</code> defines hash functions which may hash octet streams,
 101 |  * i.e. streams of bits where the number of bits is a multiple of eight.
 102 |  * The data input functions in the <code>sphlib</code> API expect data
 103 |  * as anonymous pointers (<code>"const void *"</code>) with a length
 104 |  * (of type <code>"size_t"</code>) which gives the input data chunk length
 105 |  * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
 106 |  * header contains a compile-time test which prevents compilation on
 107 |  * architectures where this property is not met.
 108 |  *
 109 |  * The hash function output is also converted into bytes. All currently
 110 |  * implemented hash functions have an output width which is a multiple of
 111 |  * eight, and this is likely to remain true for new designs.
 112 |  *
 113 |  * Most hash functions internally convert input data into 32-bit or 64-bit
 114 |  * words, using either little-endian or big-endian conversion. The hash
 115 |  * output also often consists of such words, which are encoded into output
 116 |  * bytes with a similar endianness convention. Some hash functions have
 117 |  * been only loosely specified on that subject; when necessary,
 118 |  * <code>sphlib</code> has been tested against published "reference"
 119 |  * implementations in order to use the same conventions.
 120 |  *
 121 |  * @subsection shortname Function short name
 122 |  *
 123 |  * Each implemented hash function has a "short name" which is used
 124 |  * internally to derive the identifiers for the functions and context
 125 |  * structures which the function uses. For instance, MD5 has the short
 126 |  * name <code>"md5"</code>. Short names are listed in the next section,
 127 |  * for the implemented hash functions. In subsequent sections, the
 128 |  * short name will be assumed to be <code>"XXX"</code>: replace with the
 129 |  * actual hash function name to get the C identifier.
 130 |  *
 131 |  * Note: some functions within the same family share the same core
 132 |  * elements, such as update function or context structure. Correspondingly,
 133 |  * some of the defined types or functions may actually be macros which
 134 |  * transparently evaluate to another type or function name.
 135 |  *
 136 |  * @subsection context Context structure
 137 |  *
 138 |  * Each implemented hash function has its own context structure, available
 139 |  * under the type name <code>"sph_XXX_context"</code> for the hash function
 140 |  * with short name <code>"XXX"</code>. This structure holds all needed
 141 |  * state for a running hash computation.
 142 |  *
 143 |  * The contents of these structures are meant to be opaque, and private
 144 |  * to the implementation. However, these contents are specified in the
 145 |  * header files so that application code which uses <code>sphlib</code>
 146 |  * may access the size of those structures.
 147 |  *
 148 |  * The caller is responsible for allocating the context structure,
 149 |  * whether by dynamic allocation (<code>malloc()</code> or equivalent),
 150 |  * static allocation (a global permanent variable), as an automatic
 151 |  * variable ("on the stack"), or by any other means which ensures proper
 152 |  * structure alignment. <code>sphlib</code> code performs no dynamic
 153 |  * allocation by itself.
 154 |  *
 155 |  * The context must be initialized before use, using the
 156 |  * <code>sph_XXX_init()</code> function. This function sets the context
 157 |  * state to proper initial values for hashing.
 158 |  *
 159 |  * Since all state data is contained within the context structure,
 160 |  * <code>sphlib</code> is thread-safe and reentrant: several hash
 161 |  * computations may be performed in parallel, provided that they do not
 162 |  * operate on the same context. Moreover, a running computation can be
 163 |  * cloned by copying the context (with a simple <code>memcpy()</code>):
 164 |  * the context and its clone are then independent and may be updated
 165 |  * with new data and/or closed without interfering with each other.
 166 |  * Similarly, a context structure can be moved in memory at will:
 167 |  * context structures contain no pointer, in particular no pointer to
 168 |  * themselves.
 169 |  *
 170 |  * @subsection dataio Data input
 171 |  *
 172 |  * Hashed data is input with the <code>sph_XXX()</code> function, which
 173 |  * takes as parameters a pointer to the context, a pointer to the data
 174 |  * to hash, and the number of data bytes to hash. The context is updated
 175 |  * with the new data.
 176 |  *
 177 |  * Data can be input in one or several calls, with arbitrary input lengths.
 178 |  * However, it is best, performance wise, to input data by relatively big
 179 |  * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
 180 |  * optimize things and avoid internal copying.
 181 |  *
 182 |  * When all data has been input, the context can be closed with
 183 |  * <code>sph_XXX_close()</code>. The hash output is computed and written
 184 |  * into the provided buffer. The caller must take care to provide a
 185 |  * buffer of appropriate length; e.g., when using SHA-1, the output is
 186 |  * a 20-byte word, therefore the output buffer must be at least 20-byte
 187 |  * long.
 188 |  *
 189 |  * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
 190 |  * function can be used instead of <code>sph_XXX_close()</code>. This
 191 |  * function can take a few extra <strong>bits</strong> to be added at
 192 |  * the end of the input message. This allows hashing messages with a
 193 |  * bit length which is not a multiple of 8. The extra bits are provided
 194 |  * as an unsigned integer value, and a bit count. The bit count must be
 195 |  * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
 196 |  * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
 197 |  * For instance, to add three bits of value 1, 1 and 0, the unsigned
 198 |  * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
 199 |  * will be 3.
 200 |  *
 201 |  * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
 202 |  * it evaluates to the function output size, expressed in bits. For instance,
 203 |  * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
 204 |  *
 205 |  * When closed, the context is automatically reinitialized and can be
 206 |  * immediately used for another computation. It is not necessary to call
 207 |  * <code>sph_XXX_init()</code> after a close. Note that
 208 |  * <code>sph_XXX_init()</code> can still be called to "reset" a context,
 209 |  * i.e. forget previously input data, and get back to the initial state.
 210 |  *
 211 |  * @subsection alignment Data alignment
 212 |  *
 213 |  * "Alignment" is a property of data, which is said to be "properly
 214 |  * aligned" when its emplacement in memory is such that the data can
 215 |  * be optimally read by full words. This depends on the type of access;
 216 |  * basically, some hash functions will read data by 32-bit or 64-bit
 217 |  * words. <code>sphlib</code> does not mandate such alignment for input
 218 |  * data, but using aligned data can substantially improve performance.
 219 |  *
 220 |  * As a rule, it is best to input data by chunks whose length (in bytes)
 221 |  * is a multiple of eight, and which begins at "generally aligned"
 222 |  * addresses, such as the base address returned by a call to
 223 |  * <code>malloc()</code>.
 224 |  *
 225 |  * @section functions Implemented functions
 226 |  *
 227 |  * We give here the list of implemented functions. They are grouped by
 228 |  * family; to each family corresponds a specific header file. Each
 229 |  * individual function has its associated "short name". Please refer to
 230 |  * the documentation for that header file to get details on the hash
 231 |  * function denomination and provenance.
 232 |  *
 233 |  * Note: the functions marked with a '(64)' in the list below are
 234 |  * available only if the C compiler provides an integer type of length
 235 |  * 64 bits or more. Such a type is mandatory in the latest C standard
 236 |  * (ISO 9899:1999, aka "C99") and is present in several older compilers
 237 |  * as well, so chances are that such a type is available.
 238 |  *
 239 |  * - HAVAL family: file <code>sph_haval.h</code>
 240 |  *   - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
 241 |  *   - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
 242 |  *   - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
 243 |  *   - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
 244 |  *   - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
 245 |  *   - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
 246 |  *   - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
 247 |  *   - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
 248 |  *   - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
 249 |  *   - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
 250 |  *   - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
 251 |  *   - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
 252 |  *   - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
 253 |  *   - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
 254 |  *   - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
 255 |  * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
 256 |  * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
 257 |  * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
 258 |  * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
 259 |  * - RadioGatun family: file <code>sph_radiogatun.h</code>
 260 |  *   - RadioGatun[32]: short name: <code>radiogatun32</code>
 261 |  *   - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
 262 |  * - RIPEMD family: file <code>sph_ripemd.h</code>
 263 |  *   - RIPEMD: short name: <code>ripemd</code>
 264 |  *   - RIPEMD-128: short name: <code>ripemd128</code>
 265 |  *   - RIPEMD-160: short name: <code>ripemd160</code>
 266 |  * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
 267 |  * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
 268 |  * - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
 269 |  *   - SHA-224: short name: <code>sha224</code>
 270 |  *   - SHA-256: short name: <code>sha256</code>
 271 |  *   - SHA-384: short name: <code>sha384</code> (64)
 272 |  *   - SHA-512: short name: <code>sha512</code> (64)
 273 |  * - Tiger family: file <code>sph_tiger.h</code>
 274 |  *   - Tiger: short name: <code>tiger</code> (64)
 275 |  *   - Tiger2: short name: <code>tiger2</code> (64)
 276 |  * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
 277 |  *   - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
 278 |  *   - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
 279 |  *   - WHIRLPOOL: short name: <code>whirlpool</code> (64)
 280 |  *
 281 |  * The fourteen second-round SHA-3 candidates are also implemented;
 282 |  * when applicable, the implementations follow the "final" specifications
 283 |  * as published for the third round of the SHA-3 competition (BLAKE,
 284 |  * Groestl, JH, Keccak and Skein have been tweaked for third round).
 285 |  *
 286 |  * - BLAKE family: file <code>sph_blake.h</code>
 287 |  *   - BLAKE-224: short name: <code>blake224</code>
 288 |  *   - BLAKE-256: short name: <code>blake256</code>
 289 |  *   - BLAKE-384: short name: <code>blake384</code>
 290 |  *   - BLAKE-512: short name: <code>blake512</code>
 291 |  * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
 292 |  *   - BMW-224: short name: <code>bmw224</code>
 293 |  *   - BMW-256: short name: <code>bmw256</code>
 294 |  *   - BMW-384: short name: <code>bmw384</code> (64)
 295 |  *   - BMW-512: short name: <code>bmw512</code> (64)
 296 |  * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
 297 |  *   CubeHash16/32 in the CubeHash specification)
 298 |  *   - CubeHash-224: short name: <code>cubehash224</code>
 299 |  *   - CubeHash-256: short name: <code>cubehash256</code>
 300 |  *   - CubeHash-384: short name: <code>cubehash384</code>
 301 |  *   - CubeHash-512: short name: <code>cubehash512</code>
 302 |  * - ECHO family: file <code>sph_echo.h</code>
 303 |  *   - ECHO-224: short name: <code>echo224</code>
 304 |  *   - ECHO-256: short name: <code>echo256</code>
 305 |  *   - ECHO-384: short name: <code>echo384</code>
 306 |  *   - ECHO-512: short name: <code>echo512</code>
 307 |  * - Fugue family: file <code>sph_fugue.h</code>
 308 |  *   - Fugue-224: short name: <code>fugue224</code>
 309 |  *   - Fugue-256: short name: <code>fugue256</code>
 310 |  *   - Fugue-384: short name: <code>fugue384</code>
 311 |  *   - Fugue-512: short name: <code>fugue512</code>
 312 |  * - Groestl family: file <code>sph_groestl.h</code>
 313 |  *   - Groestl-224: short name: <code>groestl224</code>
 314 |  *   - Groestl-256: short name: <code>groestl256</code>
 315 |  *   - Groestl-384: short name: <code>groestl384</code>
 316 |  *   - Groestl-512: short name: <code>groestl512</code>
 317 |  * - Hamsi family: file <code>sph_hamsi.h</code>
 318 |  *   - Hamsi-224: short name: <code>hamsi224</code>
 319 |  *   - Hamsi-256: short name: <code>hamsi256</code>
 320 |  *   - Hamsi-384: short name: <code>hamsi384</code>
 321 |  *   - Hamsi-512: short name: <code>hamsi512</code>
 322 |  * - JH family: file <code>sph_jh.h</code>
 323 |  *   - JH-224: short name: <code>jh224</code>
 324 |  *   - JH-256: short name: <code>jh256</code>
 325 |  *   - JH-384: short name: <code>jh384</code>
 326 |  *   - JH-512: short name: <code>jh512</code>
 327 |  * - Keccak family: file <code>sph_keccak.h</code>
 328 |  *   - Keccak-224: short name: <code>keccak224</code>
 329 |  *   - Keccak-256: short name: <code>keccak256</code>
 330 |  *   - Keccak-384: short name: <code>keccak384</code>
 331 |  *   - Keccak-512: short name: <code>keccak512</code>
 332 |  * - Luffa family: file <code>sph_luffa.h</code>
 333 |  *   - Luffa-224: short name: <code>luffa224</code>
 334 |  *   - Luffa-256: short name: <code>luffa256</code>
 335 |  *   - Luffa-384: short name: <code>luffa384</code>
 336 |  *   - Luffa-512: short name: <code>luffa512</code>
 337 |  * - Shabal family: file <code>sph_shabal.h</code>
 338 |  *   - Shabal-192: short name: <code>shabal192</code>
 339 |  *   - Shabal-224: short name: <code>shabal224</code>
 340 |  *   - Shabal-256: short name: <code>shabal256</code>
 341 |  *   - Shabal-384: short name: <code>shabal384</code>
 342 |  *   - Shabal-512: short name: <code>shabal512</code>
 343 |  * - SHAvite-3 family: file <code>sph_shavite.h</code>
 344 |  *   - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
 345 |  *     short name: <code>shavite224</code>
 346 |  *   - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
 347 |  *     short name: <code>shavite256</code>
 348 |  *   - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
 349 |  *     short name: <code>shavite384</code>
 350 |  *   - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
 351 |  *     short name: <code>shavite512</code>
 352 |  * - SIMD family: file <code>sph_simd.h</code>
 353 |  *   - SIMD-224: short name: <code>simd224</code>
 354 |  *   - SIMD-256: short name: <code>simd256</code>
 355 |  *   - SIMD-384: short name: <code>simd384</code>
 356 |  *   - SIMD-512: short name: <code>simd512</code>
 357 |  * - Skein family: file <code>sph_skein.h</code>
 358 |  *   - Skein-224 (nominally specified as Skein-512-224): short name:
 359 |  *     <code>skein224</code> (64)
 360 |  *   - Skein-256 (nominally specified as Skein-512-256): short name:
 361 |  *     <code>skein256</code> (64)
 362 |  *   - Skein-384 (nominally specified as Skein-512-384): short name:
 363 |  *     <code>skein384</code> (64)
 364 |  *   - Skein-512 (nominally specified as Skein-512-512): short name:
 365 |  *     <code>skein512</code> (64)
 366 |  *
 367 |  * For the second-round SHA-3 candidates, the functions are as specified
 368 |  * for round 2, i.e. with the "tweaks" that some candidates added
 369 |  * between round 1 and round 2. Also, some of the submitted packages for
 370 |  * round 2 contained errors, in the specification, reference code, or
 371 |  * both. <code>sphlib</code> implements the corrected versions.
 372 |  */
 373 | 
 374 | /** @hideinitializer
 375 |  * Unsigned integer type whose length is at least 32 bits; on most
 376 |  * architectures, it will have a width of exactly 32 bits. Unsigned C
 377 |  * types implement arithmetics modulo a power of 2; use the
 378 |  * <code>SPH_T32()</code> macro to ensure that the value is truncated
 379 |  * to exactly 32 bits. Unless otherwise specified, all macros and
 380 |  * functions which accept <code>sph_u32</code> values assume that these
 381 |  * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
 382 |  * where <code>sph_u32</code> is larger than that.
 383 |  */
 384 | typedef __arch_dependant__ sph_u32;
 385 | 
 386 | /** @hideinitializer
 387 |  * Signed integer type corresponding to <code>sph_u32</code>; it has
 388 |  * width 32 bits or more.
 389 |  */
 390 | typedef __arch_dependant__ sph_s32;
 391 | 
 392 | /** @hideinitializer
 393 |  * Unsigned integer type whose length is at least 64 bits; on most
 394 |  * architectures which feature such a type, it will have a width of
 395 |  * exactly 64 bits. C99-compliant platforms will have this type; it
 396 |  * is also defined when the GNU compiler (gcc) is used, and on
 397 |  * platforms where <code>unsigned long</code> is large enough. If this
 398 |  * type is not available, then some hash functions which depend on
 399 |  * a 64-bit type will not be available (most notably SHA-384, SHA-512,
 400 |  * Tiger and WHIRLPOOL).
 401 |  */
 402 | typedef __arch_dependant__ sph_u64;
 403 | 
 404 | /** @hideinitializer
 405 |  * Signed integer type corresponding to <code>sph_u64</code>; it has
 406 |  * width 64 bits or more.
 407 |  */
 408 | typedef __arch_dependant__ sph_s64;
 409 | 
 410 | /**
 411 |  * This macro expands the token <code>x</code> into a suitable
 412 |  * constant expression of type <code>sph_u32</code>. Depending on
 413 |  * how this type is defined, a suffix such as <code>UL</code> may
 414 |  * be appended to the argument.
 415 |  *
 416 |  * @param x   the token to expand into a suitable constant expression
 417 |  */
 418 | #define SPH_C32(x)
 419 | 
 420 | /**
 421 |  * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
 422 |  * a no-op, recognized as such by the compiler.
 423 |  *
 424 |  * @param x   the value to truncate (of type <code>sph_u32</code>)
 425 |  */
 426 | #define SPH_T32(x)
 427 | 
 428 | /**
 429 |  * Rotate a 32-bit value by a number of bits to the left. The rotate
 430 |  * count must reside between 1 and 31. This macro assumes that its
 431 |  * first argument fits in 32 bits (no extra bit allowed on machines where
 432 |  * <code>sph_u32</code> is wider); both arguments may be evaluated
 433 |  * several times.
 434 |  *
 435 |  * @param x   the value to rotate (of type <code>sph_u32</code>)
 436 |  * @param n   the rotation count (between 1 and 31, inclusive)
 437 |  */
 438 | #define SPH_ROTL32(x, n)
 439 | 
 440 | /**
 441 |  * Rotate a 32-bit value by a number of bits to the right. The rotate
 442 |  * count must reside between 1 and 31. This macro assumes that its
 443 |  * first argument fits in 32 bits (no extra bit allowed on machines where
 444 |  * <code>sph_u32</code> is wider); both arguments may be evaluated
 445 |  * several times.
 446 |  *
 447 |  * @param x   the value to rotate (of type <code>sph_u32</code>)
 448 |  * @param n   the rotation count (between 1 and 31, inclusive)
 449 |  */
 450 | #define SPH_ROTR32(x, n)
 451 | 
 452 | /**
 453 |  * This macro is defined on systems for which a 64-bit type has been
 454 |  * detected, and is used for <code>sph_u64</code>.
 455 |  */
 456 | #define SPH_64
 457 | 
 458 | /**
 459 |  * This macro is defined on systems for which the "native" integer size is
 460 |  * 64 bits (64-bit values fit in one register).
 461 |  */
 462 | #define SPH_64_TRUE
 463 | 
 464 | /**
 465 |  * This macro expands the token <code>x</code> into a suitable
 466 |  * constant expression of type <code>sph_u64</code>. Depending on
 467 |  * how this type is defined, a suffix such as <code>ULL</code> may
 468 |  * be appended to the argument. This macro is defined only if a
 469 |  * 64-bit type was detected and used for <code>sph_u64</code>.
 470 |  *
 471 |  * @param x   the token to expand into a suitable constant expression
 472 |  */
 473 | #define SPH_C64(x)
 474 | 
 475 | /**
 476 |  * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
 477 |  * a no-op, recognized as such by the compiler. This macro is defined only
 478 |  * if a 64-bit type was detected and used for <code>sph_u64</code>.
 479 |  *
 480 |  * @param x   the value to truncate (of type <code>sph_u64</code>)
 481 |  */
 482 | #define SPH_T64(x)
 483 | 
 484 | /**
 485 |  * Rotate a 64-bit value by a number of bits to the left. The rotate
 486 |  * count must reside between 1 and 63. This macro assumes that its
 487 |  * first argument fits in 64 bits (no extra bit allowed on machines where
 488 |  * <code>sph_u64</code> is wider); both arguments may be evaluated
 489 |  * several times. This macro is defined only if a 64-bit type was detected
 490 |  * and used for <code>sph_u64</code>.
 491 |  *
 492 |  * @param x   the value to rotate (of type <code>sph_u64</code>)
 493 |  * @param n   the rotation count (between 1 and 63, inclusive)
 494 |  */
 495 | #define SPH_ROTL64(x, n)
 496 | 
 497 | /**
 498 |  * Rotate a 64-bit value by a number of bits to the right. The rotate
 499 |  * count must reside between 1 and 63. This macro assumes that its
 500 |  * first argument fits in 64 bits (no extra bit allowed on machines where
 501 |  * <code>sph_u64</code> is wider); both arguments may be evaluated
 502 |  * several times. This macro is defined only if a 64-bit type was detected
 503 |  * and used for <code>sph_u64</code>.
 504 |  *
 505 |  * @param x   the value to rotate (of type <code>sph_u64</code>)
 506 |  * @param n   the rotation count (between 1 and 63, inclusive)
 507 |  */
 508 | #define SPH_ROTR64(x, n)
 509 | 
 510 | /**
 511 |  * This macro evaluates to <code>inline</code> or an equivalent construction,
 512 |  * if available on the compilation platform, or to nothing otherwise. This
 513 |  * is used to declare inline functions, for which the compiler should
 514 |  * endeavour to include the code directly in the caller. Inline functions
 515 |  * are typically defined in header files as replacement for macros.
 516 |  */
 517 | #define SPH_INLINE
 518 | 
 519 | /**
 520 |  * This macro is defined if the platform has been detected as using
 521 |  * little-endian convention. This implies that the <code>sph_u32</code>
 522 |  * type (and the <code>sph_u64</code> type also, if it is defined) has
 523 |  * an exact width (i.e. exactly 32-bit, respectively 64-bit).
 524 |  */
 525 | #define SPH_LITTLE_ENDIAN
 526 | 
 527 | /**
 528 |  * This macro is defined if the platform has been detected as using
 529 |  * big-endian convention. This implies that the <code>sph_u32</code>
 530 |  * type (and the <code>sph_u64</code> type also, if it is defined) has
 531 |  * an exact width (i.e. exactly 32-bit, respectively 64-bit).
 532 |  */
 533 | #define SPH_BIG_ENDIAN
 534 | 
 535 | /**
 536 |  * This macro is defined if 32-bit words (and 64-bit words, if defined)
 537 |  * can be read from and written to memory efficiently in little-endian
 538 |  * convention. This is the case for little-endian platforms, and also
 539 |  * for the big-endian platforms which have special little-endian access
 540 |  * opcodes (e.g. Ultrasparc).
 541 |  */
 542 | #define SPH_LITTLE_FAST
 543 | 
 544 | /**
 545 |  * This macro is defined if 32-bit words (and 64-bit words, if defined)
 546 |  * can be read from and written to memory efficiently in big-endian
 547 |  * convention. This is the case for big-endian platforms, and also
 548 |  * for the little-endian platforms which have special big-endian access
 549 |  * opcodes.
 550 |  */
 551 | #define SPH_BIG_FAST
 552 | 
 553 | /**
 554 |  * On some platforms, this macro is defined to an unsigned integer type
 555 |  * into which pointer values may be cast. The resulting value can then
 556 |  * be tested for being a multiple of 2, 4 or 8, indicating an aligned
 557 |  * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
 558 |  */
 559 | #define SPH_UPTR
 560 | 
 561 | /**
 562 |  * When defined, this macro indicates that unaligned memory accesses
 563 |  * are possible with only a minor penalty, and thus should be preferred
 564 |  * over strategies which first copy data to an aligned buffer.
 565 |  */
 566 | #define SPH_UNALIGNED
 567 | 
 568 | /**
 569 |  * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
 570 |  * <code>0x78563412</code>). This is an inline function which resorts
 571 |  * to inline assembly on some platforms, for better performance.
 572 |  *
 573 |  * @param x   the 32-bit value to byte-swap
 574 |  * @return  the byte-swapped value
 575 |  */
 576 | static inline sph_u32 sph_bswap32(sph_u32 x);
 577 | 
 578 | /**
 579 |  * Byte-swap a 64-bit word. This is an inline function which resorts
 580 |  * to inline assembly on some platforms, for better performance. This
 581 |  * function is defined only if a suitable 64-bit type was found for
 582 |  * <code>sph_u64</code>
 583 |  *
 584 |  * @param x   the 64-bit value to byte-swap
 585 |  * @return  the byte-swapped value
 586 |  */
 587 | static inline sph_u64 sph_bswap64(sph_u64 x);
 588 | 
 589 | /**
 590 |  * Decode a 16-bit unsigned value from memory, in little-endian convention
 591 |  * (least significant byte comes first).
 592 |  *
 593 |  * @param src   the source address
 594 |  * @return  the decoded value
 595 |  */
 596 | static inline unsigned sph_dec16le(const void *src);
 597 | 
 598 | /**
 599 |  * Encode a 16-bit unsigned value into memory, in little-endian convention
 600 |  * (least significant byte comes first).
 601 |  *
 602 |  * @param dst   the destination buffer
 603 |  * @param val   the value to encode
 604 |  */
 605 | static inline void sph_enc16le(void *dst, unsigned val);
 606 | 
 607 | /**
 608 |  * Decode a 16-bit unsigned value from memory, in big-endian convention
 609 |  * (most significant byte comes first).
 610 |  *
 611 |  * @param src   the source address
 612 |  * @return  the decoded value
 613 |  */
 614 | static inline unsigned sph_dec16be(const void *src);
 615 | 
 616 | /**
 617 |  * Encode a 16-bit unsigned value into memory, in big-endian convention
 618 |  * (most significant byte comes first).
 619 |  *
 620 |  * @param dst   the destination buffer
 621 |  * @param val   the value to encode
 622 |  */
 623 | static inline void sph_enc16be(void *dst, unsigned val);
 624 | 
 625 | /**
 626 |  * Decode a 32-bit unsigned value from memory, in little-endian convention
 627 |  * (least significant byte comes first).
 628 |  *
 629 |  * @param src   the source address
 630 |  * @return  the decoded value
 631 |  */
 632 | static inline sph_u32 sph_dec32le(const void *src);
 633 | 
 634 | /**
 635 |  * Decode a 32-bit unsigned value from memory, in little-endian convention
 636 |  * (least significant byte comes first). This function assumes that the
 637 |  * source address is suitably aligned for a direct access, if the platform
 638 |  * supports such things; it can thus be marginally faster than the generic
 639 |  * <code>sph_dec32le()</code> function.
 640 |  *
 641 |  * @param src   the source address
 642 |  * @return  the decoded value
 643 |  */
 644 | static inline sph_u32 sph_dec32le_aligned(const void *src);
 645 | 
 646 | /**
 647 |  * Encode a 32-bit unsigned value into memory, in little-endian convention
 648 |  * (least significant byte comes first).
 649 |  *
 650 |  * @param dst   the destination buffer
 651 |  * @param val   the value to encode
 652 |  */
 653 | static inline void sph_enc32le(void *dst, sph_u32 val);
 654 | 
 655 | /**
 656 |  * Encode a 32-bit unsigned value into memory, in little-endian convention
 657 |  * (least significant byte comes first). This function assumes that the
 658 |  * destination address is suitably aligned for a direct access, if the
 659 |  * platform supports such things; it can thus be marginally faster than
 660 |  * the generic <code>sph_enc32le()</code> function.
 661 |  *
 662 |  * @param dst   the destination buffer
 663 |  * @param val   the value to encode
 664 |  */
 665 | static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
 666 | 
 667 | /**
 668 |  * Decode a 32-bit unsigned value from memory, in big-endian convention
 669 |  * (most significant byte comes first).
 670 |  *
 671 |  * @param src   the source address
 672 |  * @return  the decoded value
 673 |  */
 674 | static inline sph_u32 sph_dec32be(const void *src);
 675 | 
 676 | /**
 677 |  * Decode a 32-bit unsigned value from memory, in big-endian convention
 678 |  * (most significant byte comes first). This function assumes that the
 679 |  * source address is suitably aligned for a direct access, if the platform
 680 |  * supports such things; it can thus be marginally faster than the generic
 681 |  * <code>sph_dec32be()</code> function.
 682 |  *
 683 |  * @param src   the source address
 684 |  * @return  the decoded value
 685 |  */
 686 | static inline sph_u32 sph_dec32be_aligned(const void *src);
 687 | 
 688 | /**
 689 |  * Encode a 32-bit unsigned value into memory, in big-endian convention
 690 |  * (most significant byte comes first).
 691 |  *
 692 |  * @param dst   the destination buffer
 693 |  * @param val   the value to encode
 694 |  */
 695 | static inline void sph_enc32be(void *dst, sph_u32 val);
 696 | 
 697 | /**
 698 |  * Encode a 32-bit unsigned value into memory, in big-endian convention
 699 |  * (most significant byte comes first). This function assumes that the
 700 |  * destination address is suitably aligned for a direct access, if the
 701 |  * platform supports such things; it can thus be marginally faster than
 702 |  * the generic <code>sph_enc32be()</code> function.
 703 |  *
 704 |  * @param dst   the destination buffer
 705 |  * @param val   the value to encode
 706 |  */
 707 | static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
 708 | 
 709 | /**
 710 |  * Decode a 64-bit unsigned value from memory, in little-endian convention
 711 |  * (least significant byte comes first). This function is defined only
 712 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 713 |  *
 714 |  * @param src   the source address
 715 |  * @return  the decoded value
 716 |  */
 717 | static inline sph_u64 sph_dec64le(const void *src);
 718 | 
 719 | /**
 720 |  * Decode a 64-bit unsigned value from memory, in little-endian convention
 721 |  * (least significant byte comes first). This function assumes that the
 722 |  * source address is suitably aligned for a direct access, if the platform
 723 |  * supports such things; it can thus be marginally faster than the generic
 724 |  * <code>sph_dec64le()</code> function. This function is defined only
 725 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 726 |  *
 727 |  * @param src   the source address
 728 |  * @return  the decoded value
 729 |  */
 730 | static inline sph_u64 sph_dec64le_aligned(const void *src);
 731 | 
 732 | /**
 733 |  * Encode a 64-bit unsigned value into memory, in little-endian convention
 734 |  * (least significant byte comes first). This function is defined only
 735 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 736 |  *
 737 |  * @param dst   the destination buffer
 738 |  * @param val   the value to encode
 739 |  */
 740 | static inline void sph_enc64le(void *dst, sph_u64 val);
 741 | 
 742 | /**
 743 |  * Encode a 64-bit unsigned value into memory, in little-endian convention
 744 |  * (least significant byte comes first). This function assumes that the
 745 |  * destination address is suitably aligned for a direct access, if the
 746 |  * platform supports such things; it can thus be marginally faster than
 747 |  * the generic <code>sph_enc64le()</code> function. This function is defined
 748 |  * only if a suitable 64-bit type was detected and used for
 749 |  * <code>sph_u64</code>.
 750 |  *
 751 |  * @param dst   the destination buffer
 752 |  * @param val   the value to encode
 753 |  */
 754 | static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
 755 | 
 756 | /**
 757 |  * Decode a 64-bit unsigned value from memory, in big-endian convention
 758 |  * (most significant byte comes first). This function is defined only
 759 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 760 |  *
 761 |  * @param src   the source address
 762 |  * @return  the decoded value
 763 |  */
 764 | static inline sph_u64 sph_dec64be(const void *src);
 765 | 
 766 | /**
 767 |  * Decode a 64-bit unsigned value from memory, in big-endian convention
 768 |  * (most significant byte comes first). This function assumes that the
 769 |  * source address is suitably aligned for a direct access, if the platform
 770 |  * supports such things; it can thus be marginally faster than the generic
 771 |  * <code>sph_dec64be()</code> function. This function is defined only
 772 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 773 |  *
 774 |  * @param src   the source address
 775 |  * @return  the decoded value
 776 |  */
 777 | static inline sph_u64 sph_dec64be_aligned(const void *src);
 778 | 
 779 | /**
 780 |  * Encode a 64-bit unsigned value into memory, in big-endian convention
 781 |  * (most significant byte comes first). This function is defined only
 782 |  * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
 783 |  *
 784 |  * @param dst   the destination buffer
 785 |  * @param val   the value to encode
 786 |  */
 787 | static inline void sph_enc64be(void *dst, sph_u64 val);
 788 | 
 789 | /**
 790 |  * Encode a 64-bit unsigned value into memory, in big-endian convention
 791 |  * (most significant byte comes first). This function assumes that the
 792 |  * destination address is suitably aligned for a direct access, if the
 793 |  * platform supports such things; it can thus be marginally faster than
 794 |  * the generic <code>sph_enc64be()</code> function. This function is defined
 795 |  * only if a suitable 64-bit type was detected and used for
 796 |  * <code>sph_u64</code>.
 797 |  *
 798 |  * @param dst   the destination buffer
 799 |  * @param val   the value to encode
 800 |  */
 801 | static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
 802 | 
 803 | #endif
 804 | 
 805 | /* ============== END documentation block for Doxygen ============= */
 806 | 
 807 | #ifndef DOXYGEN_IGNORE
 808 | 
/*
 * We want to define the types "sph_u32" and "sph_u64" which hold
 * unsigned values of at least, respectively, 32 and 64 bits. These
 * tests should select appropriate types for most platforms. The
 * macro "SPH_64" is defined if a 64-bit type is supported.
 *
 * Besides the types themselves, this section defines:
 *   sph_s32, sph_s64   signed counterparts of sph_u32 / sph_u64
 *   SPH_C32(x)         wraps a 32-bit constant, cast to sph_u32
 *   SPH_C64(x)         wraps a 64-bit constant, cast to sph_u64
 *
 * When SPH_NO_64 evaluates to non-zero, no 64-bit type is defined and
 * SPH_64 is left undefined.
 */

/* Start from a clean state: both macros are (re)computed below. */
#undef SPH_64
#undef SPH_64_TRUE

#if defined __STDC__ && __STDC_VERSION__ >= 199901L

/*
 * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
 * type, if any, or otherwise use a wider type (which must exist, for
 * C99 conformance).
 */

#include <stdint.h>

/* UINT32_MAX is defined only when the exact-width uint32_t exists. */
#ifdef UINT32_MAX
typedef uint32_t sph_u32;
typedef int32_t sph_s32;
#else
typedef uint_fast32_t sph_u32;
typedef int_fast32_t sph_s32;
#endif
#if !SPH_NO_64
/* Same strategy for 64 bits: exact width if available, "fast" type otherwise. */
#ifdef UINT64_MAX
typedef uint64_t sph_u64;
typedef int64_t sph_s64;
#else
typedef uint_fast64_t sph_u64;
typedef int_fast64_t sph_s64;
#endif
#endif

/* In this branch the constant macros only cast; no suffix is pasted. */
#define SPH_C32(x)    ((sph_u32)(x))
#if !SPH_NO_64
#define SPH_C64(x)    ((sph_u64)(x))
#define SPH_64  1
#endif

#else

/*
 * On non-C99 systems, we use "unsigned int" if it is wide enough,
 * "unsigned long" otherwise. This supports all "reasonable" architectures.
 * We have to be cautious: pre-C99 preprocessors handle constants
 * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
 *
 * NOTE(review): UINT_MAX, ULONG_MAX and ULLONG_MAX are presumably
 * provided by a <limits.h> include earlier in this file -- confirm.
 */

/* Two shifts by 11 leave at least 10 set bits iff "unsigned int" has 32+ bits. */
#if ((UINT_MAX >> 11) >> 11) >= 0x3FF

typedef unsigned int sph_u32;
typedef int sph_s32;

/* The argument must be a literal constant: a "U" suffix is pasted onto it. */
#define SPH_C32(x)    ((sph_u32)(x ## U))

#else

typedef unsigned long sph_u32;
typedef long sph_s32;

/* The argument must be a literal constant: a "UL" suffix is pasted onto it. */
#define SPH_C32(x)    ((sph_u32)(x ## UL))

#endif

#if !SPH_NO_64

/*
 * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
 * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
 * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
 * test whether "unsigned long long" is available; we also know that
 * gcc features this type, even if the libc headers do not know it.
 */

/* Two shifts by 31 leave at least 2 set bits iff "unsigned long" has 64+ bits. */
#if ((ULONG_MAX >> 31) >> 31) >= 3

typedef unsigned long sph_u64;
typedef long sph_s64;

#define SPH_C64(x)    ((sph_u64)(x ## UL))

#define SPH_64  1

#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__

typedef unsigned long long sph_u64;
typedef long long sph_s64;

#define SPH_C64(x)    ((sph_u64)(x ## ULL))

#define SPH_64  1

#else

/*
 * No 64-bit type: sph_u64, SPH_C64 and SPH_64 all remain undefined.
 */

#endif

#endif

#endif
 916 | 
/*
 * If the "unsigned long" type has a length of 64 bits or more, then this
 * is a "true" 64-bit architecture. This is also the case with Visual C on
 * amd64 (detected through _M_X64), even though the "long" type is limited
 * to 32 bits there.
 *
 * SPH_64_TRUE is defined only when SPH_64 is itself non-zero.
 */
#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
#define SPH_64_TRUE   1
#endif
 925 | 
/*
 * Truncation and rotation helpers.
 *
 * SPH_T32(x) / SPH_T64(x)   keep only the low 32 / 64 bits of x
 * SPH_ROTL32 / SPH_ROTL64   rotate x left by n bits
 * SPH_ROTR32 / SPH_ROTR64   rotate x right by n bits (expressed as a
 *                           left rotation by the complementary count)
 *
 * Implementation note: some processors have specific opcodes to perform
 * a rotation. Recent versions of gcc recognize the expressions below and
 * use the relevant opcodes, when appropriate.
 *
 * Caveats:
 *  - The arguments are expanded more than once; do not pass expressions
 *    with side effects.
 *  - The count n is meant to lie strictly between 0 and the word size:
 *    with n equal to 0 (or to the word size) one of the two shifts uses
 *    a count equal to the word size, which is undefined in C when the
 *    type has exactly that width.
 */

#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n)   SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))

/* The 64-bit variants exist only when a 64-bit type was selected. */
#if SPH_64

#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n)   SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))

#endif
 943 | 
#ifndef DOXYGEN_IGNORE
/*
 * Define SPH_INLINE to be an "inline" qualifier, if available. We define
 * some small macro-like functions which benefit greatly from being inlined.
 *
 * Selection order:
 *   - C99 compilers and gcc:   "inline"
 *   - Microsoft Visual C:      "__inline"
 *   - anything else:           empty (the functions are merely "static")
 */
#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
#define SPH_INLINE inline
#elif defined _MSC_VER
#define SPH_INLINE __inline
#else
#define SPH_INLINE
#endif
#endif
 957 | 
 958 | /*
 * We define some macros which qualify the architecture. These macros
 * may be explicitly set externally (e.g. as compiler parameters). The
 * code below sets those macros if they are not already defined.
 962 |  *
 963 |  * Most macros are boolean, thus evaluate to either zero or non-zero.
 964 |  * The SPH_UPTR macro is special, in that it evaluates to a C type,
 965 |  * or is not defined.
 966 |  *
 967 |  * SPH_UPTR             if defined: unsigned type to cast pointers into
 968 |  *
 969 |  * SPH_UNALIGNED        non-zero if unaligned accesses are efficient
 970 |  * SPH_LITTLE_ENDIAN    non-zero if architecture is known to be little-endian
 971 |  * SPH_BIG_ENDIAN       non-zero if architecture is known to be big-endian
 972 |  * SPH_LITTLE_FAST      non-zero if little-endian decoding is fast
 973 |  * SPH_BIG_FAST         non-zero if big-endian decoding is fast
 974 |  *
 975 |  * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
 976 |  * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
 977 |  * _must_ be non-zero in those situations. The 32-bit and 64-bit types
 978 |  * _must_ also have an exact width.
 979 |  *
 980 |  * SPH_SPARCV9_GCC_32   UltraSPARC-compatible with gcc, 32-bit mode
 981 |  * SPH_SPARCV9_GCC_64   UltraSPARC-compatible with gcc, 64-bit mode
 982 |  * SPH_SPARCV9_GCC      UltraSPARC-compatible with gcc
 983 |  * SPH_I386_GCC         x86-compatible (32-bit) with gcc
 984 |  * SPH_I386_MSVC        x86-compatible (32-bit) with Microsoft Visual C
 985 |  * SPH_AMD64_GCC        x86-compatible (64-bit) with gcc
 986 |  * SPH_AMD64_MSVC       x86-compatible (64-bit) with Microsoft Visual C
 987 |  * SPH_PPC32_GCC        PowerPC, 32-bit, with gcc
 988 |  * SPH_PPC64_GCC        PowerPC, 64-bit, with gcc
 989 |  *
 990 |  * TODO: enhance automatic detection, for more architectures and compilers.
 991 |  * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
 992 |  * some very fast functions (e.g. MD4) when using unaligned input data.
 * The CPU-specific-with-GCC macros are useful only for inline assembly,
 * which is normally confined to this header file.
 995 |  */
 996 | 
/*
 * Automatic architecture detection. Exactly one branch of the chain
 * below is selected (or none, for an unrecognized platform); it records
 * its findings in SPH_DETECT_xxx macros only. Those are copied into the
 * corresponding SPH_xxx macros afterwards, unless the latter were already
 * defined externally.
 */

/*
 * 32-bit x86, aka "i386 compatible".
 */
#if defined __i386__ || defined _M_IX86

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u32
#ifdef __GNUC__
#define SPH_DETECT_I386_GCC          1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_I386_MSVC         1
#endif

/*
 * 64-bit x86, hereafter known as "amd64".
 *
 * NOTE(review): the pointer-sized type is sph_u64, which is not defined
 * when SPH_NO_64 is set -- confirm that combination is never used.
 */
#elif defined __x86_64 || defined _M_X64

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u64
#ifdef __GNUC__
#define SPH_DETECT_AMD64_GCC         1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_AMD64_MSVC        1
#endif

/*
 * 64-bit Sparc architecture (implies v9).
 */
#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
	|| defined __sparcv9

#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u64
/*
 * With gcc, little-endian decoding is also flagged as fast on this
 * big-endian platform.
 */
#ifdef __GNUC__
#define SPH_DETECT_SPARCV9_GCC_64    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * 32-bit Sparc.
 */
#elif (defined __sparc__ || defined __sparc) \
	&& !(defined __sparcv9 || defined __arch64__)

#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u32
/* A v9 CPU running in 32-bit mode, as reported by gcc. */
#if defined __GNUC__ && defined __sparc_v9__
#define SPH_DETECT_SPARCV9_GCC_32    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * ARM, little-endian. Only the endianness is recorded: neither
 * SPH_DETECT_UPTR nor SPH_DETECT_UNALIGNED is set here.
 */
#elif defined __arm__ && __ARMEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, little-endian. Only the endianness is recorded.
 */
#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, big-endian. Only the endianness is recorded.
 */
#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__

#define SPH_DETECT_BIG_ENDIAN        1

/*
 * PowerPC.
 */
#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
	|| defined _ARCH_PPC

/*
 * Note: we do not declare cross-endian access to be "fast": even if
 * using inline assembly, implementation should still assume that
 * keeping the decoded word in a temporary is faster than decoding
 * it again.
 */
#if defined __GNUC__
/* 64-bit vs 32-bit PowerPC is decided from SPH_64_TRUE. */
#if SPH_64_TRUE
#define SPH_DETECT_PPC64_GCC         1
#else
#define SPH_DETECT_PPC32_GCC         1
#endif
#endif

/*
 * Endianness comes from compiler-provided macros; if none of them is
 * defined, no endianness is recorded for PowerPC.
 */
#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif

/*
 * Itanium, 64-bit.
 */
#elif defined __ia64 || defined __ia64__ \
	|| defined __itanium__ || defined _M_IA64

/* Little-endian is assumed unless a big-endian macro says otherwise. */
#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#else
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif
/* The pointer-sized type depends on the data model (LP64 or not). */
#if defined __LP64__ || defined _LP64
#define SPH_DETECT_UPTR              sph_u64
#else
#define SPH_DETECT_UPTR              sph_u32
#endif

#endif
1118 | 
/*
 * Publish the detection results. Each SPH_DETECT_xxx macro set by the
 * architecture detection is copied into the matching SPH_xxx macro,
 * unless SPH_xxx was already defined (e.g. on the compiler command line),
 * in which case the external definition wins.
 */

/* Generic "UltraSPARC with gcc" flag, whatever the 32/64-bit mode. */
#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
#define SPH_DETECT_SPARCV9_GCC       1
#endif

#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
#define SPH_UNALIGNED         SPH_DETECT_UNALIGNED
#endif
#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
#define SPH_UPTR              SPH_DETECT_UPTR
#endif
#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
#define SPH_LITTLE_ENDIAN     SPH_DETECT_LITTLE_ENDIAN
#endif
#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
#define SPH_BIG_ENDIAN        SPH_DETECT_BIG_ENDIAN
#endif
#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST       SPH_DETECT_LITTLE_FAST
#endif
#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
#define SPH_BIG_FAST    SPH_DETECT_BIG_FAST
#endif
#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
#define SPH_SPARCV9_GCC_32    SPH_DETECT_SPARCV9_GCC_32
#endif
#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
#define SPH_SPARCV9_GCC_64    SPH_DETECT_SPARCV9_GCC_64
#endif
#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
#define SPH_SPARCV9_GCC       SPH_DETECT_SPARCV9_GCC
#endif
#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
#define SPH_I386_GCC          SPH_DETECT_I386_GCC
#endif
#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
#define SPH_I386_MSVC         SPH_DETECT_I386_MSVC
#endif
#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
#define SPH_AMD64_GCC         SPH_DETECT_AMD64_GCC
#endif
#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
#define SPH_AMD64_MSVC        SPH_DETECT_AMD64_MSVC
#endif
#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
#define SPH_PPC32_GCC         SPH_DETECT_PPC32_GCC
#endif
#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
#define SPH_PPC64_GCC         SPH_DETECT_PPC64_GCC
#endif

/*
 * Defaults: decoding in the native byte order is considered fast, unless
 * the corresponding SPH_xxx_FAST macro was already defined.
 */
#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST              1
#endif
#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
#define SPH_BIG_FAST                 1
#endif

/*
 * Sanity check: the pointer-cast fast paths need a known byte order, so
 * a build with SPH_UPTR but no endianness is rejected at compile time.
 */
#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
#error SPH_UPTR defined, but endianness is not known.
#endif
1179 | 
1180 | #if SPH_I386_GCC && !SPH_NO_ASM
1181 | 
1182 | /*
1183 |  * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1184 |  * values.
1185 |  */
1186 | 
1187 | static SPH_INLINE sph_u32
1188 | sph_bswap32(sph_u32 x)
1189 | {
1190 | 	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1191 | 	return x;
1192 | }
1193 | 
1194 | #if SPH_64
1195 | 
1196 | static SPH_INLINE sph_u64
1197 | sph_bswap64(sph_u64 x)
1198 | {
1199 | 	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1200 | 		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1201 | }
1202 | 
1203 | #endif
1204 | 
1205 | #elif SPH_AMD64_GCC && !SPH_NO_ASM
1206 | 
1207 | /*
 * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
 * byte-swap 32-bit and 64-bit values, respectively.
1210 |  */
1211 | 
1212 | static SPH_INLINE sph_u32
1213 | sph_bswap32(sph_u32 x)
1214 | {
1215 | 	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1216 | 	return x;
1217 | }
1218 | 
1219 | #if SPH_64
1220 | 
1221 | static SPH_INLINE sph_u64
1222 | sph_bswap64(sph_u64 x)
1223 | {
1224 | 	__asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
1225 | 	return x;
1226 | }
1227 | 
1228 | #endif
1229 | 
1230 | /*
1231 |  * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
1232 |  * to generate proper opcodes for endianness swapping with the pure C
1233 |  * implementation below.
1234 |  *
1235 | 
1236 | #elif SPH_I386_MSVC && !SPH_NO_ASM
1237 | 
1238 | static __inline sph_u32 __declspec(naked) __fastcall
1239 | sph_bswap32(sph_u32 x)
1240 | {
1241 | 	__asm {
1242 | 		bswap  ecx
1243 | 		mov    eax,ecx
1244 | 		ret
1245 | 	}
1246 | }
1247 | 
1248 | #if SPH_64
1249 | 
1250 | static SPH_INLINE sph_u64
1251 | sph_bswap64(sph_u64 x)
1252 | {
1253 | 	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1254 | 		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1255 | }
1256 | 
1257 | #endif
1258 | 
1259 |  *
1260 |  * [end of disabled code]
1261 |  */
1262 | 
1263 | #else
1264 | 
1265 | static SPH_INLINE sph_u32
1266 | sph_bswap32(sph_u32 x)
1267 | {
1268 | 	x = SPH_T32((x << 16) | (x >> 16));
1269 | 	x = ((x & SPH_C32(0xFF00FF00)) >> 8)
1270 | 		| ((x & SPH_C32(0x00FF00FF)) << 8);
1271 | 	return x;
1272 | }
1273 | 
1274 | #if SPH_64
1275 | 
1276 | /**
1277 |  * Byte-swap a 64-bit value.
1278 |  *
1279 |  * @param x   the input value
1280 |  * @return  the byte-swapped value
1281 |  */
1282 | static SPH_INLINE sph_u64
1283 | sph_bswap64(sph_u64 x)
1284 | {
1285 | 	x = SPH_T64((x << 32) | (x >> 32));
1286 | 	x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
1287 | 		| ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
1288 | 	x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
1289 | 		| ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
1290 | 	return x;
1291 | }
1292 | 
1293 | #endif
1294 | 
1295 | #endif
1296 | 
1297 | #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1298 | 
1299 | /*
1300 |  * On UltraSPARC systems, native ordering is big-endian, but it is
1301 |  * possible to perform little-endian read accesses by specifying the
1302 |  * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
1303 |  * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
1304 |  * contains the source address and %dst is the destination register,
1305 |  * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
1306 |  * to get the address space name. The latter format is better since it
1307 |  * combines an addition and the actual access in a single opcode; but
1308 |  * it requires the setting (and subsequent resetting) of %asi, which is
1309 |  * slow. Some operations (i.e. MD5 compression function) combine many
1310 |  * successive little-endian read accesses, which may share the same
1311 |  * %asi setting. The macros below contain the appropriate inline
1312 |  * assembly.
1313 |  */
1314 | 
/*
 * SPH_SPARCV9_SET_ASI: declares the local variable "sph_sparcv9_asi",
 * saves the current %asi register into it, then sets %asi to 0x88.
 * Because it expands to a declaration followed by a statement, it must be
 * placed where a declaration is allowed, and at most once per scope.
 */
#define SPH_SPARCV9_SET_ASI   \
	sph_u32 sph_sparcv9_asi; \
	__asm__ __volatile__ ( \
		"rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));

/*
 * SPH_SPARCV9_RESET_ASI: writes the value saved in "sph_sparcv9_asi" back
 * into %asi. It relies on the variable declared by SPH_SPARCV9_SET_ASI
 * being in scope.
 */
#define SPH_SPARCV9_RESET_ASI  \
	__asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));

/*
 * SPH_SPARCV9_DEC32LE(base, idx): loads the 32-bit word at byte offset
 * idx*4 from "base", using the address space currently selected by %asi,
 * and yields it as the value of the expression. "idx" is stringified into
 * the assembly template, so it must be spelled in a form the assembler
 * accepts as an immediate (not a C variable). The macro uses the gcc
 * statement-expression extension "({ ... })".
 */
#define SPH_SPARCV9_DEC32LE(base, idx)   ({ \
		sph_u32 sph_sparcv9_tmp; \
		__asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
			: "=r" (sph_sparcv9_tmp) : "r" (base)); \
		sph_sparcv9_tmp; \
	})
1329 | 
1330 | #endif
1331 | 
1332 | static SPH_INLINE void
1333 | sph_enc16be(void *dst, unsigned val)
1334 | {
1335 | 	((unsigned char *)dst)[0] = (val >> 8);
1336 | 	((unsigned char *)dst)[1] = val;
1337 | }
1338 | 
1339 | static SPH_INLINE unsigned
1340 | sph_dec16be(const void *src)
1341 | {
1342 | 	return ((unsigned)(((const unsigned char *)src)[0]) << 8)
1343 | 		| (unsigned)(((const unsigned char *)src)[1]);
1344 | }
1345 | 
1346 | static SPH_INLINE void
1347 | sph_enc16le(void *dst, unsigned val)
1348 | {
1349 | 	((unsigned char *)dst)[0] = val;
1350 | 	((unsigned char *)dst)[1] = val >> 8;
1351 | }
1352 | 
1353 | static SPH_INLINE unsigned
1354 | sph_dec16le(const void *src)
1355 | {
1356 | 	return (unsigned)(((const unsigned char *)src)[0])
1357 | 		| ((unsigned)(((const unsigned char *)src)[1]) << 8);
1358 | }
1359 | 
1360 | /**
1361 |  * Encode a 32-bit value into the provided buffer (big endian convention).
1362 |  *
1363 |  * @param dst   the destination buffer
1364 |  * @param val   the 32-bit value to encode
1365 |  */
1366 | static SPH_INLINE void
1367 | sph_enc32be(void *dst, sph_u32 val)
1368 | {
1369 | #if defined SPH_UPTR
1370 | #if SPH_UNALIGNED
1371 | #if SPH_LITTLE_ENDIAN
1372 | 	val = sph_bswap32(val);
1373 | #endif
1374 | 	*(sph_u32 *)dst = val;
1375 | #else
1376 | 	if (((SPH_UPTR)dst & 3) == 0) {
1377 | #if SPH_LITTLE_ENDIAN
1378 | 		val = sph_bswap32(val);
1379 | #endif
1380 | 		*(sph_u32 *)dst = val;
1381 | 	} else {
1382 | 		((unsigned char *)dst)[0] = (val >> 24);
1383 | 		((unsigned char *)dst)[1] = (val >> 16);
1384 | 		((unsigned char *)dst)[2] = (val >> 8);
1385 | 		((unsigned char *)dst)[3] = val;
1386 | 	}
1387 | #endif
1388 | #else
1389 | 	((unsigned char *)dst)[0] = (val >> 24);
1390 | 	((unsigned char *)dst)[1] = (val >> 16);
1391 | 	((unsigned char *)dst)[2] = (val >> 8);
1392 | 	((unsigned char *)dst)[3] = val;
1393 | #endif
1394 | }
1395 | 
1396 | /**
1397 |  * Encode a 32-bit value into the provided buffer (big endian convention).
1398 |  * The destination buffer must be properly aligned.
1399 |  *
1400 |  * @param dst   the destination buffer (32-bit aligned)
1401 |  * @param val   the value to encode
1402 |  */
1403 | static SPH_INLINE void
1404 | sph_enc32be_aligned(void *dst, sph_u32 val)
1405 | {
1406 | #if SPH_LITTLE_ENDIAN
1407 | 	*(sph_u32 *)dst = sph_bswap32(val);
1408 | #elif SPH_BIG_ENDIAN
1409 | 	*(sph_u32 *)dst = val;
1410 | #else
1411 | 	((unsigned char *)dst)[0] = (val >> 24);
1412 | 	((unsigned char *)dst)[1] = (val >> 16);
1413 | 	((unsigned char *)dst)[2] = (val >> 8);
1414 | 	((unsigned char *)dst)[3] = val;
1415 | #endif
1416 | }
1417 | 
1418 | /**
1419 |  * Decode a 32-bit value from the provided buffer (big endian convention).
1420 |  *
1421 |  * @param src   the source buffer
1422 |  * @return  the decoded value
1423 |  */
1424 | static SPH_INLINE sph_u32
1425 | sph_dec32be(const void *src)
1426 | {
1427 | #if defined SPH_UPTR
1428 | #if SPH_UNALIGNED
1429 | #if SPH_LITTLE_ENDIAN
1430 | 	return sph_bswap32(*(const sph_u32 *)src);
1431 | #else
1432 | 	return *(const sph_u32 *)src;
1433 | #endif
1434 | #else
1435 | 	if (((SPH_UPTR)src & 3) == 0) {
1436 | #if SPH_LITTLE_ENDIAN
1437 | 		return sph_bswap32(*(const sph_u32 *)src);
1438 | #else
1439 | 		return *(const sph_u32 *)src;
1440 | #endif
1441 | 	} else {
1442 | 		return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1443 | 			| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1444 | 			| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1445 | 			| (sph_u32)(((const unsigned char *)src)[3]);
1446 | 	}
1447 | #endif
1448 | #else
1449 | 	return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1450 | 		| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1451 | 		| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1452 | 		| (sph_u32)(((const unsigned char *)src)[3]);
1453 | #endif
1454 | }
1455 | 
1456 | /**
1457 |  * Decode a 32-bit value from the provided buffer (big endian convention).
1458 |  * The source buffer must be properly aligned.
1459 |  *
1460 |  * @param src   the source buffer (32-bit aligned)
1461 |  * @return  the decoded value
1462 |  */
1463 | static SPH_INLINE sph_u32
1464 | sph_dec32be_aligned(const void *src)
1465 | {
1466 | #if SPH_LITTLE_ENDIAN
1467 | 	return sph_bswap32(*(const sph_u32 *)src);
1468 | #elif SPH_BIG_ENDIAN
1469 | 	return *(const sph_u32 *)src;
1470 | #else
1471 | 	return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1472 | 		| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1473 | 		| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1474 | 		| (sph_u32)(((const unsigned char *)src)[3]);
1475 | #endif
1476 | }
1477 | 
1478 | /**
1479 |  * Encode a 32-bit value into the provided buffer (little endian convention).
1480 |  *
1481 |  * @param dst   the destination buffer
1482 |  * @param val   the 32-bit value to encode
1483 |  */
1484 | static SPH_INLINE void
1485 | sph_enc32le(void *dst, sph_u32 val)
1486 | {
1487 | #if defined SPH_UPTR
1488 | #if SPH_UNALIGNED
1489 | #if SPH_BIG_ENDIAN
1490 | 	val = sph_bswap32(val);
1491 | #endif
1492 | 	*(sph_u32 *)dst = val;
1493 | #else
1494 | 	if (((SPH_UPTR)dst & 3) == 0) {
1495 | #if SPH_BIG_ENDIAN
1496 | 		val = sph_bswap32(val);
1497 | #endif
1498 | 		*(sph_u32 *)dst = val;
1499 | 	} else {
1500 | 		((unsigned char *)dst)[0] = val;
1501 | 		((unsigned char *)dst)[1] = (val >> 8);
1502 | 		((unsigned char *)dst)[2] = (val >> 16);
1503 | 		((unsigned char *)dst)[3] = (val >> 24);
1504 | 	}
1505 | #endif
1506 | #else
1507 | 	((unsigned char *)dst)[0] = val;
1508 | 	((unsigned char *)dst)[1] = (val >> 8);
1509 | 	((unsigned char *)dst)[2] = (val >> 16);
1510 | 	((unsigned char *)dst)[3] = (val >> 24);
1511 | #endif
1512 | }
1513 | 
1514 | /**
1515 |  * Encode a 32-bit value into the provided buffer (little endian convention).
1516 |  * The destination buffer must be properly aligned.
1517 |  *
1518 |  * @param dst   the destination buffer (32-bit aligned)
1519 |  * @param val   the value to encode
1520 |  */
1521 | static SPH_INLINE void
1522 | sph_enc32le_aligned(void *dst, sph_u32 val)
1523 | {
1524 | #if SPH_LITTLE_ENDIAN
1525 | 	*(sph_u32 *)dst = val;
1526 | #elif SPH_BIG_ENDIAN
1527 | 	*(sph_u32 *)dst = sph_bswap32(val);
1528 | #else
1529 | 	((unsigned char *)dst)[0] = val;
1530 | 	((unsigned char *)dst)[1] = (val >> 8);
1531 | 	((unsigned char *)dst)[2] = (val >> 16);
1532 | 	((unsigned char *)dst)[3] = (val >> 24);
1533 | #endif
1534 | }
1535 | 
1536 | /**
1537 |  * Decode a 32-bit value from the provided buffer (little endian convention).
1538 |  * A word load is used only if SPH_UNALIGNED is set or src tests as 4-byte aligned.
1539 |  * @param src   the source buffer
1540 |  * @return  the decoded value
1541 |  */
1542 | static SPH_INLINE sph_u32
1543 | sph_dec32le(const void *src)
1544 | {
1545 | #if defined SPH_UPTR /* SPH_UPTR: integer type used below to test the address */
1546 | #if SPH_UNALIGNED /* word load, no alignment test */
1547 | #if SPH_BIG_ENDIAN
1548 | 	return sph_bswap32(*(const sph_u32 *)src);
1549 | #else /* !SPH_BIG_ENDIAN */
1550 | 	return *(const sph_u32 *)src;
1551 | #endif /* SPH_BIG_ENDIAN */
1552 | #else /* !SPH_UNALIGNED */
1553 | 	if (((SPH_UPTR)src & 3) == 0) { /* address is a multiple of 4: word load */
1554 | #if SPH_BIG_ENDIAN
1555 | #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1556 | 		sph_u32 tmp;
1557 | 
1558 | 		/*
1559 | 		 * "__volatile__" is needed here because without it,
1560 | 		 * gcc-3.4.3 miscompiles the code and performs the
1561 | 		 * access before the test on the address, thus triggering
1562 | 		 * a bus error...
1563 | 		 */
1564 | 		__asm__ __volatile__ (
1565 | 			"lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1566 | 		return tmp; /* NOTE(review): presumably the asm load already yields the decoded value -- confirm */
1567 | /*
1568 |  * On PowerPC, this turns out not to be worth the effort: the inline
1569 |  * assembly makes GCC optimizer uncomfortable, which tends to nullify
1570 |  * the decoding gains.
1571 |  *
1572 |  * For most hash functions, using this inline assembly trick changes
1573 |  * hashing speed by less than 5% and often _reduces_ it. The biggest
1574 |  * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
1575 |  * less than 10%. The speed gain on CubeHash is probably due to the
1576 |  * chronic shortage of registers that CubeHash endures; for the other
1577 |  * functions, the generic code appears to be efficient enough already.
1578 |  *
1579 | #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1580 | 		sph_u32 tmp;
1581 | 
1582 | 		__asm__ __volatile__ (
1583 | 			"lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1584 | 		return tmp;
1585 |  */
1586 | #else /* no SPARC inline assembly: load the word, then byte-swap */
1587 | 		return sph_bswap32(*(const sph_u32 *)src);
1588 | #endif /* SPH_SPARCV9_GCC && !SPH_NO_ASM */
1589 | #else /* !SPH_BIG_ENDIAN */
1590 | 		return *(const sph_u32 *)src;
1591 | #endif /* SPH_BIG_ENDIAN */
1592 | 	} else { /* misaligned: combine four bytes, src[0] least significant */
1593 | 		return (sph_u32)(((const unsigned char *)src)[0])
1594 | 			| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1595 | 			| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1596 | 			| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1597 | 	}
1598 | #endif /* SPH_UNALIGNED */
1599 | #else /* !defined SPH_UPTR: always combine four bytes, src[0] least significant */
1600 | 	return (sph_u32)(((const unsigned char *)src)[0])
1601 | 		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1602 | 		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1603 | 		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1604 | #endif /* defined SPH_UPTR */
1605 | }
1606 | 
1607 | /**
1608 |  * Decode a 32-bit value from the provided buffer (little endian convention).
1609 |  * The source buffer must be properly aligned: no alignment test is made
1610 |  * here, and src is read through a (const sph_u32 *) cast or by inline asm when an endianness flag is set.
1611 |  * @param src   the source buffer (32-bit aligned)
1612 |  * @return  the decoded value
1613 |  */
1614 | static SPH_INLINE sph_u32
1615 | sph_dec32le_aligned(const void *src)
1616 | {
1617 | #if SPH_LITTLE_ENDIAN /* native word load, no swap */
1618 | 	return *(const sph_u32 *)src;
1619 | #elif SPH_BIG_ENDIAN
1620 | #if SPH_SPARCV9_GCC && !SPH_NO_ASM
1621 | 	sph_u32 tmp;
1622 | 
1623 | 	__asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1624 | 	return tmp; /* NOTE(review): presumably the asm load already yields the decoded value -- confirm */
1625 | /*
1626 |  * PowerPC variant, deliberately left disabled: not worth it generally.
1627 |  *
1628 | #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1629 | 	sph_u32 tmp;
1630 | 
1631 | 	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1632 | 	return tmp;
1633 |  */
1634 | #else /* no SPARC inline assembly: load the word, then byte-swap */
1635 | 	return sph_bswap32(*(const sph_u32 *)src);
1636 | #endif /* SPH_SPARCV9_GCC && !SPH_NO_ASM */
1637 | #else /* neither endianness flag set: combine four bytes, src[0] least significant */
1638 | 	return (sph_u32)(((const unsigned char *)src)[0])
1639 | 		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1640 | 		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1641 | 		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1642 | #endif
1643 | }
1644 | 
1645 | #if SPH_64
1646 | 
1647 | /**
1648 |  * Encode a 64-bit value into the provided buffer (big endian convention).
1649 |  * A word store is used only if SPH_UNALIGNED is set or dst tests as 8-byte aligned.
1650 |  * @param dst   the destination buffer
1651 |  * @param val   the 64-bit value to encode
1652 |  */
1653 | static SPH_INLINE void
1654 | sph_enc64be(void *dst, sph_u64 val)
1655 | {
1656 | #if defined SPH_UPTR /* SPH_UPTR: integer type used below to test the address */
1657 | #if SPH_UNALIGNED /* word store, no alignment test */
1658 | #if SPH_LITTLE_ENDIAN
1659 | 	val = sph_bswap64(val); /* byte-swap before the native word store */
1660 | #endif
1661 | 	*(sph_u64 *)dst = val;
1662 | #else /* !SPH_UNALIGNED */
1663 | 	if (((SPH_UPTR)dst & 7) == 0) { /* address is a multiple of 8: word store */
1664 | #if SPH_LITTLE_ENDIAN
1665 | 		val = sph_bswap64(val);
1666 | #endif
1667 | 		*(sph_u64 *)dst = val;
1668 | 	} else { /* misaligned: store eight bytes, most significant first */
1669 | 		((unsigned char *)dst)[0] = (val >> 56);
1670 | 		((unsigned char *)dst)[1] = (val >> 48);
1671 | 		((unsigned char *)dst)[2] = (val >> 40);
1672 | 		((unsigned char *)dst)[3] = (val >> 32);
1673 | 		((unsigned char *)dst)[4] = (val >> 24);
1674 | 		((unsigned char *)dst)[5] = (val >> 16);
1675 | 		((unsigned char *)dst)[6] = (val >> 8);
1676 | 		((unsigned char *)dst)[7] = val;
1677 | 	}
1678 | #endif /* SPH_UNALIGNED */
1679 | #else /* !defined SPH_UPTR: always store eight bytes, most significant first */
1680 | 	((unsigned char *)dst)[0] = (val >> 56);
1681 | 	((unsigned char *)dst)[1] = (val >> 48);
1682 | 	((unsigned char *)dst)[2] = (val >> 40);
1683 | 	((unsigned char *)dst)[3] = (val >> 32);
1684 | 	((unsigned char *)dst)[4] = (val >> 24);
1685 | 	((unsigned char *)dst)[5] = (val >> 16);
1686 | 	((unsigned char *)dst)[6] = (val >> 8);
1687 | 	((unsigned char *)dst)[7] = val;
1688 | #endif /* defined SPH_UPTR */
1689 | }
1690 | 
1691 | /**
1692 |  * Encode a 64-bit value into the provided buffer (big endian convention).
1693 |  * The destination buffer must be properly aligned: no alignment test is made
1694 |  * here, and dst is written through a (sph_u64 *) cast when an endianness flag is set.
1695 |  * @param dst   the destination buffer (64-bit aligned)
1696 |  * @param val   the value to encode
1697 |  */
1698 | static SPH_INLINE void
1699 | sph_enc64be_aligned(void *dst, sph_u64 val)
1700 | {
1701 | #if SPH_LITTLE_ENDIAN /* byte-swap, then native word store */
1702 | 	*(sph_u64 *)dst = sph_bswap64(val);
1703 | #elif SPH_BIG_ENDIAN /* native word store, no swap */
1704 | 	*(sph_u64 *)dst = val;
1705 | #else /* neither endianness flag set: store bytes, most significant first */
1706 | 	((unsigned char *)dst)[0] = (val >> 56);
1707 | 	((unsigned char *)dst)[1] = (val >> 48);
1708 | 	((unsigned char *)dst)[2] = (val >> 40);
1709 | 	((unsigned char *)dst)[3] = (val >> 32);
1710 | 	((unsigned char *)dst)[4] = (val >> 24);
1711 | 	((unsigned char *)dst)[5] = (val >> 16);
1712 | 	((unsigned char *)dst)[6] = (val >> 8);
1713 | 	((unsigned char *)dst)[7] = val;
1714 | #endif
1715 | }
1716 | 
1717 | /**
1718 |  * Decode a 64-bit value from the provided buffer (big endian convention).
1719 |  * A word load is used only if SPH_UNALIGNED is set or src tests as 8-byte aligned.
1720 |  * @param src   the source buffer
1721 |  * @return  the decoded value
1722 |  */
1723 | static SPH_INLINE sph_u64
1724 | sph_dec64be(const void *src)
1725 | {
1726 | #if defined SPH_UPTR /* SPH_UPTR: integer type used below to test the address */
1727 | #if SPH_UNALIGNED /* word load, no alignment test */
1728 | #if SPH_LITTLE_ENDIAN
1729 | 	return sph_bswap64(*(const sph_u64 *)src);
1730 | #else /* !SPH_LITTLE_ENDIAN */
1731 | 	return *(const sph_u64 *)src;
1732 | #endif /* SPH_LITTLE_ENDIAN */
1733 | #else /* !SPH_UNALIGNED */
1734 | 	if (((SPH_UPTR)src & 7) == 0) { /* address is a multiple of 8: word load */
1735 | #if SPH_LITTLE_ENDIAN
1736 | 		return sph_bswap64(*(const sph_u64 *)src);
1737 | #else /* !SPH_LITTLE_ENDIAN */
1738 | 		return *(const sph_u64 *)src;
1739 | #endif /* SPH_LITTLE_ENDIAN */
1740 | 	} else { /* misaligned: combine eight bytes, src[0] most significant */
1741 | 		return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1742 | 			| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1743 | 			| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1744 | 			| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1745 | 			| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1746 | 			| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1747 | 			| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1748 | 			| (sph_u64)(((const unsigned char *)src)[7]);
1749 | 	}
1750 | #endif /* SPH_UNALIGNED */
1751 | #else /* !defined SPH_UPTR: always combine eight bytes, src[0] most significant */
1752 | 	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1753 | 		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1754 | 		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1755 | 		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1756 | 		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1757 | 		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1758 | 		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1759 | 		| (sph_u64)(((const unsigned char *)src)[7]);
1760 | #endif /* defined SPH_UPTR */
1761 | }
1762 | 
1763 | /**
1764 |  * Decode a 64-bit value from the provided buffer (big endian convention).
1765 |  * The source buffer must be properly aligned: no alignment test is made
1766 |  * here, and src is read through a (const sph_u64 *) cast when an endianness flag is set.
1767 |  * @param src   the source buffer (64-bit aligned)
1768 |  * @return  the decoded value
1769 |  */
1770 | static SPH_INLINE sph_u64
1771 | sph_dec64be_aligned(const void *src)
1772 | {
1773 | #if SPH_LITTLE_ENDIAN /* native word load, then byte-swap */
1774 | 	return sph_bswap64(*(const sph_u64 *)src);
1775 | #elif SPH_BIG_ENDIAN /* native word load, no swap */
1776 | 	return *(const sph_u64 *)src;
1777 | #else /* neither endianness flag set: combine bytes, src[0] most significant */
1778 | 	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1779 | 		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1780 | 		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1781 | 		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1782 | 		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1783 | 		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1784 | 		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1785 | 		| (sph_u64)(((const unsigned char *)src)[7]);
1786 | #endif
1787 | }
1788 | 
1789 | /**
1790 |  * Encode a 64-bit value into the provided buffer (little endian convention).
1791 |  * A word store is used only if SPH_UNALIGNED is set or dst tests as 8-byte aligned.
1792 |  * @param dst   the destination buffer
1793 |  * @param val   the 64-bit value to encode
1794 |  */
1795 | static SPH_INLINE void
1796 | sph_enc64le(void *dst, sph_u64 val)
1797 | {
1798 | #if defined SPH_UPTR /* SPH_UPTR: integer type used below to test the address */
1799 | #if SPH_UNALIGNED /* word store, no alignment test */
1800 | #if SPH_BIG_ENDIAN
1801 | 	val = sph_bswap64(val); /* byte-swap before the native word store */
1802 | #endif
1803 | 	*(sph_u64 *)dst = val;
1804 | #else /* !SPH_UNALIGNED */
1805 | 	if (((SPH_UPTR)dst & 7) == 0) { /* address is a multiple of 8: word store */
1806 | #if SPH_BIG_ENDIAN
1807 | 		val = sph_bswap64(val);
1808 | #endif
1809 | 		*(sph_u64 *)dst = val;
1810 | 	} else { /* misaligned: store eight bytes, least significant first */
1811 | 		((unsigned char *)dst)[0] = val;
1812 | 		((unsigned char *)dst)[1] = (val >> 8);
1813 | 		((unsigned char *)dst)[2] = (val >> 16);
1814 | 		((unsigned char *)dst)[3] = (val >> 24);
1815 | 		((unsigned char *)dst)[4] = (val >> 32);
1816 | 		((unsigned char *)dst)[5] = (val >> 40);
1817 | 		((unsigned char *)dst)[6] = (val >> 48);
1818 | 		((unsigned char *)dst)[7] = (val >> 56);
1819 | 	}
1820 | #endif /* SPH_UNALIGNED */
1821 | #else /* !defined SPH_UPTR: always store eight bytes, least significant first */
1822 | 	((unsigned char *)dst)[0] = val;
1823 | 	((unsigned char *)dst)[1] = (val >> 8);
1824 | 	((unsigned char *)dst)[2] = (val >> 16);
1825 | 	((unsigned char *)dst)[3] = (val >> 24);
1826 | 	((unsigned char *)dst)[4] = (val >> 32);
1827 | 	((unsigned char *)dst)[5] = (val >> 40);
1828 | 	((unsigned char *)dst)[6] = (val >> 48);
1829 | 	((unsigned char *)dst)[7] = (val >> 56);
1830 | #endif /* defined SPH_UPTR */
1831 | }
1832 | 
1833 | /**
1834 |  * Encode a 64-bit value into the provided buffer (little endian convention).
1835 |  * The destination buffer must be properly aligned: no alignment test is made
1836 |  * here, and dst is written through a (sph_u64 *) cast when an endianness flag is set.
1837 |  * @param dst   the destination buffer (64-bit aligned)
1838 |  * @param val   the value to encode
1839 |  */
1840 | static SPH_INLINE void
1841 | sph_enc64le_aligned(void *dst, sph_u64 val)
1842 | {
1843 | #if SPH_LITTLE_ENDIAN /* native word store, no swap */
1844 | 	*(sph_u64 *)dst = val;
1845 | #elif SPH_BIG_ENDIAN /* byte-swap, then native word store */
1846 | 	*(sph_u64 *)dst = sph_bswap64(val);
1847 | #else /* neither endianness flag set: store bytes, least significant first */
1848 | 	((unsigned char *)dst)[0] = val;
1849 | 	((unsigned char *)dst)[1] = (val >> 8);
1850 | 	((unsigned char *)dst)[2] = (val >> 16);
1851 | 	((unsigned char *)dst)[3] = (val >> 24);
1852 | 	((unsigned char *)dst)[4] = (val >> 32);
1853 | 	((unsigned char *)dst)[5] = (val >> 40);
1854 | 	((unsigned char *)dst)[6] = (val >> 48);
1855 | 	((unsigned char *)dst)[7] = (val >> 56);
1856 | #endif
1857 | }
1858 | 
1859 | /**
1860 |  * Decode a 64-bit value from the provided buffer (little endian convention).
1861 |  * A word load is used only if SPH_UNALIGNED is set or src tests as 8-byte aligned.
1862 |  * @param src   the source buffer
1863 |  * @return  the decoded value
1864 |  */
1865 | static SPH_INLINE sph_u64
1866 | sph_dec64le(const void *src)
1867 | {
1868 | #if defined SPH_UPTR /* SPH_UPTR: integer type used below to test the address */
1869 | #if SPH_UNALIGNED /* word load, no alignment test */
1870 | #if SPH_BIG_ENDIAN
1871 | 	return sph_bswap64(*(const sph_u64 *)src);
1872 | #else /* !SPH_BIG_ENDIAN */
1873 | 	return *(const sph_u64 *)src;
1874 | #endif /* SPH_BIG_ENDIAN */
1875 | #else /* !SPH_UNALIGNED */
1876 | 	if (((SPH_UPTR)src & 7) == 0) { /* address is a multiple of 8: word load */
1877 | #if SPH_BIG_ENDIAN
1878 | #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1879 | 		sph_u64 tmp;
1880 | 
1881 | 		__asm__ __volatile__ (
1882 | 			"ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1883 | 		return tmp; /* NOTE(review): presumably the asm load already yields the decoded value -- confirm */
1884 | /*
1885 |  * PowerPC variants, deliberately left disabled: not worth it generally.
1886 |  *
1887 | #elif SPH_PPC32_GCC && !SPH_NO_ASM
1888 | 		return (sph_u64)sph_dec32le_aligned(src)
1889 | 			| ((sph_u64)sph_dec32le_aligned(
1890 | 				(const char *)src + 4) << 32);
1891 | #elif SPH_PPC64_GCC && !SPH_NO_ASM
1892 | 		sph_u64 tmp;
1893 | 
1894 | 		__asm__ __volatile__ (
1895 | 			"ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1896 | 		return tmp;
1897 |  */
1898 | #else /* no SPARC inline assembly: load the word, then byte-swap */
1899 | 		return sph_bswap64(*(const sph_u64 *)src);
1900 | #endif /* SPH_SPARCV9_GCC_64 && !SPH_NO_ASM */
1901 | #else /* !SPH_BIG_ENDIAN */
1902 | 		return *(const sph_u64 *)src;
1903 | #endif /* SPH_BIG_ENDIAN */
1904 | 	} else { /* misaligned: combine eight bytes, src[0] least significant */
1905 | 		return (sph_u64)(((const unsigned char *)src)[0])
1906 | 			| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1907 | 			| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1908 | 			| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1909 | 			| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1910 | 			| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1911 | 			| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1912 | 			| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1913 | 	}
1914 | #endif /* SPH_UNALIGNED */
1915 | #else /* !defined SPH_UPTR: always combine eight bytes, src[0] least significant */
1916 | 	return (sph_u64)(((const unsigned char *)src)[0])
1917 | 		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1918 | 		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1919 | 		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1920 | 		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1921 | 		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1922 | 		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1923 | 		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1924 | #endif /* defined SPH_UPTR */
1925 | }
1926 | 
1927 | /**
1928 |  * Decode a 64-bit value from the provided buffer (little endian convention).
1929 |  * The source buffer must be properly aligned: no alignment test is made
1930 |  * here, and src is read through a (const sph_u64 *) cast or by inline asm when an endianness flag is set.
1931 |  * @param src   the source buffer (64-bit aligned)
1932 |  * @return  the decoded value
1933 |  */
1934 | static SPH_INLINE sph_u64
1935 | sph_dec64le_aligned(const void *src)
1936 | {
1937 | #if SPH_LITTLE_ENDIAN /* native word load, no swap */
1938 | 	return *(const sph_u64 *)src;
1939 | #elif SPH_BIG_ENDIAN
1940 | #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1941 | 	sph_u64 tmp;
1942 | 
1943 | 	__asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1944 | 	return tmp; /* NOTE(review): presumably the asm load already yields the decoded value -- confirm */
1945 | /*
1946 |  * PowerPC variants, deliberately left disabled: not worth it generally.
1947 |  *
1948 | #elif SPH_PPC32_GCC && !SPH_NO_ASM
1949 | 	return (sph_u64)sph_dec32le_aligned(src)
1950 | 		| ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
1951 | #elif SPH_PPC64_GCC && !SPH_NO_ASM
1952 | 	sph_u64 tmp;
1953 | 
1954 | 	__asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1955 | 	return tmp;
1956 |  */
1957 | #else /* no SPARC inline assembly: load the word, then byte-swap */
1958 | 	return sph_bswap64(*(const sph_u64 *)src);
1959 | #endif /* SPH_SPARCV9_GCC_64 && !SPH_NO_ASM */
1960 | #else /* neither endianness flag set: combine bytes, src[0] least significant */
1961 | 	return (sph_u64)(((const unsigned char *)src)[0])
1962 | 		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1963 | 		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1964 | 		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1965 | 		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1966 | 		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1967 | 		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1968 | 		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1969 | #endif
1970 | }
1971 | 
1972 | #endif
1973 | 
1974 | #endif /* Doxygen excluded block */
1975 | 
1976 | #endif
1977 | 


--------------------------------------------------------------------------------
/skein.c:
--------------------------------------------------------------------------------
   1 | /* $Id: skein.c 254 2011-06-07 19:38:58Z tp $ */
   2 | /*
   3 |  * Skein implementation.
   4 |  *
   5 |  * ==========================(LICENSE BEGIN)============================
   6 |  *
   7 |  * Copyright (c) 2007-2010  Projet RNRT SAPHIR
   8 |  * 
   9 |  * Permission is hereby granted, free of charge, to any person obtaining
  10 |  * a copy of this software and associated documentation files (the
  11 |  * "Software"), to deal in the Software without restriction, including
  12 |  * without limitation the rights to use, copy, modify, merge, publish,
  13 |  * distribute, sublicense, and/or sell copies of the Software, and to
  14 |  * permit persons to whom the Software is furnished to do so, subject to
  15 |  * the following conditions:
  16 |  * 
  17 |  * The above copyright notice and this permission notice shall be
  18 |  * included in all copies or substantial portions of the Software.
  19 |  * 
  20 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26 |  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 |  *
  28 |  * ===========================(LICENSE END)=============================
  29 |  *
  30 |  * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
  31 |  */
  32 | 
  33 | #include <stddef.h>
  34 | #include <string.h>
  35 | 
  36 | #include "shacommon/sha256_m.h"
  37 | #include "shacommon/common.h"
  38 | #include "shacommon/sph_skein.h"
  39 | 
  40 | #ifdef __cplusplus
  41 | extern "C"{
  42 | #endif
  43 | 
  44 | /* SPH_SMALL_FOOTPRINT turns on the Skein-specific flag unless the latter is already defined. */
  45 | #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SKEIN
  46 | #define SPH_SMALL_FOOTPRINT_SKEIN   1
  47 | #endif
  48 | /* Under MSVC only (_MSC_VER defined): disable compiler warning number 4146 for this file. */
  49 | #ifdef _MSC_VER
  50 | #pragma warning (disable: 4146)
  51 | #endif
  52 | 
  53 | #if SPH_64
  54 | 
  55 | #if 0
  56 | /* obsolete: compiled out; SKSI() (also under #if 0) is the macro that indexes this table */
  57 | /*
  58 |  * M5_ ## s ## _ ## i  evaluates to (s+i) mod 5 (0 <= s <= 18, 0 <= i <= 3).
  59 |  */
  60 | 
  61 | #define M5_0_0    0
  62 | #define M5_0_1    1
  63 | #define M5_0_2    2
  64 | #define M5_0_3    3
  65 | 
  66 | #define M5_1_0    1
  67 | #define M5_1_1    2
  68 | #define M5_1_2    3
  69 | #define M5_1_3    4
  70 | 
  71 | #define M5_2_0    2
  72 | #define M5_2_1    3
  73 | #define M5_2_2    4
  74 | #define M5_2_3    0
  75 | 
  76 | #define M5_3_0    3
  77 | #define M5_3_1    4
  78 | #define M5_3_2    0
  79 | #define M5_3_3    1
  80 | 
  81 | #define M5_4_0    4
  82 | #define M5_4_1    0
  83 | #define M5_4_2    1
  84 | #define M5_4_3    2
  85 | 
  86 | #define M5_5_0    0
  87 | #define M5_5_1    1
  88 | #define M5_5_2    2
  89 | #define M5_5_3    3
  90 | 
  91 | #define M5_6_0    1
  92 | #define M5_6_1    2
  93 | #define M5_6_2    3
  94 | #define M5_6_3    4
  95 | 
  96 | #define M5_7_0    2
  97 | #define M5_7_1    3
  98 | #define M5_7_2    4
  99 | #define M5_7_3    0
 100 | 
 101 | #define M5_8_0    3
 102 | #define M5_8_1    4
 103 | #define M5_8_2    0
 104 | #define M5_8_3    1
 105 | 
 106 | #define M5_9_0    4
 107 | #define M5_9_1    0
 108 | #define M5_9_2    1
 109 | #define M5_9_3    2
 110 | 
 111 | #define M5_10_0   0
 112 | #define M5_10_1   1
 113 | #define M5_10_2   2
 114 | #define M5_10_3   3
 115 | 
 116 | #define M5_11_0   1
 117 | #define M5_11_1   2
 118 | #define M5_11_2   3
 119 | #define M5_11_3   4
 120 | 
 121 | #define M5_12_0   2
 122 | #define M5_12_1   3
 123 | #define M5_12_2   4
 124 | #define M5_12_3   0
 125 | 
 126 | #define M5_13_0   3
 127 | #define M5_13_1   4
 128 | #define M5_13_2   0
 129 | #define M5_13_3   1
 130 | 
 131 | #define M5_14_0   4
 132 | #define M5_14_1   0
 133 | #define M5_14_2   1
 134 | #define M5_14_3   2
 135 | 
 136 | #define M5_15_0   0
 137 | #define M5_15_1   1
 138 | #define M5_15_2   2
 139 | #define M5_15_3   3
 140 | 
 141 | #define M5_16_0   1
 142 | #define M5_16_1   2
 143 | #define M5_16_2   3
 144 | #define M5_16_3   4
 145 | 
 146 | #define M5_17_0   2
 147 | #define M5_17_1   3
 148 | #define M5_17_2   4
 149 | #define M5_17_3   0
 150 | 
 151 | #define M5_18_0   3
 152 | #define M5_18_1   4
 153 | #define M5_18_2   0
 154 | #define M5_18_3   1
 155 | #endif /* 0: obsolete M5 table */
 156 | 
 157 | /*
 158 |  * M9_ ## s ## _ ## i  evaluates to (s+i) mod 9 (0 <= s <= 18, 0 <= i <= 7).
 159 |  * SKBI() pastes the value onto a name prefix k to pick one of nine words (k0..k8).
 160 |  */
 161 | #define M9_0_0    0
 162 | #define M9_0_1    1
 163 | #define M9_0_2    2
 164 | #define M9_0_3    3
 165 | #define M9_0_4    4
 166 | #define M9_0_5    5
 167 | #define M9_0_6    6
 168 | #define M9_0_7    7
 169 | 
 170 | #define M9_1_0    1
 171 | #define M9_1_1    2
 172 | #define M9_1_2    3
 173 | #define M9_1_3    4
 174 | #define M9_1_4    5
 175 | #define M9_1_5    6
 176 | #define M9_1_6    7
 177 | #define M9_1_7    8
 178 | 
 179 | #define M9_2_0    2
 180 | #define M9_2_1    3
 181 | #define M9_2_2    4
 182 | #define M9_2_3    5
 183 | #define M9_2_4    6
 184 | #define M9_2_5    7
 185 | #define M9_2_6    8
 186 | #define M9_2_7    0
 187 | 
 188 | #define M9_3_0    3
 189 | #define M9_3_1    4
 190 | #define M9_3_2    5
 191 | #define M9_3_3    6
 192 | #define M9_3_4    7
 193 | #define M9_3_5    8
 194 | #define M9_3_6    0
 195 | #define M9_3_7    1
 196 | 
 197 | #define M9_4_0    4
 198 | #define M9_4_1    5
 199 | #define M9_4_2    6
 200 | #define M9_4_3    7
 201 | #define M9_4_4    8
 202 | #define M9_4_5    0
 203 | #define M9_4_6    1
 204 | #define M9_4_7    2
 205 | 
 206 | #define M9_5_0    5
 207 | #define M9_5_1    6
 208 | #define M9_5_2    7
 209 | #define M9_5_3    8
 210 | #define M9_5_4    0
 211 | #define M9_5_5    1
 212 | #define M9_5_6    2
 213 | #define M9_5_7    3
 214 | 
 215 | #define M9_6_0    6
 216 | #define M9_6_1    7
 217 | #define M9_6_2    8
 218 | #define M9_6_3    0
 219 | #define M9_6_4    1
 220 | #define M9_6_5    2
 221 | #define M9_6_6    3
 222 | #define M9_6_7    4
 223 | 
 224 | #define M9_7_0    7
 225 | #define M9_7_1    8
 226 | #define M9_7_2    0
 227 | #define M9_7_3    1
 228 | #define M9_7_4    2
 229 | #define M9_7_5    3
 230 | #define M9_7_6    4
 231 | #define M9_7_7    5
 232 | 
 233 | #define M9_8_0    8
 234 | #define M9_8_1    0
 235 | #define M9_8_2    1
 236 | #define M9_8_3    2
 237 | #define M9_8_4    3
 238 | #define M9_8_5    4
 239 | #define M9_8_6    5
 240 | #define M9_8_7    6
 241 | 
 242 | #define M9_9_0    0
 243 | #define M9_9_1    1
 244 | #define M9_9_2    2
 245 | #define M9_9_3    3
 246 | #define M9_9_4    4
 247 | #define M9_9_5    5
 248 | #define M9_9_6    6
 249 | #define M9_9_7    7
 250 | 
 251 | #define M9_10_0   1
 252 | #define M9_10_1   2
 253 | #define M9_10_2   3
 254 | #define M9_10_3   4
 255 | #define M9_10_4   5
 256 | #define M9_10_5   6
 257 | #define M9_10_6   7
 258 | #define M9_10_7   8
 259 | 
 260 | #define M9_11_0   2
 261 | #define M9_11_1   3
 262 | #define M9_11_2   4
 263 | #define M9_11_3   5
 264 | #define M9_11_4   6
 265 | #define M9_11_5   7
 266 | #define M9_11_6   8
 267 | #define M9_11_7   0
 268 | 
 269 | #define M9_12_0   3
 270 | #define M9_12_1   4
 271 | #define M9_12_2   5
 272 | #define M9_12_3   6
 273 | #define M9_12_4   7
 274 | #define M9_12_5   8
 275 | #define M9_12_6   0
 276 | #define M9_12_7   1
 277 | 
 278 | #define M9_13_0   4
 279 | #define M9_13_1   5
 280 | #define M9_13_2   6
 281 | #define M9_13_3   7
 282 | #define M9_13_4   8
 283 | #define M9_13_5   0
 284 | #define M9_13_6   1
 285 | #define M9_13_7   2
 286 | 
 287 | #define M9_14_0   5
 288 | #define M9_14_1   6
 289 | #define M9_14_2   7
 290 | #define M9_14_3   8
 291 | #define M9_14_4   0
 292 | #define M9_14_5   1
 293 | #define M9_14_6   2
 294 | #define M9_14_7   3
 295 | 
 296 | #define M9_15_0   6
 297 | #define M9_15_1   7
 298 | #define M9_15_2   8
 299 | #define M9_15_3   0
 300 | #define M9_15_4   1
 301 | #define M9_15_5   2
 302 | #define M9_15_6   3
 303 | #define M9_15_7   4
 304 | 
 305 | #define M9_16_0   7
 306 | #define M9_16_1   8
 307 | #define M9_16_2   0
 308 | #define M9_16_3   1
 309 | #define M9_16_4   2
 310 | #define M9_16_5   3
 311 | #define M9_16_6   4
 312 | #define M9_16_7   5
 313 | 
 314 | #define M9_17_0   8
 315 | #define M9_17_1   0
 316 | #define M9_17_2   1
 317 | #define M9_17_3   2
 318 | #define M9_17_4   3
 319 | #define M9_17_5   4
 320 | #define M9_17_6   5
 321 | #define M9_17_7   6
 322 | 
 323 | #define M9_18_0   0
 324 | #define M9_18_1   1
 325 | #define M9_18_2   2
 326 | #define M9_18_3   3
 327 | #define M9_18_4   4
 328 | #define M9_18_5   5
 329 | #define M9_18_6   6
 330 | #define M9_18_7   7
 331 | 
 332 | /*
 333 |  * M3_ ## s ## _ ## i  evaluates to (s+i) mod 3 (0 <= s <= 18, 0 <= i <= 1).
 334 |  * SKBT() pastes the value onto a name prefix t to pick one of three words (t0..t2).
 335 |  */
 336 | #define M3_0_0    0
 337 | #define M3_0_1    1
 338 | #define M3_1_0    1
 339 | #define M3_1_1    2
 340 | #define M3_2_0    2
 341 | #define M3_2_1    0
 342 | #define M3_3_0    0
 343 | #define M3_3_1    1
 344 | #define M3_4_0    1
 345 | #define M3_4_1    2
 346 | #define M3_5_0    2
 347 | #define M3_5_1    0
 348 | #define M3_6_0    0
 349 | #define M3_6_1    1
 350 | #define M3_7_0    1
 351 | #define M3_7_1    2
 352 | #define M3_8_0    2
 353 | #define M3_8_1    0
 354 | #define M3_9_0    0
 355 | #define M3_9_1    1
 356 | #define M3_10_0   1
 357 | #define M3_10_1   2
 358 | #define M3_11_0   2
 359 | #define M3_11_1   0
 360 | #define M3_12_0   0
 361 | #define M3_12_1   1
 362 | #define M3_13_0   1
 363 | #define M3_13_1   2
 364 | #define M3_14_0   2
 365 | #define M3_14_1   0
 366 | #define M3_15_0   0
 367 | #define M3_15_1   1
 368 | #define M3_16_0   1
 369 | #define M3_16_1   2
 370 | #define M3_17_0   2
 371 | #define M3_17_1   0
 372 | #define M3_18_0   0
 373 | #define M3_18_1   1
 374 | /* Two-level paste: XCAT() expands its arguments first, then XCAT_() joins them with ##. */
 375 | #define XCAT(x, y)     XCAT_(x, y)
 376 | #define XCAT_(x, y)    x ## y
 377 | 
 378 | #if 0
 379 | /* obsolete: same construction as SKBI/SKBT below, but indexing through the M5_ table */
 380 | #define SKSI(k, s, i)   XCAT(k, XCAT(XCAT(XCAT(M5_, s), _), i))
 381 | #define SKST(t, s, v)   XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
 382 | #endif
 383 | /* SKBI(k, s, i) names k<(s+i) mod 9>, e.g. SKBI(h, 2, 7) -> h0; SKBT(t, s, v) names t<(s+v) mod 3>. s, i, v must be literal integers. */
 384 | #define SKBI(k, s, i)   XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i))
 385 | #define SKBT(t, s, v)   XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
 386 | 
 387 | #if 0
 388 | /* obsolete: four-word variant (k0..k3 -> k4), compiled out */
 389 | #define TFSMALL_KINIT(k0, k1, k2, k3, k4, t0, t1, t2)   do { \
 390 | 		k4 = (k0 ^ k1) ^ (k2 ^ k3) ^ SPH_C64(0x1BD11BDAA9FC1A22); \
 391 | 		t2 = t0 ^ t1; \
 392 | 	} while (0)
 393 | #endif
 394 | /* Derive the ninth word k8 = k0 ^ ... ^ k7 ^ constant, and the third word t2 = t0 ^ t1. */
 395 | #define TFBIG_KINIT(k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2)   do { \
 396 | 		k8 = ((k0 ^ k1) ^ (k2 ^ k3)) ^ ((k4 ^ k5) ^ (k6 ^ k7)) \
 397 | 			^ SPH_C64(0x1BD11BDAA9FC1A22); \
 398 | 		t2 = t0 ^ t1; \
 399 | 	} while (0)
 400 | 
 401 | #if 0
 402 | /* obsolete: four-word variant of the subkey addition, compiled out (relies on SKSI/SKST) */
 403 | #define TFSMALL_ADDKEY(w0, w1, w2, w3, k, t, s)   do { \
 404 | 		w0 = SPH_T64(w0 + SKSI(k, s, 0)); \
 405 | 		w1 = SPH_T64(w1 + SKSI(k, s, 1) + SKST(t, s, 0)); \
 406 | 		w2 = SPH_T64(w2 + SKSI(k, s, 2) + SKST(t, s, 1)); \
 407 | 		w3 = SPH_T64(w3 + SKSI(k, s, 3) + (sph_u64)s); \
 408 | 	} while (0)
 409 | #endif
 410 | /* TFBIG_ADDKEY: add subkey number s to the eight state words; the two variants take different arguments. */
 411 | #if SPH_SMALL_FOOTPRINT_SKEIN
 412 | /* Array form: words come from h[s..s+7]; tt0/tt1 are added to p5/p6 and s to p7. Uses p0..p7 and h from the expansion site. */
 413 | #define TFBIG_ADDKEY(s, tt0, tt1)   do { \
 414 | 		p0 = SPH_T64(p0 + h[s + 0]); \
 415 | 		p1 = SPH_T64(p1 + h[s + 1]); \
 416 | 		p2 = SPH_T64(p2 + h[s + 2]); \
 417 | 		p3 = SPH_T64(p3 + h[s + 3]); \
 418 | 		p4 = SPH_T64(p4 + h[s + 4]); \
 419 | 		p5 = SPH_T64(p5 + h[s + 5] + tt0); \
 420 | 		p6 = SPH_T64(p6 + h[s + 6] + tt1); \
 421 | 		p7 = SPH_T64(p7 + h[s + 7] + (sph_u64)s); \
 422 | 	} while (0)
 423 | 
 424 | #else
 425 | /* Named-variable form: k and t are name prefixes and s a literal integer, resolved by SKBI()/SKBT() token pasting. */
 426 | #define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s)   do { \
 427 | 		w0 = SPH_T64(w0 + SKBI(k, s, 0)); \
 428 | 		w1 = SPH_T64(w1 + SKBI(k, s, 1)); \
 429 | 		w2 = SPH_T64(w2 + SKBI(k, s, 2)); \
 430 | 		w3 = SPH_T64(w3 + SKBI(k, s, 3)); \
 431 | 		w4 = SPH_T64(w4 + SKBI(k, s, 4)); \
 432 | 		w5 = SPH_T64(w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
 433 | 		w6 = SPH_T64(w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
 434 | 		w7 = SPH_T64(w7 + SKBI(k, s, 7) + (sph_u64)s); \
 435 | 	} while (0)
 436 | 
 437 | #endif /* SPH_SMALL_FOOTPRINT_SKEIN */
 438 | 
 439 | #if 0
 440 | /* obsolete: compiled out; the body is the same as TFBIG_MIX */
 441 | #define TFSMALL_MIX(x0, x1, rc)   do { \
 442 | 		x0 = SPH_T64(x0 + x1); \
 443 | 		x1 = SPH_ROTL64(x1, rc) ^ x0; \
 444 | 	} while (0)
 445 | #endif
 446 | /* One mix step: x0 = SPH_T64(x0 + x1), then x1 = SPH_ROTL64(x1, rc) ^ x0 (using the updated x0). */
 447 | #define TFBIG_MIX(x0, x1, rc)   do { \
 448 | 		x0 = SPH_T64(x0 + x1); \
 449 | 		x1 = SPH_ROTL64(x1, rc) ^ x0; \
 450 | 	} while (0)
 451 | 
 452 | #if 0
 453 | /* obsolete: two mix steps on the pairs (w0, w1) and (w2, w3), compiled out */
 454 | #define TFSMALL_MIX4(w0, w1, w2, w3, rc0, rc1)  do { \
 455 | 		TFSMALL_MIX(w0, w1, rc0); \
 456 | 		TFSMALL_MIX(w2, w3, rc1); \
 457 | 	} while (0)
 458 | #endif
 459 | /* Four mix steps on the pairs (w0,w1), (w2,w3), (w4,w5), (w6,w7), with rotation counts rc0..rc3. */
 460 | #define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3)  do { \
 461 | 		TFBIG_MIX(w0, w1, rc0); \
 462 | 		TFBIG_MIX(w2, w3, rc1); \
 463 | 		TFBIG_MIX(w4, w5, rc2); \
 464 | 		TFBIG_MIX(w6, w7, rc3); \
 465 | 	} while (0)
 466 | 
 467 | #if 0
 468 | /* obsolete: four-word variants (subkey addition, then four MIX4 rounds), compiled out */
 469 | #define TFSMALL_4e(s)   do { \
 470 | 		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
 471 | 		TFSMALL_MIX4(p0, p1, p2, p3, 14, 16); \
 472 | 		TFSMALL_MIX4(p0, p3, p2, p1, 52, 57); \
 473 | 		TFSMALL_MIX4(p0, p1, p2, p3, 23, 40); \
 474 | 		TFSMALL_MIX4(p0, p3, p2, p1,  5, 37); \
 475 | 	} while (0)
 476 | 
 477 | #define TFSMALL_4o(s)   do { \
 478 | 		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
 479 | 		TFSMALL_MIX4(p0, p1, p2, p3, 25, 33); \
 480 | 		TFSMALL_MIX4(p0, p3, p2, p1, 46, 12); \
 481 | 		TFSMALL_MIX4(p0, p1, p2, p3, 58, 22); \
 482 | 		TFSMALL_MIX4(p0, p3, p2, p1, 32, 32); \
 483 | 	} while (0)
 484 | #endif
 485 | /* TFBIG_4e / TFBIG_4o: add subkey s, then run four MIX8 rounds; word reordering between rounds is done by permuting the arguments. */
 486 | #if SPH_SMALL_FOOTPRINT_SKEIN
 487 | /* Array form: 4e passes (t0, t1) to the subkey addition, 4o passes (t1, t2). */
 488 | #define TFBIG_4e(s)   do { \
 489 | 		TFBIG_ADDKEY(s, t0, t1); \
 490 | 		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
 491 | 		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
 492 | 		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
 493 | 		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44,  9, 54, 56); \
 494 | 	} while (0)
 495 | 
 496 | #define TFBIG_4o(s)   do { \
 497 | 		TFBIG_ADDKEY(s, t1, t2); \
 498 | 		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
 499 | 		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
 500 | 		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
 501 | 		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3,  8, 35, 56, 22); \
 502 | 	} while (0)
 503 | 
 504 | #else
 505 | /* Named-variable form: s must be a literal integer; the words are selected from the h and t prefixes by token pasting. */
 506 | #define TFBIG_4e(s)   do { \
 507 | 		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
 508 | 		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
 509 | 		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
 510 | 		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
 511 | 		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44,  9, 54, 56); \
 512 | 	} while (0)
 513 | 
 514 | #define TFBIG_4o(s)   do { \
 515 | 		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
 516 | 		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
 517 | 		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
 518 | 		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
 519 | 		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3,  8, 35, 56, 22); \
 520 | 	} while (0)
 521 | 
 522 | #endif /* SPH_SMALL_FOOTPRINT_SKEIN */
 523 | 
 524 | #if 0
 525 | /* obsolete: four-word step over a 32-byte buf (words m0..m3, state h0..h3), compiled out */
 526 | #define UBI_SMALL(etype, extra)  do { \
 527 | 		sph_u64 h4, t0, t1, t2; \
 528 | 		sph_u64 m0 = sph_dec64le(buf +  0); \
 529 | 		sph_u64 m1 = sph_dec64le(buf +  8); \
 530 | 		sph_u64 m2 = sph_dec64le(buf + 16); \
 531 | 		sph_u64 m3 = sph_dec64le(buf + 24); \
 532 | 		sph_u64 p0 = m0; \
 533 | 		sph_u64 p1 = m1; \
 534 | 		sph_u64 p2 = m2; \
 535 | 		sph_u64 p3 = m3; \
 536 | 		t0 = SPH_T64(bcount << 5) + (sph_u64)(extra); \
 537 | 		t1 = (bcount >> 59) + ((sph_u64)(etype) << 55); \
 538 | 		TFSMALL_KINIT(h0, h1, h2, h3, h4, t0, t1, t2); \
 539 | 		TFSMALL_4e(0); \
 540 | 		TFSMALL_4o(1); \
 541 | 		TFSMALL_4e(2); \
 542 | 		TFSMALL_4o(3); \
 543 | 		TFSMALL_4e(4); \
 544 | 		TFSMALL_4o(5); \
 545 | 		TFSMALL_4e(6); \
 546 | 		TFSMALL_4o(7); \
 547 | 		TFSMALL_4e(8); \
 548 | 		TFSMALL_4o(9); \
 549 | 		TFSMALL_4e(10); \
 550 | 		TFSMALL_4o(11); \
 551 | 		TFSMALL_4e(12); \
 552 | 		TFSMALL_4o(13); \
 553 | 		TFSMALL_4e(14); \
 554 | 		TFSMALL_4o(15); \
 555 | 		TFSMALL_4e(16); \
 556 | 		TFSMALL_4o(17); \
 557 | 		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, 18); \
 558 | 		h0 = m0 ^ p0; \
 559 | 		h1 = m1 ^ p1; \
 560 | 		h2 = m2 ^ p2; \
 561 | 		h3 = m3 ^ p3; \
 562 | 	} while (0)
 563 | #endif
 564 | 
 565 | #if SPH_SMALL_FOOTPRINT_SKEIN
 566 | 
/*
 * UBI_BIG(etype, extra) -- small-footprint variant: process the 64-byte
 * block currently held in `buf` and update the chaining value h[0..7].
 *
 * Names that must be in scope at the expansion site: `buf` (the 64-byte
 * block), `bcount` (block counter) and the array `h` declared by
 * DECL_STATE_BIG.
 *
 * Steps, in order:
 *  - decode the block as eight little-endian 64-bit words m0..m7 and copy
 *    them into the working words p0..p7;
 *  - build the tweak: t0 is the low 64 bits of (bcount * 64) plus `extra`,
 *    t1 holds the bits of bcount shifted out of t0 plus `etype` placed at
 *    bit 55 and above;
 *  - TFBIG_KINIT fills in h[8] and t2 from the other key/tweak words;
 *  - h[0..8] is replicated into h[9..26] so that the rolled loop below can
 *    index subkey words without a modulo;
 *  - nine iterations each run an even group (TFBIG_4e) and an odd group
 *    (TFBIG_4o), then rotate (t0, t1, t2);
 *  - a final subkey (number 18) is added and the result is XORed with the
 *    message words to form the new h[0..7].
 *
 * NOTE(review): this variant calls TFBIG_ADDKEY with three arguments and
 * passes a runtime value to TFBIG_4e/TFBIG_4o; it therefore relies on the
 * small-footprint definitions of those macros, which are outside this
 * section -- confirm they match.
 */
#define UBI_BIG(etype, extra)  do { \
		sph_u64 t0, t1, t2; \
		unsigned u; \
		sph_u64 m0 = sph_dec64le_aligned(buf +  0); \
		sph_u64 m1 = sph_dec64le_aligned(buf +  8); \
		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
		sph_u64 p0 = m0; \
		sph_u64 p1 = m1; \
		sph_u64 p2 = m2; \
		sph_u64 p3 = m3; \
		sph_u64 p4 = m4; \
		sph_u64 p5 = m5; \
		sph_u64 p6 = m6; \
		sph_u64 p7 = m7; \
		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
		TFBIG_KINIT(h[0], h[1], h[2], h[3], h[4], h[5], \
			h[6], h[7], h[8], t0, t1, t2); \
		for (u = 0; u <= 15; u += 3) { \
			h[u +  9] = h[u + 0]; \
			h[u + 10] = h[u + 1]; \
			h[u + 11] = h[u + 2]; \
		} \
		for (u = 0; u < 9; u ++) { \
			sph_u64 s = u << 1; \
			sph_u64 tmp; \
			TFBIG_4e(s); \
			TFBIG_4o(s + 1); \
			tmp = t2; \
			t2 = t1; \
			t1 = t0; \
			t0 = tmp; \
		} \
		TFBIG_ADDKEY(18, t0, t1); \
		h[0] = m0 ^ p0; \
		h[1] = m1 ^ p1; \
		h[2] = m2 ^ p2; \
		h[3] = m3 ^ p3; \
		h[4] = m4 ^ p4; \
		h[5] = m5 ^ p5; \
		h[6] = m6 ^ p6; \
		h[7] = m7 ^ p7; \
	} while (0)
 615 | 
 616 | #else
 617 | 
/*
 * UBI_BIG(etype, extra) -- fully unrolled variant: process the 64-byte
 * block currently held in `buf` and update the chaining value h0..h7.
 *
 * Names that must be in scope at the expansion site: `buf` (the 64-byte
 * block), `bcount` (block counter) and h0..h7 declared by DECL_STATE_BIG.
 *
 * Steps, in order:
 *  - decode the block as eight little-endian 64-bit words m0..m7 and copy
 *    them into the working words p0..p7;
 *  - build the tweak: t0 is the low 64 bits of (bcount * 64) plus `extra`,
 *    t1 holds the bits of bcount shifted out of t0 plus `etype` placed at
 *    bit 55 and above;
 *  - TFBIG_KINIT fills in the extra key word h8 and tweak word t2;
 *  - eighteen four-step groups run with subkeys 0..17, alternating
 *    TFBIG_4e (even) and TFBIG_4o (odd);
 *  - subkey 18 is added and the result is XORed with the message words to
 *    form the new h0..h7.
 */
#define UBI_BIG(etype, extra)  do { \
		sph_u64 h8, t0, t1, t2; \
		sph_u64 m0 = sph_dec64le_aligned(buf +  0); \
		sph_u64 m1 = sph_dec64le_aligned(buf +  8); \
		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
		sph_u64 p0 = m0; \
		sph_u64 p1 = m1; \
		sph_u64 p2 = m2; \
		sph_u64 p3 = m3; \
		sph_u64 p4 = m4; \
		sph_u64 p5 = m5; \
		sph_u64 p6 = m6; \
		sph_u64 p7 = m7; \
		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
		TFBIG_KINIT(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
		TFBIG_4e(0); \
		TFBIG_4o(1); \
		TFBIG_4e(2); \
		TFBIG_4o(3); \
		TFBIG_4e(4); \
		TFBIG_4o(5); \
		TFBIG_4e(6); \
		TFBIG_4o(7); \
		TFBIG_4e(8); \
		TFBIG_4o(9); \
		TFBIG_4e(10); \
		TFBIG_4o(11); \
		TFBIG_4e(12); \
		TFBIG_4o(13); \
		TFBIG_4e(14); \
		TFBIG_4o(15); \
		TFBIG_4e(16); \
		TFBIG_4o(17); \
		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, 18); \
		h0 = m0 ^ p0; \
		h1 = m1 ^ p1; \
		h2 = m2 ^ p2; \
		h3 = m3 ^ p3; \
		h4 = m4 ^ p4; \
		h5 = m5 ^ p5; \
		h6 = m6 ^ p6; \
		h7 = m7 ^ p7; \
	} while (0)
 667 | 
 668 | #endif
 669 | 
 670 | #if 0
 671 | /* obsolete */
 672 | #define DECL_STATE_SMALL \
 673 | 	sph_u64 h0, h1, h2, h3; \
 674 | 	sph_u64 bcount;
 675 | 
 676 | #define READ_STATE_SMALL(sc)   do { \
 677 | 		h0 = (sc)->h0; \
 678 | 		h1 = (sc)->h1; \
 679 | 		h2 = (sc)->h2; \
 680 | 		h3 = (sc)->h3; \
 681 | 		bcount = sc->bcount; \
 682 | 	} while (0)
 683 | 
 684 | #define WRITE_STATE_SMALL(sc)   do { \
 685 | 		(sc)->h0 = h0; \
 686 | 		(sc)->h1 = h1; \
 687 | 		(sc)->h2 = h2; \
 688 | 		(sc)->h3 = h3; \
 689 | 		sc->bcount = bcount; \
 690 | 	} while (0)
 691 | #endif
 692 | 
 693 | #if SPH_SMALL_FOOTPRINT_SKEIN
 694 | 
/*
 * DECL_STATE_BIG -- small-footprint variant: declare the local working
 * copy of the hash state. h[0..7] hold the chaining value, h[8] is filled
 * by TFBIG_KINIT inside UBI_BIG, and h[9..26] receive the replicated key
 * words that UBI_BIG writes before its rolled loop. The expansion already
 * ends with ';', so it is used without a trailing semicolon.
 */
#define DECL_STATE_BIG \
	sph_u64 h[27]; \
	sph_u64 bcount;
 698 | 
 699 | #define READ_STATE_BIG(sc)   do { \
 700 | 		h[0] = (sc)->h0; \
 701 | 		h[1] = (sc)->h1; \
 702 | 		h[2] = (sc)->h2; \
 703 | 		h[3] = (sc)->h3; \
 704 | 		h[4] = (sc)->h4; \
 705 | 		h[5] = (sc)->h5; \
 706 | 		h[6] = (sc)->h6; \
 707 | 		h[7] = (sc)->h7; \
 708 | 		bcount = sc->bcount; \
 709 | 	} while (0)
 710 | 
 711 | #define WRITE_STATE_BIG(sc)   do { \
 712 | 		(sc)->h0 = h[0]; \
 713 | 		(sc)->h1 = h[1]; \
 714 | 		(sc)->h2 = h[2]; \
 715 | 		(sc)->h3 = h[3]; \
 716 | 		(sc)->h4 = h[4]; \
 717 | 		(sc)->h5 = h[5]; \
 718 | 		(sc)->h6 = h[6]; \
 719 | 		(sc)->h7 = h[7]; \
 720 | 		sc->bcount = bcount; \
 721 | 	} while (0)
 722 | 
 723 | #else
 724 | 
/*
 * DECL_STATE_BIG -- unrolled variant: declare the local working copy of
 * the hash state as eight separate chaining words plus the block counter.
 * The expansion already ends with ';', so it is used without a trailing
 * semicolon.
 */
#define DECL_STATE_BIG \
	sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; \
	sph_u64 bcount;
 728 | 
 729 | #define READ_STATE_BIG(sc)   do { \
 730 | 		h0 = (sc)->h0; \
 731 | 		h1 = (sc)->h1; \
 732 | 		h2 = (sc)->h2; \
 733 | 		h3 = (sc)->h3; \
 734 | 		h4 = (sc)->h4; \
 735 | 		h5 = (sc)->h5; \
 736 | 		h6 = (sc)->h6; \
 737 | 		h7 = (sc)->h7; \
 738 | 		bcount = sc->bcount; \
 739 | 	} while (0)
 740 | 
 741 | #define WRITE_STATE_BIG(sc)   do { \
 742 | 		(sc)->h0 = h0; \
 743 | 		(sc)->h1 = h1; \
 744 | 		(sc)->h2 = h2; \
 745 | 		(sc)->h3 = h3; \
 746 | 		(sc)->h4 = h4; \
 747 | 		(sc)->h5 = h5; \
 748 | 		(sc)->h6 = h6; \
 749 | 		(sc)->h7 = h7; \
 750 | 		sc->bcount = bcount; \
 751 | 	} while (0)
 752 | 
 753 | #endif
 754 | 
 755 | #if 0
 756 | /* obsolete */
 757 | static void
 758 | skein_small_init(sph_skein_small_context *sc, const sph_u64 *iv)
 759 | {
 760 | 	sc->h0 = iv[0];
 761 | 	sc->h1 = iv[1];
 762 | 	sc->h2 = iv[2];
 763 | 	sc->h3 = iv[3];
 764 | 	sc->bcount = 0;
 765 | 	sc->ptr = 0;
 766 | }
 767 | #endif
 768 | 
 769 | static void
 770 | skein_big_init(sph_skein_big_context *sc, const sph_u64 *iv)
 771 | {
 772 | 	sc->h0 = iv[0];
 773 | 	sc->h1 = iv[1];
 774 | 	sc->h2 = iv[2];
 775 | 	sc->h3 = iv[3];
 776 | 	sc->h4 = iv[4];
 777 | 	sc->h5 = iv[5];
 778 | 	sc->h6 = iv[6];
 779 | 	sc->h7 = iv[7];
 780 | 	sc->bcount = 0;
 781 | 	sc->ptr = 0;
 782 | }
 783 | 
 784 | #if 0
 785 | /* obsolete */
 786 | static void
 787 | skein_small_core(sph_skein_small_context *sc, const void *data, size_t len)
 788 | {
 789 | 	unsigned char *buf;
 790 | 	size_t ptr, clen;
 791 | 	unsigned first;
 792 | 	DECL_STATE_SMALL
 793 | 
 794 | 	buf = sc->buf;
 795 | 	ptr = sc->ptr;
 796 | 	clen = (sizeof sc->buf) - ptr;
 797 | 	if (len <= clen) {
 798 | 		memcpy(buf + ptr, data, len);
 799 | 		sc->ptr = ptr + len;
 800 | 		return;
 801 | 	}
 802 | 	if (clen != 0) {
 803 | 		memcpy(buf + ptr, data, clen);
 804 | 		data = (const unsigned char *)data + clen;
 805 | 		len -= clen;
 806 | 	}
 807 | 
 808 | #if SPH_SMALL_FOOTPRINT_SKEIN
 809 | 
 810 | 	READ_STATE_SMALL(sc);
 811 | 	first = (bcount == 0) << 7;
 812 | 	for (;;) {
 813 | 		bcount ++;
 814 | 		UBI_SMALL(96 + first, 0);
 815 | 		if (len <= sizeof sc->buf)
 816 | 			break;
 817 | 		first = 0;
 818 | 		memcpy(buf, data, sizeof sc->buf);
 819 | 		data = (const unsigned char *)data + sizeof sc->buf;
 820 | 		len -= sizeof sc->buf;
 821 | 	}
 822 | 	WRITE_STATE_SMALL(sc);
 823 | 	sc->ptr = len;
 824 | 	memcpy(buf, data, len);
 825 | 
 826 | #else
 827 | 
 828 | 	/*
 829 | 	 * Unrolling the loop yields a slight performance boost, while
 830 | 	 * keeping the code size around 24 kB on 32-bit x86.
 831 | 	 */
 832 | 	READ_STATE_SMALL(sc);
 833 | 	first = (bcount == 0) << 7;
 834 | 	for (;;) {
 835 | 		bcount ++;
 836 | 		UBI_SMALL(96 + first, 0);
 837 | 		if (len <= sizeof sc->buf)
 838 | 			break;
 839 | 		buf = (unsigned char *)data;
 840 | 		bcount ++;
 841 | 		UBI_SMALL(96, 0);
 842 | 		if (len <= 2 * sizeof sc->buf) {
 843 | 			data = buf + sizeof sc->buf;
 844 | 			len -= sizeof sc->buf;
 845 | 			break;
 846 | 		}
 847 | 		buf += sizeof sc->buf;
 848 | 		data = buf + sizeof sc->buf;
 849 | 		first = 0;
 850 | 		len -= 2 * sizeof sc->buf;
 851 | 	}
 852 | 	WRITE_STATE_SMALL(sc);
 853 | 	sc->ptr = len;
 854 | 	memcpy(sc->buf, data, len);
 855 | 
 856 | #endif
 857 | }
 858 | #endif
 859 | 
/*
 * Absorb `len` bytes of message data into the context.
 *
 * sc    hashing context (buffer, buffer fill level, chaining value,
 *       block counter)
 * data  input bytes
 * len   number of input bytes; may be zero
 *
 * Data is accumulated in sc->buf. A full buffer is only compressed when
 * more input arrives after it, so on return the buffer may hold anything
 * from 0 to sizeof sc->buf bytes (see the comment below for why).
 */
static void
skein_big_core(sph_skein_big_context *sc, const void *data, size_t len)
{
	/*
	 * The Skein "final bit" in the tweak is troublesome here,
	 * because if the input has a length which is a multiple of the
	 * block size (512 bits) then that bit must be set for the
	 * final block, which is full of message bits (padding in
	 * Skein can be reduced to no extra bit at all). However, this
	 * function cannot know whether it processes the last chunks of
	 * the message or not. Hence we may keep a full block of buffered
	 * data (64 bytes).
	 */
	unsigned char *buf;
	size_t ptr;
	unsigned first;
	DECL_STATE_BIG

	buf = sc->buf;
	ptr = sc->ptr;
	/*
	 * Fast path: everything fits in the remaining buffer space. The
	 * buffer may become exactly full here; it is deliberately left
	 * uncompressed.
	 */
	if (len <= (sizeof sc->buf) - ptr) {
		memcpy(buf + ptr, data, len);
		ptr += len;
		sc->ptr = ptr;
		return;
	}

	/*
	 * Slow path: at least one block will be compressed. UBI_BIG works
	 * on the locals `buf`, `bcount` and the h* state declared above.
	 */
	READ_STATE_BIG(sc);
	/* 128 is added to the block type of the very first block only. */
	first = (bcount == 0) << 7;
	do {
		size_t clen;

		/* Buffer is full and input remains: compress it now. */
		if (ptr == sizeof sc->buf) {
			bcount ++;
			UBI_BIG(96 + first, 0);
			first = 0;
			ptr = 0;
		}
		/* Copy as much input as the buffer can take. */
		clen = (sizeof sc->buf) - ptr;
		if (clen > len)
			clen = len;
		memcpy(buf + ptr, data, clen);
		ptr += clen;
		data = (const unsigned char *)data + clen;
		len -= clen;
	} while (len > 0);
	WRITE_STATE_BIG(sc);
	sc->ptr = ptr;
}
 909 | 
 910 | #if 0
 911 | /* obsolete */
 912 | static void
 913 | skein_small_close(sph_skein_small_context *sc, unsigned ub, unsigned n,
 914 | 	void *dst, size_t out_len)
 915 | {
 916 | 	unsigned char *buf;
 917 | 	size_t ptr;
 918 | 	unsigned et;
 919 | 	int i;
 920 | 	DECL_STATE_SMALL
 921 | 
 922 | 	if (n != 0) {
 923 | 		unsigned z;
 924 | 		unsigned char x;
 925 | 
 926 | 		z = 0x80 >> n;
 927 | 		x = ((ub & -z) | z) & 0xFF;
 928 | 		skein_small_core(sc, &x, 1);
 929 | 	}
 930 | 
 931 | 	buf = sc->buf;
 932 | 	ptr = sc->ptr;
 933 | 	READ_STATE_SMALL(sc);
 934 | 	memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
 935 | 	et = 352 + ((bcount == 0) << 7) + (n != 0);
 936 | 	for (i = 0; i < 2; i ++) {
 937 | 		UBI_SMALL(et, ptr);
 938 | 		if (i == 0) {
 939 | 			memset(buf, 0, sizeof sc->buf);
 940 | 			bcount = 0;
 941 | 			et = 510;
 942 | 			ptr = 8;
 943 | 		}
 944 | 	}
 945 | 
 946 | 	sph_enc64le_aligned(buf +  0, h0);
 947 | 	sph_enc64le_aligned(buf +  8, h1);
 948 | 	sph_enc64le_aligned(buf + 16, h2);
 949 | 	sph_enc64le_aligned(buf + 24, h3);
 950 | 	memcpy(dst, buf, out_len);
 951 | }
 952 | #endif
 953 | 
/*
 * Finish a hash computation and write the digest.
 *
 * sc       hashing context; sc->buf is used as scratch space and the
 *          updated chaining value is NOT written back, so the context
 *          must be re-initialised before reuse (the public
 *          *_addbits_and_close wrappers do this)
 * ub, n    optional trailing message bits: the n most significant bits
 *          of ub are appended (n == 0 means no extra bits)
 * dst      receives the digest
 * out_len  number of digest bytes copied to dst
 */
static void
skein_big_close(sph_skein_big_context *sc, unsigned ub, unsigned n,
	void *dst, size_t out_len)
{
	unsigned char *buf;
	size_t ptr;
	unsigned et;
	int i;
#if SPH_SMALL_FOOTPRINT_SKEIN
	size_t u;
#endif
	DECL_STATE_BIG

	/*
	 * Add bit padding if necessary.
	 */
	if (n != 0) {
		unsigned z;
		unsigned char x;

		/*
		 * Keep the top n bits of ub, set the next lower bit to 1
		 * and clear everything below it, then feed that byte in
		 * as ordinary data.
		 */
		z = 0x80 >> n;
		x = ((ub & -z) | z) & 0xFF;
		skein_big_core(sc, &x, 1);
	}

	buf = sc->buf;
	ptr = sc->ptr;

	/*
	 * At that point, if ptr == 0, then the message was empty;
	 * otherwise, there is between 1 and 64 bytes (inclusive) which
	 * are yet to be processed. Either way, we complete the buffer
	 * to a full block with zeros (the Skein specification mandates
	 * that an empty message is padded so that there is at least
	 * one block to process).
	 *
	 * Once this block has been processed, we do it again, with
	 * a block full of zeros, for the output (that block contains
	 * the encoding of "0", over 8 bytes, then padded with zeros).
	 */
	READ_STATE_BIG(sc);
	memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
	/*
	 * Block type for the last message block: 352 is the value 96 used
	 * by skein_big_core plus 256; 128 is added when no block has been
	 * compressed yet (same rule as `first` in skein_big_core) and 1
	 * when padding bits were appended above.
	 * NOTE(review): 256 / 128 / 1 presumably map to the Skein tweak
	 * "final", "first" and "bit pad" flags -- confirm against the spec.
	 */
	et = 352 + ((bcount == 0) << 7) + (n != 0);
	for (i = 0; i < 2; i ++) {
		/*
		 * Pass 0 compresses the last message block (ptr bytes are
		 * added to the tweak position); pass 1 compresses the
		 * all-zero output block set up below.
		 */
		UBI_BIG(et, ptr);
		if (i == 0) {
			memset(buf, 0, sizeof sc->buf);
			bcount = 0;
			et = 510;
			ptr = 8;
		}
	}

#if SPH_SMALL_FOOTPRINT_SKEIN

	/*
	 * We use a temporary buffer because we must support the case
	 * where output size is not a multiple of 64 (namely, a 224-bit
	 * output).
	 */
	for (u = 0; u < out_len; u += 8)
		sph_enc64le_aligned(buf + u, h[u >> 3]);
	memcpy(dst, buf, out_len);

#else

	/*
	 * Serialise the whole state little-endian into the context buffer,
	 * then copy only the requested number of bytes to the caller.
	 */
	sph_enc64le_aligned(buf +  0, h0);
	sph_enc64le_aligned(buf +  8, h1);
	sph_enc64le_aligned(buf + 16, h2);
	sph_enc64le_aligned(buf + 24, h3);
	sph_enc64le_aligned(buf + 32, h4);
	sph_enc64le_aligned(buf + 40, h5);
	sph_enc64le_aligned(buf + 48, h6);
	sph_enc64le_aligned(buf + 56, h7);
	memcpy(dst, buf, out_len);

#endif
}
1032 | 
1033 | #if 0
1034 | /* obsolete */
1035 | static const sph_u64 IV224[] = {
1036 | 	SPH_C64(0xC6098A8C9AE5EA0B), SPH_C64(0x876D568608C5191C),
1037 | 	SPH_C64(0x99CB88D7D7F53884), SPH_C64(0x384BDDB1AEDDB5DE)
1038 | };
1039 | 
1040 | static const sph_u64 IV256[] = {
1041 | 	SPH_C64(0xFC9DA860D048B449), SPH_C64(0x2FCA66479FA7D833),
1042 | 	SPH_C64(0xB33BC3896656840F), SPH_C64(0x6A54E920FDE8DA69)
1043 | };
1044 | #endif
1045 | 
/*
 * Initial chaining value (eight 64-bit words) loaded by
 * sph_skein224_init() through skein_big_init().
 * NOTE(review): presumably the precomputed Skein-512-224 initial state
 * from the Skein specification -- verify against the reference values.
 */
static const sph_u64 IV224[] = {
	SPH_C64(0xCCD0616248677224), SPH_C64(0xCBA65CF3A92339EF),
	SPH_C64(0x8CCD69D652FF4B64), SPH_C64(0x398AED7B3AB890B4),
	SPH_C64(0x0F59D1B1457D2BD0), SPH_C64(0x6776FE6575D4EB3D),
	SPH_C64(0x99FBC70E997413E9), SPH_C64(0x9E2CFCCFE1C41EF7)
};
1052 | 
/*
 * Initial chaining value (eight 64-bit words) loaded by
 * sph_skein256_init() through skein_big_init().
 * NOTE(review): presumably the precomputed Skein-512-256 initial state
 * from the Skein specification -- verify against the reference values.
 */
static const sph_u64 IV256[] = {
	SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
	SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
	SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
	SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
};
1059 | 
/*
 * Initial chaining value (eight 64-bit words) loaded by
 * sph_skein384_init() through skein_big_init().
 * NOTE(review): presumably the precomputed Skein-512-384 initial state
 * from the Skein specification -- verify against the reference values.
 */
static const sph_u64 IV384[] = {
	SPH_C64(0xA3F6C6BF3A75EF5F), SPH_C64(0xB0FEF9CCFD84FAA4),
	SPH_C64(0x9D77DD663D770CFE), SPH_C64(0xD798CBF3B468FDDA),
	SPH_C64(0x1BC4A6668A0E4465), SPH_C64(0x7ED7D434E5807407),
	SPH_C64(0x548FC1ACD4EC44D6), SPH_C64(0x266E17546AA18FF8)
};
1066 | 
/*
 * Initial chaining value (eight 64-bit words) loaded by
 * sph_skein512_init() through skein_big_init(). This is the variant used
 * by skein_hash() at the end of this file.
 * NOTE(review): presumably the precomputed Skein-512-512 initial state
 * from the Skein specification -- verify against the reference values.
 */
static const sph_u64 IV512[] = {
	SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
	SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
	SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
	SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
};
1073 | 
1074 | #if 0
1075 | /* obsolete */
1076 | /* see sph_skein.h */
1077 | void
1078 | sph_skein224_init(void *cc)
1079 | {
1080 | 	skein_small_init(cc, IV224);
1081 | }
1082 | 
1083 | /* see sph_skein.h */
1084 | void
1085 | sph_skein224(void *cc, const void *data, size_t len)
1086 | {
1087 | 	skein_small_core(cc, data, len);
1088 | }
1089 | 
1090 | /* see sph_skein.h */
1091 | void
1092 | sph_skein224_close(void *cc, void *dst)
1093 | {
1094 | 	sph_skein224_addbits_and_close(cc, 0, 0, dst);
1095 | }
1096 | 
1097 | /* see sph_skein.h */
1098 | void
1099 | sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1100 | {
1101 | 	skein_small_close(cc, ub, n, dst, 28);
1102 | 	sph_skein224_init(cc);
1103 | }
1104 | 
1105 | /* see sph_skein.h */
1106 | void
1107 | sph_skein256_init(void *cc)
1108 | {
1109 | 	skein_small_init(cc, IV256);
1110 | }
1111 | 
1112 | /* see sph_skein.h */
1113 | void
1114 | sph_skein256(void *cc, const void *data, size_t len)
1115 | {
1116 | 	skein_small_core(cc, data, len);
1117 | }
1118 | 
1119 | /* see sph_skein.h */
1120 | void
1121 | sph_skein256_close(void *cc, void *dst)
1122 | {
1123 | 	sph_skein256_addbits_and_close(cc, 0, 0, dst);
1124 | }
1125 | 
1126 | /* see sph_skein.h */
1127 | void
1128 | sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1129 | {
1130 | 	skein_small_close(cc, ub, n, dst, 32);
1131 | 	sph_skein256_init(cc);
1132 | }
1133 | #endif
1134 | 
/*
 * Initialise the context `cc` for a Skein-224 computation: loads IV224
 * and clears the counters via skein_big_init(). See sph_skein.h.
 */
void
sph_skein224_init(void *cc)
{
	skein_big_init(cc, IV224);
}
1141 | 
/*
 * Feed `len` bytes from `data` into the Skein-224 context `cc`; thin
 * wrapper around skein_big_core(). See sph_skein.h.
 */
void
sph_skein224(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1148 | 
/*
 * Finish a Skein-224 computation with no extra trailing bits and write
 * the digest to `dst`; delegates to sph_skein224_addbits_and_close()
 * with ub = 0, n = 0. See sph_skein.h.
 */
void
sph_skein224_close(void *cc, void *dst)
{
	sph_skein224_addbits_and_close(cc, 0, 0, dst);
}
1155 | 
/*
 * Append the n top bits of `ub`, finish the Skein-224 computation and
 * write 28 digest bytes to `dst`. The context is re-initialised
 * afterwards so it can be reused for a new message. See sph_skein.h.
 */
void
sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 28);
	sph_skein224_init(cc);
}
1163 | 
/*
 * Initialise the context `cc` for a Skein-256 computation: loads IV256
 * and clears the counters via skein_big_init(). See sph_skein.h.
 */
void
sph_skein256_init(void *cc)
{
	skein_big_init(cc, IV256);
}
1170 | 
/*
 * Feed `len` bytes from `data` into the Skein-256 context `cc`; thin
 * wrapper around skein_big_core(). See sph_skein.h.
 */
void
sph_skein256(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1177 | 
/*
 * Finish a Skein-256 computation with no extra trailing bits and write
 * the digest to `dst`; delegates to sph_skein256_addbits_and_close()
 * with ub = 0, n = 0. See sph_skein.h.
 */
void
sph_skein256_close(void *cc, void *dst)
{
	sph_skein256_addbits_and_close(cc, 0, 0, dst);
}
1184 | 
/*
 * Append the n top bits of `ub`, finish the Skein-256 computation and
 * write 32 digest bytes to `dst`. The context is re-initialised
 * afterwards so it can be reused for a new message. See sph_skein.h.
 */
void
sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 32);
	sph_skein256_init(cc);
}
1192 | 
/*
 * Initialise the context `cc` for a Skein-384 computation: loads IV384
 * and clears the counters via skein_big_init(). See sph_skein.h.
 */
void
sph_skein384_init(void *cc)
{
	skein_big_init(cc, IV384);
}
1199 | 
/*
 * Feed `len` bytes from `data` into the Skein-384 context `cc`; thin
 * wrapper around skein_big_core(). See sph_skein.h.
 */
void
sph_skein384(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1206 | 
/*
 * Finish a Skein-384 computation with no extra trailing bits and write
 * the digest to `dst`; delegates to sph_skein384_addbits_and_close()
 * with ub = 0, n = 0. See sph_skein.h.
 */
void
sph_skein384_close(void *cc, void *dst)
{
	sph_skein384_addbits_and_close(cc, 0, 0, dst);
}
1213 | 
/*
 * Append the n top bits of `ub`, finish the Skein-384 computation and
 * write 48 digest bytes to `dst`. The context is re-initialised
 * afterwards so it can be reused for a new message. See sph_skein.h.
 */
void
sph_skein384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 48);
	sph_skein384_init(cc);
}
1221 | 
/*
 * Initialise the context `cc` for a Skein-512 computation: loads IV512
 * and clears the counters via skein_big_init(). See sph_skein.h.
 */
void
sph_skein512_init(void *cc)
{
	skein_big_init(cc, IV512);
}
1228 | 
/*
 * Feed `len` bytes from `data` into the Skein-512 context `cc`; thin
 * wrapper around skein_big_core(). See sph_skein.h.
 */
void
sph_skein512(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1235 | 
/*
 * Finish a Skein-512 computation with no extra trailing bits and write
 * the digest to `dst`; delegates to sph_skein512_addbits_and_close()
 * with ub = 0, n = 0. See sph_skein.h.
 */
void
sph_skein512_close(void *cc, void *dst)
{
	sph_skein512_addbits_and_close(cc, 0, 0, dst);
}
1242 | 
/*
 * Append the n top bits of `ub`, finish the Skein-512 computation and
 * write 64 digest bytes to `dst`. The context is re-initialised
 * afterwards so it can be reused for a new message. See sph_skein.h.
 */
void
sph_skein512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 64);
	sph_skein512_init(cc);
}
1250 | 
1251 | #endif
1252 | 
1253 | void skein_hash(const char* input, char* output)
1254 | {
1255 |     char temp[64];
1256 |     uint32_t len = 80; // check your header size
1257 | 
1258 |     sph_skein512_context ctx_skien;
1259 |     sph_skein512_init(&ctx_skien);
1260 |     sph_skein512(&ctx_skien, input, len);
1261 |     sph_skein512_close(&ctx_skien, &temp);
1262 | 
1263 |     SHA256_CTX ctx_sha256;
1264 |     SHA256_Init(&ctx_sha256);
1265 |     SHA256_Update(&ctx_sha256, &temp, 64);
1266 |     SHA256_Final((unsigned char*) output, &ctx_sha256);
1267 | }
1268 | 
1269 | 
1270 | #ifdef __cplusplus
1271 | }
1272 | #endif
1273 | 


--------------------------------------------------------------------------------
/skein.h:
--------------------------------------------------------------------------------
 1 | #ifndef SKEIN_H
 2 | #define SKEIN_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
/*
 * Compute the proof-of-work hash of a block header.
 *
 * input   must point to at least 80 readable bytes; exactly 80 bytes are
 *         hashed (the length is fixed inside skein_hash() in skein.c)
 * output  receives the SHA-256 digest of the 64-byte Skein-512 digest of
 *         the input; the Python binding in skeinmodule.c provides a
 *         32-byte buffer for it
 */
void skein_hash(const char* input, char* output);
 9 | 
10 | #ifdef __cplusplus
11 | }
12 | #endif
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/skeinmodule.c:
--------------------------------------------------------------------------------
 1 | #include <Python.h>
 2 | 
 3 | #include "skein.h"
 4 | 
 5 | static PyObject *skein_getpowhash(PyObject *self, PyObject *args)
 6 | {
 7 |     char *output;
 8 |     PyObject *value;
 9 | #if PY_MAJOR_VERSION >= 3
10 |     PyBytesObject *input;
11 | #else
12 |     PyStringObject *input;
13 | #endif
14 |     if (!PyArg_ParseTuple(args, "S", &input))
15 |         return NULL;
16 |     Py_INCREF(input);
17 |     output = PyMem_Malloc(32);
18 | 
19 | #if PY_MAJOR_VERSION >= 3
20 |     skein_hash((char *)PyBytes_AsString((PyObject*) input), output);
21 | #else
22 |     skein_hash((char *)PyString_AsString((PyObject*) input), output);
23 | #endif
24 |     Py_DECREF(input);
25 | #if PY_MAJOR_VERSION >= 3
26 |     value = Py_BuildValue("y#", output, 32);
27 | #else
28 |     value = Py_BuildValue("s#", output, 32);
29 | #endif
30 |     PyMem_Free(output);
31 |     return value;
32 | }
33 | 
/*
 * Method table of the extension module: exposes skein_getpowhash() to
 * Python under the name getPoWHash, called with positional arguments
 * (METH_VARARGS). The all-NULL entry terminates the table.
 */
static PyMethodDef SkeinMethods[] = {
    { "getPoWHash", skein_getpowhash, METH_VARARGS, "Returns the proof of work hash using skein hash" },
    { NULL, NULL, 0, NULL }
};
38 | 
39 | #if PY_MAJOR_VERSION >= 3
/* Python 3 module definition passed to PyModule_Create(). */
static struct PyModuleDef SkeinModule = {
    PyModuleDef_HEAD_INIT,
    "skein_hash",   /* module name, as imported from Python */
    "...",          /* module docstring (placeholder text) */
    -1,             /* m_size: no per-module state is allocated */
    SkeinMethods    /* functions exported by the module */
};
47 | 
/*
 * Python 3 module initialisation function: creates the skein_hash module
 * from SkeinModule and returns it (NULL on failure, as reported by
 * PyModule_Create).
 */
PyMODINIT_FUNC PyInit_skein_hash(void) {
    return PyModule_Create(&SkeinModule);
}
51 | 
52 | #else
53 | 
/*
 * Python 2 module initialisation function: registers the skein_hash
 * module with the methods listed in SkeinMethods. The module object
 * returned by Py_InitModule is intentionally discarded.
 */
PyMODINIT_FUNC initskein_hash(void) {
    (void) Py_InitModule("skein_hash", SkeinMethods);
}
57 | #endif
58 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | # to run : pytest test.py
 2 | 
 3 | try:
 4 |     from StringIO import StringIO ## for Python 2
 5 | except ImportError:
 6 |     from io import StringIO ## for Python 3
 7 | 
 8 | import skein_hash
 9 | import weakref
10 | import binascii
11 | from binascii import unhexlify, hexlify
12 | 
13 | import unittest
14 | 
15 | # block 211 for livenet.
16 | best_hash_value = '00000011d6202b031df083700a5c11138bc05d8eec7e01757fc816974dc85c78'
17 | header_hex_value = "03000000ea3f8b2d4f24d71b30a5941ad42bac191a147c41638fa39947d79ac4080000006daea14b9fdd2fe650a699f84b0c381e478b1897788bcd07ace0130c9d18614b74edd85dacb9191da5c99900"
18 |  
class TestSequenceFunctions(unittest.TestCase):
    """Check skein_hash.getPoWHash against a known block header."""

    def setUp(self):
        # Raw header bytes to hash, and the hash expected for them.
        self.my_block_header = unhexlify(header_hex_value)
        self.my_best_hash = best_hash_value

    def test_skein_hash(self):
        raw_digest = skein_hash.getPoWHash(self.my_block_header)
        # The expected value is the digest with its byte order reversed.
        reversed_hex = hexlify(raw_digest[::-1])
        self.module_pow_hash = reversed_hex.decode('utf-8')
        self.assertEqual(self.module_pow_hash, self.my_best_hash)
28 | 
29 | if __name__ == '__main__':
30 |     unittest.main()
31 | 
32 | 


--------------------------------------------------------------------------------