├── .gitignore
├── LICENSE
├── README.md
├── consttime_memcmp.c
├── cst_time_memcmp.pro
└── main.c


/.gitignore:
--------------------------------------------------------------------------------
1 | cst_time_memcmp.pro.user
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Christophe Meessen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Constant time memcmp() function
  2 | 
  3 | ``` C
  4 | int cst_time_memcmp(const void *m1, const void *m2, size_t n)
  5 | ```
  6 | 
  7 | The `cst_time_memcmp()` function returns the result of a lexicographical
  8 | comparison of the byte arrays `m1` and `m2`. The processing time is independent
  9 | of the byte values of `m1` and `m2`. It varies only with `n`.
 10 | 
 11 | The `cst_time_memcmp()` function returns `0` when the first `n` bytes of `m1` and
 12 | `m2` are equal. When the first different byte is found at index `k`, with `k < n`,
 13 | the function returns `-1` when `m1[k] < m2[k]`, and `+1` when `m1[k] > m2[k]`.
 14 | Apart from the comparison result, this function reveals nothing of the byte
 15 | values in arrays `m1` and `m2`.
 16 | 
 17 | The function also returns `0` when at least one of the following conditions is true.
 18 | 
 19 | - `n` is zero ;
 20 | - `m1` and `m2` are the same memory area ;
 21 | - `m1` is NULL ;
 22 | - `m2` is NULL.
 23 | 
 24 | ## Table of contents
 25 | 
 26 |   * [Algorithm](#algorithm)
 27 |   * [Alternate instructions](#alternate-instructions)
 28 |   * [The code](#the-code)
 29 |     * [Fastest implementation using subscipt](#fastest-implementation-using-subscipt)
 30 |     * [Fastest implementation using pointers](#fastest-implementation-using-pointers)
 31 |     * [Safest implementation using subscript](#safest-implementation-using-subscript)
 32 |     * [Safest implementation using pointers](#safest-implementation-using-pointers)
 33 |     * [Alternate constant time memcmp function](#alternate-constant-time-memcmp-function)
 34 |   * [Verification](#verification)
 35 |     * [Discussion](#discussion)
 36 |     * [Output of the program](#output-of-the-program)
 37 | 
 38 | ## Algorithm
 39 | 
 40 | The following code illustrates the logic of the non-constant-time memcmp() algorithm.
 41 | 
 42 | ``` C
 43 | int memcmp(const unsigned char *m1, const unsigned char *m2, size_t n) {
 44 |     size_t i;
 45 |     for (i = 0; i < n; ++i ) {
 46 |         int diff = m1[i] - m2[i];
 47 |         if (diff != 0)
 48 |             return (diff < 0) ? -1 : +1;
 49 |     }
 50 |     return 0;
 51 | }
 52 | ```
 53 | 
 54 | The constant time memcmp() function's algorithm is illustrated below
 55 | 
 56 | ``` C
 57 | int cst_time_memcmp(const unsigned char *m1, const unsigned char *m2, size_t n) {
 58 |     int res = 0, diff;
 59 |     if (n > 0) {
 60 |         do {
 61 |             --n;
 62 |             diff = m1[n] - m2[n];
 63 |             if (diff != 0)
 64 |                 res = diff;
 65 |         } while (n != 0);
 66 |     }
 67 |     return (res > 0) - (res < 0);
 68 | }
 69 | ```
 70 | 
 71 | The above code compares the bytes from the last to the first byte.
 72 | Every time different bytes are found, the difference is stored in `res`.
 73 | `res` thus contains the difference of the last different bytes met, which are
 74 | the first different bytes in array order. The function result is the same as
 75 | with the standard memcmp() function; the difference is that all bytes have been compared.
 76 | 
 77 | For a constant time memcmp() function we can't use conditional 
 78 | branching instructions because the processing time will change
 79 | with the branch followed.
 80 | 
 81 | For this reason the if instruction in the loop is replaced with
 82 | a constant time instruction yielding the same result. 
 83 | 
 84 | ``` C
 85 | if (diff != 0)
 86 |     res = diff;
 87 | ```
 88 | 
 89 | is replaced with
 90 | 
 91 | ``` C
 92 | res = (res & -!diff) | diff;
 93 | ```
 94 | 
 95 | In the above instruction, `-!diff` is -1 (0xFFFFFFFF) when `diff == 0`
 96 | and 0 otherwise.
 97 |     
 98 | ## Alternate instructions
 99 | 
100 | The expression `!diff` could be compiled into machine code performing a branch. 
101 | It depends on the compiler smartness and the machine code instruction set. 
102 | Some processors have a very limited machine instruction set and compilers are
103 | constrained to translate `!diff` into machine code using a branching 
104 | instruction. This is not the case with x86 processors and good compilers.
105 | 
106 | When in doubt, or to provide safe portable code, the instruction
107 | 
108 | ``` C
109 | res = (res & -!diff) | diff;
110 | ```
111 |     
112 | must be replaced with
113 | 
114 | ``` C
115 | res = (res & (((diff - 1) & ~diff) >> 8)) | diff;
116 | ```
117 |     
118 | The function will be slightly slower but won't need any branching.
119 | 
120 | Another solution, shown below, uses a table to implement the function.
121 | But processing speed could be affected by partial caching of 
122 | the table and thus indirectly reveal something of `m1` and `m2`
123 | comparison. Use of this method is thus discouraged.
124 | 
125 | ``` C
126 | static signed char tbl[256] = {-1, 0, ... 0 };
127 | res = (res & tbl[(unsigned char)diff]) | diff;
128 | ```
129 | 
130 | The generation of the return value from `res` could also be 
131 | compiled into machine code using branching instructions. To 
132 | avoid this in order to obtain portable code, the 
133 | instruction
134 | 
135 | ``` C
136 | return (res > 0) - (res < 0);
137 | ```
138 | 
139 | must be replaced with
140 | 
141 | ``` C
142 | return ((res - 1) >> 8) + (res >> 8) + 1;
143 | ```
144 |     
145 | In the above expression, `res >> 8` is -1 when `res < 0` and `(res - 1) >> 8` is
146 | -1 when `res <= 0`.
147 | 
148 | ## The code
149 | 
150 | As a summary, here are the different versions of the code. See the program
151 | output below for an execution time comparison. You may test your
152 | own configuration.
153 | 
154 | 
155 | ### Fastest implementation using subscipt
156 | 
157 | ``` C   
158 | int cst_time_memcmp_fastest1(const void *m1, const void *m2, size_t n) 
159 | {
160 |     const unsigned char *pm1 = (const unsigned char*)m1; 
161 |     const unsigned char *pm2 = (const unsigned char*)m2; 
162 |     int res = 0, diff;
163 |     if (n > 0) {
164 |         do {
165 |             --n;
166 |             diff = pm1[n] - pm2[n];
167 |             res = (res & -!diff) | diff;
168 |         } while (n != 0);
169 |     }
170 |     return (res > 0) - (res < 0);
171 | }
172 | ```
173 | 
174 | ### Fastest implementation using pointers
175 | 
176 | ``` C
177 | int cst_time_memcmp_fastest2(const void *m1, const void *m2, size_t n) 
178 | {
179 |     const unsigned char *pm1 = (const unsigned char*)m1 + n; 
180 |     const unsigned char *pm2 = (const unsigned char*)m2 + n; 
181 |     int res = 0;
182 |     if (n > 0) {
183 |         do {
184 |             int diff = *--pm1 - *--pm2;
185 |             res = (res & -!diff) | diff;
186 |         } while (pm1 != m1);
187 |     }
188 |     return (res > 0) - (res < 0);
189 | }
190 | ```
191 | 
192 | ### Safest implementation using subscript
193 | 
194 | ``` C
195 | int cst_time_memcmp_safest1(const void *m1, const void *m2, size_t n) 
196 | {
197 |     const unsigned char *pm1 = (const unsigned char*)m1; 
198 |     const unsigned char *pm2 = (const unsigned char*)m2; 
199 |     int res = 0, diff;
200 |     if (n > 0) {
201 |         do {
202 |             --n;
203 |             diff = pm1[n] - pm2[n];
204 |             res = (res & (((diff - 1) & ~diff) >> 8)) | diff;
205 |         } while (n != 0);
206 |     }
207 |     return ((res - 1) >> 8) + (res >> 8) + 1;
208 | }
209 | ```
210 | 
211 | ### Safest implementation using pointers
212 | 
213 | ``` C
214 | int cst_time_memcmp_safest2(const void *m1, const void *m2, size_t n) 
215 | {
216 |     const unsigned char *pm1 = (const unsigned char*)m1 + n; 
217 |     const unsigned char *pm2 = (const unsigned char*)m2 + n; 
218 |     int res = 0;
219 |     if (n > 0) {
220 |         do {
221 |             int diff = *--pm1 - *--pm2;
222 |             res = (res & (((diff - 1) & ~diff) >> 8)) | diff;
223 |         } while (pm1 != m1);
224 |     }
225 |     return ((res - 1) >> 8) + (res >> 8) + 1;
226 | }
227 | ```
228 | 
229 | ### Alternate constant time memcmp function
230 | 
231 | The following constant time `memcmp` function code was found here: ftp://ftp.icm.edu.pl/pub/NetBSD/misc/apb/consttime_memcmp.c. It was proposed to NetBSD, but finally rejected by the developers because they didn't find any use case for such a function. Testing byte array equality is the most frequent use, and the one where constant time matters. This code is thus used as a reference implementation and is unrelated to NetBSD. See issue [#2](/../../issues/2) for the discussion and clarification.
232 | 
233 | ``` C
234 | int consttime_memcmp(const void *b1, const void *b2, size_t len)
235 | {
236 |     const uint8_t *c1, *c2;
237 |     uint16_t d, r, m;
238 |     uint16_t v;
239 | 
240 |     c1 = b1;
241 |     c2 = b2;
242 |     r = 0;
243 |     while (len) {
244 |         v = ((uint16_t)(uint8_t)r)+255;
245 |         m = v/256-1;
246 |         d = (uint16_t)((int)*c1 - (int)*c2);
247 |         r |= (d & m);
248 |         ++c1;
249 |         ++c2;
250 |         --len;
251 |     }
252 |     return (int)((int32_t)(uint16_t)((uint32_t)r + 0x8000) - 0x8000);
253 | }
254 | ```
255 |     
256 | 
257 | ## Verification
258 | 
259 | The provided code comes with validation tests and a processing time
260 | measurement.
261 | 
262 | We compare our code execution time with the reference function `consttime_memcmp()`.
263 | Unfortunately this function doesn't return -1 or 1 and thus reveals
264 | something of the compared memory zones.
265 | 
266 | The test consists in comparing two 1 MiB buffers filled with constant byte values, as follows.
267 | 
268 | - In test 1, both buffers are filled with 0.
269 | - In test 2, 1 buffer is half filled with 0xFF and the other half with 0. The other buffer is filled with 0.
270 | - In test 3, 1 buffer is fully filled with 0xFF, while the other buffer is filled with 0.
271 | 
272 | Each measurement performs the buffer comparison 100 times.
273 | 100 measurements are performed to compute the mean and standard deviation.
274 | 
275 | 
276 | ### Discussion 
277 | 
278 | The fastest code is indeed the fastest. The safest code is slightly slower but 
279 | still faster than the `consttime_memcmp()` function. Using pointers instead
280 | of subscript operators is also slightly faster. 
281 | 
282 | It is valid but also overkill to use the `cst_time_memcmp` function to simply test byte array equality. A simpler and faster algorithm exists for that, and you'll find such a function in every good crypto library.
283 | 
284 | While some people may not see a use case for a constant time byte array lexicographical comparison, that doesn't prove that no use case exists. Nevertheless, if there is no obvious and frequent use case, it is perfectly justified to not include it in a library. Whoever needs it can copy the code from here.
285 | 
286 | ### Output of the program 
287 | 
288 |     $ gcc -O3 main.c -lm && ./a.out
289 |     Start testing
290 |     
291 |     ... all tests successfull ...
292 |     
293 |     ---- cst_time_memcmp_fastest1
294 |     test 1 : mean=104.149000 ms stddev=0.095697  n=100
295 |     test 2 : mean=104.121130 ms stddev=0.052406  n=100
296 |     test 3 : mean=104.573070 ms stddev=2.732596  n=100
297 |     x : 0xFFE95738
298 |     ---- cst_time_memcmp_fastest2
299 |     test 1 : mean=96.809990 ms stddev=0.037059  n=100
300 |     test 2 : mean=97.133720 ms stddev=1.474790  n=100
301 |     test 3 : mean=96.839680 ms stddev=0.999072  n=100
302 |     x : 0xFFE95738
303 |     ---- cst_time_memcmp_safest1
304 |     test 1 : mean=116.339240 ms stddev=0.049447  n=100
305 |     test 2 : mean=116.334330 ms stddev=0.035672  n=100
306 |     test 3 : mean=116.335910 ms stddev=0.037543  n=100
307 |     x : 0xFFE95738
308 |     ---- cst_time_memcmp_safest2
309 |     test 1 : mean=110.067570 ms stddev=0.033885  n=100
310 |     test 2 : mean=110.075140 ms stddev=0.061724  n=100
311 |     test 3 : mean=110.066070 ms stddev=0.030296  n=100
312 |     x : 0xFFE95738
313 |     ---- consttime_memcmp
314 |     test 1 : mean=159.558610 ms stddev=0.030275  n=100
315 |     test 2 : mean=159.561470 ms stddev=0.036409  n=100
316 |     test 3 : mean=159.763520 ms stddev=0.358880  n=100
317 |     x : 0xE96DE0C8
318 |     done
319 | 
320 |     
321 |     
322 | 


--------------------------------------------------------------------------------
/consttime_memcmp.c:
--------------------------------------------------------------------------------
  1 | #include <stddef.h>
  2 | #include <inttypes.h>
  3 | 
  4 | /*
  5 |  * "constant time" memcmp.  Time taken depends on the buffer length, of
  6 |  * course, but not on the content of the buffers.
  7 |  *
  8 |  * Just like the ordinary memcmp function, the return value is
  9 |  * tri-state: <0, 0, or >0.  However, applications that need a
 10 |  * constant-time memory comparison function usually need only a
 11 |  * two-state result, signalling only whether the inputs were identical
 12 |  * or different, but not signalling which of the inputs was larger.
 13 |  * This code could be made significantly faster and simpler if the
 14 |  * requirement for a tri-state result were removed.
 15 |  *
 16 |  * In order to protect against adversaries who can observe timing,
 17 |  * cache hits or misses, page faults, etc., and who can use such
 18 |  * observations to learn something about the relationship between the
 19 |  * contents of the two buffers, we have to perform exactly the same
 20 |  * instructions and memory accesses regardless of the contents of the
 21 |  * buffers.  We can't stop as soon as we find a difference, we can't
 22 |  * take different conditional branches depending on the data, and we
 23 |  * can't use different pointers or array indexes depending on the data.
 24 |  *
 25 |  * Further reading:
 26 |  *
 27 |  * .Rs
 28 |  * .%A Paul C. Kocher
 29 |  * .%T Timing Attacks on Implementations of Diffie-Hellman, RSA, DSS, and Other Systems
 30 |  * .%D 1996
 31 |  * .%J CRYPTO 1996
 32 |  * .%P 104-113
 33 |  * .%U http://www.cryptography.com/timingattack/paper.html
 34 |  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fwww.cryptography.com%2Ftimingattack%2Fpaper.html&date=2012-10-17
 35 |  * .Re
 36 |  *
 37 |  * .Rs
 38 |  * .%A D. Boneh
 39 |  * .%A D. Brumley
 40 |  * .%T Remote timing attacks are practical
 41 |  * .%D August 2003
 42 |  * .%J Proceedings of the 12th Usenix Security Symposium, 2003
 43 |  * .%U https://crypto.stanford.edu/~dabo/abstracts/ssl-timing.html
 44 |  * .%U http://www.webcitation.org/query?url=https%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fabstracts%2Fssl-timing.html&date=2012-10-17
 45 |  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fpubs%2Fpapers%2Fssl-timing.pdf&date=2012-10-17
 46 |  * .Re
 47 |  *
 48 |  * .Rs
 49 |  * .%A Coda Hale
 50 |  * .%T A Lesson In Timing Attacks (or, Don't use MessageDigest.isEquals)
 51 |  * .%D 13 Aug 2009
 52 |  * .%U http://codahale.com/a-lesson-in-timing-attacks/
 53 |  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcodahale.com%2Fa-lesson-in-timing-attacks%2F&date=2012-10-17
 54 |  * .Re
 55 |  *
 56 |  */
 57 | 
 58 | /*
 59 |  * A note on portability:
 60 |  *
 61 |  * We assume that char is exactly 8 bits, the same as uint8_t, and that
 62 |  * integer types with exactly 16 bits and exactly 32 bits exist.  (If
 63 |  * there is ever a need to change this, then the actual requirement is
 64 |  * that we need a type that is at least two bits wider than char, and
 65 |  * another type that is at least two bits wider than that, or we need to
 66 |  * fake it somehow.)
 67 |  *
 68 |  * We do not assume any particular size for the plain "int" type, except
 69 |  * that it is at least 16 bits, as is guaranteed by the C language
 70 |  * standard.
 71 |  *
 72 |  * We do not assume that signed integer overflow is harmless.  We
 73 |  * ensure that signed integer overflow does not occur, so that
 74 |  * the undefined behaviour of signed overflow is never invoked.
 75 |  *
 76 |  * We rely on the C standard's guarantees regarding the wraparound
 77 |  * behaviour of unsigned integer arithmetic, and on the analogous
 78 |  * guarantees regarding conversions from signed types to narrower
 79 |  * unsigned types.
 80 |  *
 81 |  * We do not assume that the platform uses two's complement arithmetic.
 82 |  */
 83 | 
 84 | /*
 85 |  * How hard do we have to try to prevent unwanted compiler optimisations?
 86 |  *
 87 |  * Try compiling with "#define USE_VOLATILE_TEMPORARY 0", and examine
 88 |  * the compiler output.  If the only conditional tests in the entire
 89 |  * function are to test whether len is zero, then all is well, but try
 90 |  * again with different optimisation flags to be sure.  If the compiler
 91 |  * emitted code with conditional tests that do anything other than
 92 |  * testing whether len is zero, then that's a problem, so try again with
 93 |  * "#define USE_VOLATILE_TEMPORARY 1".  If it's still bad, then you are
 94 |  * out of luck.
 95 |  */
 96 | #define USE_VOLATILE_TEMPORARY 0
 97 | /* Compare len bytes of b1 and b2; returns <0, 0 or >0 (within -255..+255), and is intended to take time that depends only on len. */
 98 | int consttime_memcmp(const void *b1, const void *b2, size_t len)
 99 | {
100 | 	const uint8_t *c1, *c2;
101 | 	uint16_t d, r, m;
102 | 
103 | #if USE_VOLATILE_TEMPORARY
104 | 	volatile uint16_t v;
105 | #else
106 | 	uint16_t v;
107 | #endif
108 | 
109 | 	c1 = b1;
110 | 	c2 = b2;
111 | 
112 | 	r = 0;
113 | 	while (len) {
114 | 		/*
115 | 		 * Take the low 8 bits of r (in the range 0x00 to 0xff,
116 | 		 * or 0 to 255);
117 | 		 * As explained elsewhere, the low 8 bits of r will be zero
118 | 		 * if and only if all bytes compared so far were identical;
119 | 		 * Zero-extend to a 16-bit type (in the range 0x0000 to
120 | 		 * 0x00ff);
121 | 		 * Add 255, yielding a result in the range 255 to 510;
122 | 		 * Save that in a volatile variable to prevent
123 | 		 * the compiler from trying any shortcuts (the
124 | 		 * use of a volatile variable depends on "#if
125 | 		 * USE_VOLATILE_TEMPORARY", and most compilers won't
126 | 		 * need it);
127 | 		 * Divide by 256 yielding a result of 1 if the original
128 | 		 * value of r was non-zero, or 0 if r was zero;
129 | 		 * Subtract 1, yielding 0 if r was non-zero, or -1 if r
130 | 		 * was zero;
131 | 		 * Convert to uint16_t, yielding 0x0000 if r was
132 | 		 * non-zero, or 0xffff if r was zero;
133 | 		 * Save in m.
134 | 		 */
135 | 		v = ((uint16_t)(uint8_t)r)+255;
136 | 		m = v/256-1;
137 | 
138 | 		/*
139 | 		 * Get the values from *c1 and *c2 as uint8_t (each will
140 | 		 * be in the range 0 to 255, or 0x00 to 0xff);
141 | 		 * Convert them to signed int values (still in the
142 | 		 * range 0 to 255);
143 | 		 * Subtract them using signed arithmetic, yielding a
144 | 		 * result in the range -255 to +255;
145 | 		 * Convert to uint16_t, yielding a result in the range
146 | 		 * 0xff01 to 0xffff (for what was previously -255 to
147 | 		 * -1), or 0, or in the range 0x0001 to 0x00ff (for what
148 | 		 * was previously +1 to +255).
149 | 		 */
150 | 		d = (uint16_t)((int)*c1 - (int)*c2);
151 | 
152 | 		/*
153 | 		 * If the low 8 bits of r were previously 0, then m
154 | 		 * is now 0xffff, so (d & m) is the same as d, so we
155 | 		 * effectively copy d to r;
156 | 		 * Otherwise, if r was previously non-zero, then m is
157 | 		 * now 0, so (d & m) is zero, so leave r unchanged.
158 | 		 * Note that the low 8 bits of d will be zero if and
159 | 		 * only if d == 0, which happens when *c1 == *c2.
160 | 		 * The low 8 bits of r are thus zero if and only if the
161 | 		 * entirety of r is zero, which happens if and only if
162 | 		 * all bytes compared so far were equal.  As soon as a
163 | 		 * non-zero value is stored in r, it remains unchanged
164 | 		 * for the remainder of the loop.
165 | 		 */
166 | 		r |= (d & m);
167 | 
168 | 		/*
169 | 		 * Increment pointers, decrement length, and loop.
170 | 		 */
171 | 		++c1;
172 | 		++c2;
173 | 		--len;
174 | 	}
175 | 
176 | 	/*
177 | 	 * At this point, r is an unsigned value, which will be 0 if the
178 | 	 * final result should be zero, or in the range 0x0001 to 0x00ff
179 | 	 * (1 to 255) if the final result should be positive, or in the
180 | 	 * range 0xff01 to 0xffff (65281 to 65535) if the final result
181 | 	 * should be negative.
182 | 	 *
183 | 	 * We want to convert the unsigned values in the range 0xff01
184 | 	 * to 0xffff to signed values in the range -255 to -1, while
185 | 	 * converting the other unsigned values to equivalent signed
186 | 	 * values (0, or +1 to +255).
187 | 	 *
188 | 	 * On a machine with two's complement arithmetic, simply copying
189 | 	 * the underlying bits (with sign extension if int is wider than
190 | 	 * 16 bits) would do the job, so something like this might work:
191 | 	 *
192 | 	 *     return (int16_t)r;
193 | 	 *
194 | 	 * However, that invokes implementation-defined behaviour,
195 | 	 * because values larger than 32767 can't fit in a signed 16-bit
196 | 	 * integer without overflow.
197 | 	 *
198 | 	 * To avoid any implementation-defined behaviour, we go through
199 | 	 * these contortions:
200 | 	 *
201 | 	 * a. Calculate ((uint32_t)r + 0x8000).  The cast to uint32_t
202 | 	 *    is to prevent problems on platforms where int is narrower
203 | 	 *    than 32 bits.  If int is larger than 32 bits, then the
204 | 	 *    usual arithmetic conversions cause this addition to be
205 | 	 *    done in unsigned int arithmetic.  If int is 32 bits
206 | 	 *    or narrower, then this addition is done in uint32_t
207 | 	 *    arithmetic.  In either case, no overflow or wraparound
208 | 	 *    occurs, and the result from this step has a value that
209 | 	 *    will be one of 0x00008000 (32768), or in the range
210 | 	 *    0x00008001 to 0x000080ff (32769 to 33023), or in the range
211 | 	 *    0x00017f01 to 0x00017fff (98049 to 98303).
212 | 	 *
213 | 	 * b. Cast the result from (a) to uint16_t.  This effectively
214 | 	 *    discards the high bits of the result, in a way that is
215 | 	 *    well defined by the C language.  The result from this step
216 | 	 *    will be of type uint16_t, and its value will be one of
217 | 	 *    0x8000 (32768), or in the range 0x8001 to 0x80ff (32769 to
218 | 	 *    33023), or in the range 0x7f01 to 0x7fff (32513 to
219 | 	 *    32767).
220 | 	 *
221 | 	 * c. Cast the result from (b) to int32_t.  We use int32_t
222 | 	 *    instead of int because we need a type that's strictly
223 | 	 *    larger than 16 bits, and the C standard allows
224 | 	 *    implementations where int is only 16 bits.  The result
225 | 	 *    from this step will be of type int32_t, and its value will
226 | 	 *    be one of 0x00008000 (32768), or in the range 0x00008001
227 | 	 *    to 0x000080ff (32769 to 33023), or in the range 0x00007f01
228 | 	 *    to 0x00007fff (32513 to 32767).
229 | 	 *
230 | 	 * d. Take the result from (c) and subtract 0x8000 (32768) using
231 | 	 *    signed int32_t arithmetic.  The result from this step will
232 | 	 *    be of type int32_t and the value will be one of
233 | 	 *    0x00000000 (0), or in the range 0x00000001 to 0x000000ff
234 | 	 *    (+1 to +255), or in the range 0xffffff01 to 0xffffffff
235 | 	 *    (-255 to -1).
236 | 	 *
237 | 	 * e. Cast the result from (d) to int.  This does nothing
238 | 	 *    interesting, except to make explicit what would have been
239 | 	 *    implicit in the return statement.  The final result is an
240 | 	 *    int in the range -255 to +255.
241 | 	 *
242 | 	 * Unfortunately, compilers don't seem to be good at figuring
243 | 	 * out that most of this can be optimised away by careful choice
244 | 	 * of register width and sign extension.
245 | 	 *
246 | 	 */
247 | 	return (/*e*/ int)(/*d*/
248 | 	    (/*c*/ int32_t)(/*b*/ uint16_t)(/*a*/ (uint32_t)r + 0x8000)
249 | 	    - 0x8000);
250 | }
251 | 


--------------------------------------------------------------------------------
/cst_time_memcmp.pro:
--------------------------------------------------------------------------------
1 | TEMPLATE = app
2 | CONFIG += console
3 | CONFIG -= app_bundle
4 | CONFIG -= qt
5 | SOURCES += main.c
6 | 
7 | 


--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <sys/time.h>
  3 | #include <memory.h>
  4 | #include <stdlib.h>
  5 | #include <math.h>
  6 | #include <inttypes.h>
  7 | 
  8 | 
  9 | /* The cst_time_memcmp() function compares the first n bytes (each interpreted
 10 |  * as unsigned char) of the n bytes long memory areas m1 and m2 in a time
 11 |  * exactly proportional to n.
 12 |  *
 13 |  * The cst_time_memcmp() function returns 0 if the n first bytes of m1 and
 14 |  * m2 are equal. If the first different byte is found at index k, the function
 15 |  * returns -1 if m1[k] < m2[k], and +1 if m1[k] > m2[k]. Apart from the
 16 |  * comparison result, this function reveals nothing of m1 or m2.
 17 |  *
 18 |  * The function also returns 0 when at least one of the following conditions
 19 |  * is true.
 20 |  *
 21 |  * - n is zero ;
 22 |  * - m1 and m2 are the same memory area ;
 23 |  * - m1 is NULL ;
 24 |  * - m2 is NULL.
 25 |  */
 26 | int cst_time_memcmp_fastest2(const void *m1, const void *m2, size_t n)
 27 | {
 28 |     int res = 0;
 29 |     if (m1 != m2 && n && m1 && m2) { // otherwise nothing is compared and 0 is returned
 30 |         const unsigned char *pm1 = (const unsigned char *)m1 + n; // one past the last byte
 31 |         const unsigned char *pm2 = (const unsigned char *)m2 + n;
 32 |         do {
 33 |             int diff = *--pm1 - *--pm2; // byte difference, walking from the last byte to the first
 34 |             // Branch-free form of "if (diff != 0) res = diff;": -!diff is -1 when diff == 0, else 0.
 35 |             res = (res & -!diff) | diff;
 36 |         } while (pm1 != m1); // stop once the first byte has been processed
 37 |     }
 38 |     return (res > 0) - (res < 0); // reduce res to its sign: -1, 0 or +1
 39 | }
 40 | /* Subscript variant of the "fastest" comparison: returns -1, 0 or +1; 0 also when n is 0, m1 == m2, or either pointer is NULL. */
 41 | int cst_time_memcmp_fastest1(const void *m1, const void *m2, size_t n)
 42 | {
 43 |     int res = 0, diff;
 44 |     if (m1 != m2 && n && m1 && m2) { // otherwise nothing is compared and 0 is returned
 45 |         const unsigned char *pm1 = (const unsigned char *)m1;
 46 |         const unsigned char *pm2 = (const unsigned char *)m2;
 47 |         do {
 48 |             --n; // indices are visited from n-1 down to 0
 49 |             diff = pm1[n] - pm2[n];
 50 |             // Branch-free form of "if (diff != 0) res = diff;": -!diff is -1 when diff == 0, else 0.
 51 |             res = (res & -!diff) | diff;
 52 |         } while (n != 0);
 53 |     }
 54 |     return (res > 0) - (res < 0); // reduce res to its sign: -1, 0 or +1
 55 | }
 56 | 
 57 | int cst_time_memcmp_safest1(const void *m1, const void *m2, size_t n)
 58 | {
 59 |     int res = 0, diff;
 60 |     if (m1 != m2 && n && m1 && m2) {
 61 |         const unsigned char *pm1 = (const unsigned char *)m1;
 62 |         const unsigned char *pm2 = (const unsigned char *)m2;
 63 |         do {
 64 |             --n;
 65 |             diff = pm1[n] - pm2[n];
 66 |             // if (diff != 0) res = diff;
 67 |             res = (res & (((diff - 1) & ~diff) >> 8)) | diff;
 68 |         } while (n != 0);
 69 |     }
 70 |     return ((res - 1) >> 8) + (res >> 8) + 1;
 71 | }
 72 | 
 73 | 
 74 | int cst_time_memcmp_safest2(const void *m1, const void *m2, size_t n)
 75 | {
 76 |     int res = 0, diff;
 77 |     if (m1 != m2 && n && m1 && m2) {
 78 |         const unsigned char *pm1 = (const unsigned char *)m1 + n;
 79 |         const unsigned char *pm2 = (const unsigned char *)m2 + n;
 80 |         do {
 81 |             diff = *--pm1 - *--pm2;
 82 |             // if (diff != 0) res = diff;
 83 |             res = (res & (((diff - 1) & ~diff) >> 8)) | diff;
 84 |         } while (pm1 != m1);
 85 |     }
 86 |     return ((res - 1) >> 8) + (res >> 8) + 1;
 87 | }
 88 | 
 89 | /* Reference implementation used for timing comparison: returns 0 or the first non-zero byte difference (in -255..+255). */
 90 | int consttime_memcmp(const void *b1, const void *b2, size_t len)
 91 | {
 92 |     const uint8_t *c1, *c2;
 93 |     uint16_t d, r, m;
 94 |     uint16_t v;
 95 | 
 96 |     c1 = b1;
 97 |     c2 = b2;
 98 |     r = 0;
 99 |     while (len) {
100 |         v = ((uint16_t)(uint8_t)r)+255; /* 255 when the low byte of r is 0, otherwise 256..510 */
101 |         m = v/256-1;                    /* 0xffff while no difference has been stored, else 0 */
102 |         d = (uint16_t)((int)*c1 - (int)*c2); /* byte difference wrapped to 16 bits */
103 |         r |= (d & m);                   /* stores d only while r is still 0 */
104 |         ++c1;
105 |         ++c2;
106 |         --len;
107 |     }
108 |     return (int)((int32_t)(uint16_t)((uint32_t)r + 0x8000) - 0x8000); /* map 0xff01..0xffff back to -255..-1 */
109 | }
110 | 
111 | 
/*
 * Format the n bytes at a as space-separated upper-case hex pairs
 * (e.g. "12 AB") for test output.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be consumed before calling again. Yields "NULL" when a
 * is NULL, an empty string when n is 0, and "data[n] too long" when n is
 * larger than 32 bytes.
 */
const char *data_to_hex(const unsigned char *a, size_t n)
{
    static char buf[100];
    if (!a) {
        snprintf(buf, sizeof buf, "NULL");
    } else if (n < 33) {
        char *p = buf;
        size_t i;
        /* Start from an empty string so n == 0 does not return the text
         * left in the static buffer by a previous call. */
        buf[0] = '\0';
        for (i = 0; i < n; ++i) {
            /* At most 32 * 3 characters are written, which fits in buf. */
            p += snprintf(p, sizeof buf - (size_t)(p - buf), "%02X ", a[i]);
        }
        if (p != buf) {
            *--p = '\0'; /* drop the trailing space */
        }
    } else {
        snprintf(buf, sizeof buf, "data[%zu] too long", n);
    }
    return buf;
}
131 | 
132 | /* Correctness checks: each macro calls F(A, B, N) once and prints SUCCESS or FAIL. This one expects a negative result. */
133 | #define TEST_ONE_SMALLER(A,B,N,F) do {  \
134 |     if (F(A, B, N) < 0)                 \
135 |         printf("SUCCESS test a < b\n"); \
136 |     else                                \
137 |         printf("FAIL !  test a < b\n"); \
138 | } while (0)
139 | /* Expects F(A, B, N) to return exactly 0. */
140 | #define TEST_ONE_EQUAL(A,B,N,F) do {    \
141 |     if (F(A, B, N) == 0)                \
142 |         printf("SUCCESS test a = b\n"); \
143 |     else                                \
144 |         printf("FAIL !  test a = b\n"); \
145 | } while (0)
146 | /* Expects F(A, B, N) to return a positive result. */
147 | #define TEST_ONE_BIGGER(A,B,N,F) do {   \
148 |     if (F(A, B, N) > 0)                 \
149 |         printf("SUCCESS test a > b\n"); \
150 |     else                                \
151 |         printf("FAIL !  test a > b\n"); \
152 | } while (0)
153 | 
154 | /* Test vectors: rows 0 and 1 are identical; rows 1 to 5 are in strictly increasing lexicographic order. */
155 | const unsigned char d[6][5] = {
156 |     { 0x12, 0x34, 0x56, 0x78, 0x90 },
157 |     { 0x12, 0x34, 0x56, 0x78, 0x90 },
158 |     { 0x12, 0x34, 0x56, 0x90, 0x78 },
159 |     { 0x12, 0x34, 0x78, 0x56, 0x90 },
160 |     { 0x12, 0x90, 0x34, 0x56, 0x78 },
161 |     { 0x90, 0x12, 0x34, 0x56, 0x78 }
162 | };
163 | 
/*
 * Run the correctness checks for comparison function F on every ordered pair
 * of distinct rows of the test-vector table d.
 *
 * Rows 0 and 1 hold the same bytes, so that pair must compare equal. For any
 * other pair the expected sign follows the row order: a lower row index must
 * compare smaller. Each pair is printed (address and hex dump) before its
 * SUCCESS/FAIL line.
 *
 * The addresses are cast to (const void *) because the %p conversion requires
 * a pointer to void.
 */
#define TEST_ALL(F) do {                                         \
    size_t i, j, n = 6, l = 5;                                   \
    printf("---- %s\n", #F );                                    \
    for (i = 0; i < n; ++i) {                                    \
        for (j = 0; j < n; ++j) {                                \
            if (i == j)                                          \
                continue;                                        \
            puts("");                                            \
            printf("        a(%p)= %s\n", (const void *)d[i], data_to_hex(d[i], l)); \
            printf("        b(%p)= %s\n", (const void *)d[j], data_to_hex(d[j], l)); \
            if (i < 2 && j < 2)                                  \
                TEST_ONE_EQUAL(d[i], d[j], l, F);                \
            else if (i < j)                                      \
                TEST_ONE_SMALLER(d[i], d[j], l, F);              \
            else                                                 \
                TEST_ONE_BIGGER(d[i], d[j], l, F);               \
        }                                                        \
    }                                                            \
} while (0)
183 | 
184 | 
/*
 * TEST_TIME_ONE(A, B, K, L, N, F, MEAN, VAR)
 * Times the comparison routine F.
 *
 * Takes N samples; each sample is the wall-clock time, in milliseconds
 * (measured with gettimeofday), of L back-to-back calls of F(A, B, K).
 *
 *   MEAN  lvalue receiving the mean of the N samples.
 *   VAR   lvalue receiving the sample variance (divisor N - 1); it is set
 *         to 0.0 when fewer than two samples were taken, because the
 *         sample variance is undefined there and the division would
 *         otherwise produce NaN or a meaningless value.
 *
 * Mean and variance are accumulated in one pass with Welford's update.
 *
 * The macro also reads and updates an int named x that must be visible
 * at the point of use: every result of F is folded into a local
 * accumulator which is finally added to x.  NOTE(review): this looks
 * like a way to keep the compiler from discarding the calls to F --
 * confirm before changing it.
 */
#define TEST_TIME_ONE(A,B,K,L,N,F,MEAN,VAR) do{                \
    struct timeval t1, t2;                                     \
    size_t i, j, nval = 0;                                     \
    double m1 = 0.0, m2 = 0.0;                                 \
    int tmp = x;                                               \
    for (j = 0; j < (N); ++j) {                                \
        double elapsed_time_ms, delta;                         \
        gettimeofday(&t1, NULL);                               \
        for (i = 0; i < (L); ++i) {                            \
            tmp += F(A, B, K) * i;                             \
        }                                                      \
        gettimeofday(&t2, NULL);                               \
        elapsed_time_ms = (t2.tv_sec - t1.tv_sec) * 1000.0;    \
        elapsed_time_ms += (t2.tv_usec - t1.tv_usec) / 1000.0; \
        ++nval;                                                \
        delta = elapsed_time_ms - m1;                          \
        m1 += delta / nval;                                    \
        m2 += delta * (elapsed_time_ms - m1);                  \
    }                                                          \
    VAR = (nval > 1) ? m2 / (double)(nval - 1) : 0.0;          \
    MEAN = m1;                                                 \
    x += tmp;                                                  \
} while(0)
208 | 
209 | 
/*
 * TEST_TIME(F)
 * Timing comparison of the routine F on two 1 MiB buffers, printed to
 * stdout after a "---- <name of F>" header.  Each figure comes from
 * TEST_TIME_ONE with n = 100 samples of l = 100 calls:
 *   test 1: both buffers all zero.  The measurement is run twice and only
 *           the second result is printed (NOTE(review): the first run
 *           looks like a warm-up -- confirm);
 *   test 2: the second half of buf2 is set to 0xFF;
 *   test 3: all of buf2 is set to 0xFF.
 * Finally the accumulator x updated by TEST_TIME_ONE is printed in hex.
 *
 * If either buffer cannot be allocated, a message is written to stderr
 * and the timing run is skipped.  Both buffers are released before the
 * macro finishes.  It expands to a single statement with no trailing
 * semicolon, so it is safe inside if/else.
 */
#define TEST_TIME(F) do {                                      \
    size_t k = 1024*1024, l = 100, n = 100;                    \
    unsigned char *buf1 = malloc(k), *buf2 = malloc(k);        \
    double mean = 0.0, var = 0.0;                              \
    int x = 0;                                                 \
    if (buf1 == NULL || buf2 == NULL) {                        \
        fprintf(stderr, "%s: out of memory, timing skipped\n", \
                #F);                                           \
        free(buf1);                                            \
        free(buf2);                                            \
        break; /* leaves the enclosing do { } while (0) */     \
    }                                                          \
    memset(buf1, 0, k);                                        \
    memset(buf2, 0, k);                                        \
    printf("---- %s\n", #F );                                  \
    TEST_TIME_ONE(buf1, buf2, k, l, n, F, mean, var);          \
    TEST_TIME_ONE(buf1, buf2, k, l, n, F, mean, var);          \
    printf("test 1 : mean=%f ms stddev=%f  n=%zu\n",           \
           mean, sqrt(var), n);                                \
    memset(buf2+k/2, 0xFF, k-k/2);                             \
    TEST_TIME_ONE(buf1, buf2, k, l, n, F, mean, var);          \
    printf("test 2 : mean=%f ms stddev=%f  n=%zu\n",           \
           mean, sqrt(var), n);                                \
    memset(buf2, 0xFF, k);                                     \
    TEST_TIME_ONE(buf1, buf2, k, l, n, F, mean, var);          \
    printf("test 3 : mean=%f ms stddev=%f  n=%zu\n",           \
           mean, sqrt(var), n);                                \
    printf("x : 0x%08X\n", (unsigned int)x);                   \
    free(buf1);                                                \
    free(buf2);                                                \
} while (0)
232 | 
233 | 
234 | int main(void)
235 | {
236 |     puts("Start testing");
237 | 
238 |     TEST_ALL(cst_time_memcmp_fastest1);
239 |     TEST_ALL(cst_time_memcmp_fastest2);
240 |     TEST_ALL(cst_time_memcmp_safest1);
241 |     TEST_ALL(cst_time_memcmp_safest2);
242 |     TEST_ALL(consttime_memcmp);
243 |     puts("");
244 | 
245 |     TEST_TIME(cst_time_memcmp_fastest1);
246 |     TEST_TIME(cst_time_memcmp_fastest2);
247 |     TEST_TIME(cst_time_memcmp_safest1);
248 |     TEST_TIME(cst_time_memcmp_safest2);
249 |     TEST_TIME(consttime_memcmp);
250 | 
251 |     puts("done");
252 |     return 0;
253 | }
254 | 
255 | 


--------------------------------------------------------------------------------