├── include
    ├── rng.h
    ├── steps.h
    ├── verif.h
    ├── klpt.h
    ├── printing.h
    ├── uintbig.h
    ├── isomorphism.h
    ├── fp.h
    ├── sqisign.h
    ├── tedwards.h
    ├── toolbox.h
    ├── precomputed.h
    ├── tradeoff.h
    ├── constants.h
    ├── poly.h
    ├── mont.h
    ├── two_walks.h
    ├── fp2.h
    ├── isogenies.h
    ├── ideal.h
    ├── idiso.h
    └── cycle.h
├── src
    ├── steps_default.c
    ├── tune2c
    ├── p6983
    │   ├── tunecycles.out
    │   ├── constants.c
    │   ├── steps_tunecycles.c
    │   ├── fp2.c
    │   ├── fp.c
    │   └── fp.s
    ├── uint.c
    ├── steps.c
    ├── rng.c
    ├── uint.s
    ├── tunecycles.c
    ├── isomorphism.c
    ├── verif.c
    ├── two_walks.c
    ├── isogenies.c
    ├── toolbox.c
    ├── tedwards.c
    └── mitm.c
├── LICENSE
├── bench
    ├── keygen.c
    ├── sign.c
    └── verif.c
├── test
    ├── sqisign.c
    ├── montxy.c
    ├── mont.c
    ├── fp.c
    ├── isom.c
    ├── mitm.c
    ├── two_walks.c
    ├── mitm2.c
    ├── isogenies_mult.c
    ├── isogenies.c
    ├── klpt.c
    └── arith.c
├── README.md
└── Makefile


/include/rng.h:
--------------------------------------------------------------------------------
1 | #ifndef RNG_H
2 | #define RNG_H
3 | 
4 | #include <stdlib.h>
5 | 
6 | void randombytes(void *x, size_t l);
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/src/steps_default.c:
--------------------------------------------------------------------------------
 1 | #include "steps.h"
 2 | 
 3 | int steps_guess(long long *bs,long long *gs,long long l)
 4 | {
 5 |   if (l == 587) {
 6 |     *bs = 16;
 7 |     *gs = 9;
 8 |     return 1;
 9 |   }
10 |   return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/include/steps.h:
--------------------------------------------------------------------------------
 1 | #ifndef STEPS_H
 2 | #define STEPS_H
 3 | 
 4 | /* assumes l >= 3, l odd */
 5 | /* guarantees (b,g) = (0,0) _or_ the following: */
 6 | /* b > 0; b is even; g > 0; 4*b*g <= l-1 */
 7 | /* tries to choose (b,g) sensibly */
 8 | void steps(long long *bs,long long *gs,long long l);
 9 | 
10 | /* internal API for tuning to see bs,gs effects: */
11 | void steps_override(long long bs,long long gs);
12 | 
13 | /* internal API for tuning to select bs,gs: */
14 | int steps_guess(long long *bs,long long *gs,long long l);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/include/verif.h:
--------------------------------------------------------------------------------
 1 | #include "two_walks.h"
 2 | #include "isogenies.h"
 3 | 
 4 | // Compress a sequence of 2-walks of length len to a sequence of
 5 | // integers (n bits + 4 hint bits).
 6 | //
 7 | // zip must have space for len words
 8 | void compress(uint64_t *zip, const two_walk *walk, long len);
 9 | 
10 | // Inverse of the above
11 | //
12 | // A is the starting curve. At the end of the routine it is the
13 | // arrival curve.
14 | void decompress_old(two_walk *walk, proj *A, const uint64_t *zip, long len);
15 | 
16 | // Deterministically apply an isogeny of degree 3^a·5^b from A
17 | // TODO: merge with challenge in sqisign
18 | void challenge_alt(proj *A, const uintbig *m);
19 | 


--------------------------------------------------------------------------------
/include/klpt.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef KLPT_H
 3 | #define KLPT_H
 4 | 
 5 | #include <pari/pari.h>
 6 | 
 7 | // runs KLPT for the left ideal I in the special order of the quaternion algebra A
 8 | // the result is an ideal equivalent to I of norm dividing the integer whose factorisation matrix is fm
 9 | // Assumes the basis of A is 1, i, j, j*i, where i^2 = -1 and j^2 = -p
10 | GEN klpt_special_smooth(GEN I, GEN fm);
11 | 
12 | // same as above, when I is of norm a small power of two (in which case one cannot find an equivalent prime ideal of small norm)
13 | GEN klpt_special_smooth_small_2e_input(GEN I, GEN fm);
14 | 
15 | GEN klpt_general_power(GEN I, GEN K, GEN l);
16 | 
17 | 
18 | #endif
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/src/tune2c:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | 
 5 | data = []
 6 | 
 7 | for line in sys.stdin:
 8 |   data += [tuple(int(x) for x in line.strip().split())]
 9 | 
10 | data.sort()
11 | 
12 | print('int steps_guess(long long *bs,long long *gs,long long l)')
13 | print('{')
14 | 
15 | while len(data) > 0:
16 |   l0,bs0,gs0,bench0,baseline0 = data[0]
17 |   samebsgs = 1
18 |   while samebsgs < len(data):
19 |     l,bs,gs,bench,baseline = data[samebsgs]
20 |     if (bs,gs) != (bs0,gs0): break
21 |     samebsgs += 1
22 | 
23 |   for l,bs,gs,bench,baseline in data[:samebsgs]:
24 |     print('  /* l=%d: bs=%d gs=%d bench=%d baseline=%d */' % (l,bs,gs,bench,baseline))
25 | 
26 |   print('  if (l <= %d) { *bs = %d; *gs = %d; return 1; }' % (data[samebsgs-1][0],bs0,gs0))
27 | 
28 |   data = data[samebsgs:]
29 | 
30 | print('  return 0;')
31 | print('}')
32 | 


--------------------------------------------------------------------------------
/src/p6983/tunecycles.out:
--------------------------------------------------------------------------------
 1 | 3 0 0 10898 10866
 2 | 43 0 0 111358 110954
 3 | 103 0 0 260916 277756
 4 | 109 0 0 278950 275532
 5 | 199 0 0 499932 496918
 6 | 227 0 0 567720 566702
 7 | 419 14 7 927284 1039492
 8 | 491 14 8 1048384 1222546
 9 | 569 14 10 1164240 1432688
10 | 631 14 11 1292240 1569390
11 | 677 16 10 1354698 1699296
12 | 857 16 13 1664550 2171694
13 | 859 16 13 1666950 2165976
14 | 883 20 11 1714210 2206862
15 | 1019 18 14 1914868 2571866
16 | 1171 22 13 2141964 2909050
17 | 1879 30 15 3003372 4850172
18 | 2713 32 21 3999778 6837360
19 | 4283 38 28 5876690 10815114
20 | 5 0 0 13996 13874
21 | 7 0 0 19770 19650
22 | 11 0 0 30392 30060
23 | 31 0 0 81956 81678
24 | 83 0 0 210558 210460
25 | 107 0 0 271658 271148
26 | 137 0 0 344948 342378
27 | 751 16 11 1491070 1880982
28 | 827 16 12 1627010 2079036
29 | 3691 38 24 5223138 9152296
30 | 4019 40 25 5592828 9977994
31 | 6983 62 28 8551860 17354004
32 | 


--------------------------------------------------------------------------------
/src/uint.c:
--------------------------------------------------------------------------------
 1 | #include "uintbig.h"
 2 | #include <gmp.h>
 3 | 
 4 | #define N_LIMBS (4 * 64 / GMP_LIMB_BITS)
 5 | 
 6 | const uintbig uintbig_1 = { 1, 0, 0, 0 };
 7 | 
 8 | void uintbig_set(uintbig *x, uint64_t y) {
 9 |   x->c[0] = y;
10 |   x->c[1] = x->c[2] = x->c[3] = 0;
11 | }
12 | 
13 | bool uintbig_bit(uintbig const *x, uint64_t k) {
14 |   return x->c[k / 64] >> (k % 64) & 1;
15 | }
16 | 
/* x = y + z over N_LIMBS limbs, delegated to GMP's mpn_add_n.
   The limb returned by mpn_add_n (the carry out of the top limb) is
   converted to bool and returned. */
bool uintbig_add3(uintbig *x, uintbig const *y, uintbig const *z) {
  return mpn_add_n(x->c, y->c, z->c, N_LIMBS);
}
/* x = y - z over N_LIMBS limbs, delegated to GMP's mpn_sub_n.
   The limb returned by mpn_sub_n (the borrow out of the top limb) is
   converted to bool and returned. */
bool uintbig_sub3(uintbig *x, uintbig const *y, uintbig const *z) {
  return mpn_sub_n(x->c, y->c, z->c, N_LIMBS);
}
23 | 
/* x = y * z for a single-limb multiplier z, delegated to GMP's mpn_mul_1.
   The value returned by mpn_mul_1 is not used, so only the low N_LIMBS
   limbs of the product are kept. */
void uintbig_mul3_64(uintbig *x, uintbig const *y, uint64_t z) {
  mpn_mul_1(x->c, y->c, N_LIMBS, z);
}
27 | uint64_t uintbig_div3_64(uintbig *x, uintbig const *y, uint64_t z) {
28 |   return mpn_divmod_1(x->c, y->c, N_LIMBS, z);
29 | }
30 | 


--------------------------------------------------------------------------------
/src/steps.c:
--------------------------------------------------------------------------------
 1 | #include "steps.h"
 2 | 
 3 | static int steps_overridden;
 4 | static long long steps_override_bs;
 5 | static long long steps_override_gs;
 6 | 
/* Tuning hook: remember (bs,gs) and set the steps_overridden flag so that
   later calls to steps() start from these values instead of consulting
   steps_guess(). Nothing in this file clears the flag again. */
void steps_override(long long bs,long long gs)
{
  steps_overridden = 1;
  steps_override_bs = bs;
  steps_override_gs = gs;
}
13 | 
14 | void steps(long long *bs,long long *gs,long long l)
15 | {
16 |   long long b,g;
17 | 
18 |   if (steps_overridden) {
19 |     b = steps_override_bs;
20 |     g = steps_override_gs;
21 |   } else if (!steps_guess(&b,&g,l)) {
22 |     b = 0;
23 |     do {
24 |       b += 2;
25 |       g = (l-1)/(4*b);
26 |     } while (g >= b);
27 |   }
28 | 
29 |   /* enforce rules for b,g: */
30 | 
31 |   if (b < 0) b = 0;
32 |   if (g < 0) g = 0;
33 | 
34 |   if (b&1) ++b;
35 |   if (b)
36 |     if (g > (l-1)/(4*b))
37 |       g = (l-1)/(4*b);
38 | 
39 |   if (!g) b = 0;
40 |   if (!b) g = 0;
41 | 
42 |   *bs = b;
43 |   *gs = g;
44 | }
45 | 


--------------------------------------------------------------------------------
/src/rng.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include "rng.h"
 3 | #include <stdlib.h>
 4 | #include <unistd.h>
 5 | #include <fcntl.h>
 6 | #include <assert.h>
 7 | 
 8 | static void urandom(void *x, size_t l)
 9 | {
10 | 	// printf("WARNING: irreproducible randomness\n");
11 |     static int fd = -1;
12 |     ssize_t n;
13 |     if (fd < 0 && 0 > (fd = open("/dev/urandom", O_RDONLY)))
14 |         exit(1);
15 |     for (size_t i = 0; i < l; i += n)
16 |         if (0 >= (n = read(fd, (char *) x + i, l - i)))
17 |             exit(2);
18 | }
19 | 
/* Fill x with l bytes taken from successive mrand48() results.
   Each result supplies up to four bytes, lowest byte first; a final
   partial group uses only as many bytes as are still needed. */
static void drand(void *x, size_t l)
{
  char *out = x;
  size_t pos = 0;

  while (pos < l) {
    long word = mrand48();
    size_t take = l - pos < 4 ? l - pos : 4;
    for (size_t j = 0; j < take; j++)
      out[pos + j] = (word >> 8*j) & 0xff;
    pos += 4;
  }
}
28 | 
/* Fill x with l bytes produced by drand(), i.e. by the mrand48()
   generator; urandom() in this file is not used on this path.
   NOTE(review): mrand48() is a seedable generator, so the output is
   reproducible after srand48() but should not be relied on as secure
   randomness — confirm this is intended outside tests/benchmarks. */
void _randombytes(void *x, size_t l) {
  drand(x, l);
}

/* Ridiculous hack for cross-platform assembly */
/* Same generator under the un-prefixed name: forwards to _randombytes(). */
void randombytes(void *x, size_t l)
{
  _randombytes(x, l);
}
38 | 
39 | 


--------------------------------------------------------------------------------
/include/printing.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <assert.h>
 3 | #include <inttypes.h>
 4 | #include "mont.h"
 5 | 
 6 | static inline void print_big(const uintbig *x) {
 7 |   printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 8 | 	 x->c[0], x->c[1], x->c[2], x->c[3]);
 9 | }
10 | 
11 | 
12 | static inline void print_fp2(const fp2 *x) {
13 |   printf("re ");
14 |   print_big(&x->re.x);
15 |   printf("im ");
16 |   print_big(&x->im.x);
17 | }
18 | 
19 | static inline unsigned long fp2_hash(fp2 x) {
20 |   return (x.re.x.c[0]+3*x.re.x.c[1]+5*x.re.x.c[2]+7*x.re.x.c[3]
21 | 	  +11*x.im.x.c[0]+13*x.im.x.c[1]+17*x.im.x.c[2]+23*x.im.x.c[3]) % 100003;
22 | }
23 | static inline fp2 fp2_ratio(fp2 *x, fp2 *y) {
24 |   fp2 tmp;
25 |   tmp = *y;
26 |   assert(!fp2_iszero(&tmp));
27 |   fp2_inv(&tmp);
28 |   fp2_mul2(&tmp, x);
29 |   return tmp;
30 | }
31 | static inline void proj2_print(proj2 x) {
32 |   if (fp2_iszero(&x.z)) { printf("(infty)"); }
33 |   else { printf("(%lu,%lu)", fp2_hash(fp2_ratio(&x.x,&x.z)), fp2_hash(fp2_ratio(&x.y,&x.z))); }
34 | }
35 | 


--------------------------------------------------------------------------------
/include/uintbig.h:
--------------------------------------------------------------------------------
 1 | #ifndef UINT_H
 2 | #define UINT_H
 3 | 
 4 | #include <stdbool.h>
 5 | #include <stdint.h>
 6 | 
 7 | #define BITS 256
 8 | 
 9 | // 256 bits unsigned integers
// 256-bit unsigned integer held as four 64-bit limbs
typedef struct uintbig {
    uint64_t c[4];
} uintbig;

// the constant 1
extern const uintbig uintbig_1;

// x = y (a single 64-bit value)
void uintbig_set(uintbig *x, uint64_t y);

// value of bit k of x
bool uintbig_bit(uintbig const *x, uint64_t k);

// true iff all four limbs of a are zero
static inline bool uintbig_iszero(const uintbig *a) {
  return (a->c[0] | a->c[1] | a->c[2] | a->c[3]) == 0;
}

// true iff a and b agree limb by limb
static inline bool uintbig_equal(const uintbig *a, const uintbig *b) {
  for (int i = 0; i < 4; i++)
    if (a->c[i] != b->c[i])
      return false;
  return true;
}
27 | 
28 | bool uintbig_add3(uintbig *x, uintbig const *y, uintbig const *z); /* returns carry */
29 | bool uintbig_sub3(uintbig *x, uintbig const *y, uintbig const *z); /* returns borrow */
30 | 
31 | void uintbig_mul3_64(uintbig *x, uintbig const *y, uint64_t z);
32 | uint64_t uintbig_div3_64(uintbig *x, uintbig const *y, uint64_t z); /* returns remainder */
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2020, The SQISign team
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/include/isomorphism.h:
--------------------------------------------------------------------------------
 1 | #ifndef ISOMORPHISM_H
 2 | #define ISOMORPHISM_H
 3 | 
 4 | #include "mont.h"
 5 | #include <stdio.h>
 6 | 
 7 | // The j-invariant divided by 256
 8 | void jinv256(proj *j, const proj *A);
 9 | 
10 | // Represents the isomorphism that maps (X:Z) ↦ ( (Nx X - Nz Z) : (D Z) )
11 | typedef struct isomorphism {
12 |   fp2 Nx, Nz, D;
13 | } isomorphism;
14 | 
15 | // Given curves A and B, computes an isomorphism A -> B
16 | //
17 | // It works for curves j-invariant 0 or 1728, however this is probably
18 | // not the function you're looking for.
19 | void mont_isom(isomorphism *isom, const proj *A, const proj *B);
20 | 
21 | // Change A to an equivalent A-invariant, and produce associated
22 | // isomorphism
23 | void rand_isom(isomorphism *isom, proj *A);
24 | 
25 | static inline void trivial_isom(isomorphism *isom) {
26 | 	isomorphism id = {fp2_1,fp2_0,fp2_1}; *isom = id;
27 | }
28 | 
29 | // Apply isomorphism to point P
30 | static inline void mont_isom_apply(const isomorphism *isom, proj *P) {
31 |   fp2 tmp;
32 |   fp2_mul2(&P->x, &isom->Nx);
33 |   fp2_mul3(&tmp, &P->z, &isom->Nz);
34 |   fp2_sub2(&P->x, &tmp);
35 |   fp2_mul2(&P->z, &isom->D);
36 | }
37 | 
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/include/fp.h:
--------------------------------------------------------------------------------
 1 | #ifndef FP_H
 2 | #define FP_H
 3 | 
 4 | #include "uintbig.h"
 5 | 
 6 | /* fp is in the Montgomery domain, so interpreting that
 7 |    as an integer should never make sense.
 8 |    enable compiler warnings when mixing up uintbig and fp. */
 9 | typedef struct fp {
10 |     uintbig x;
11 | } fp;
12 | 
13 | extern const uintbig p;
14 | extern const fp fp_0;
15 | extern const fp fp_1;
16 | 
17 | void fp_set(fp *x, uint64_t y);
18 | void fp_cswap(fp *x, fp *y, bool c);
19 | 
/* true iff every limb of the stored representation of a is zero
   (delegates to uintbig_iszero on the wrapped value) */
static inline bool fp_iszero(const fp *a) { return uintbig_iszero(&a->x); }
21 | 
22 | void fp_enc(fp *x, uintbig const *y); /* encode to Montgomery representation */
23 | void fp_dec(uintbig *x, fp const *y); /* decode from Montgomery representation */
24 | 
25 | void fp_add2(fp *x, fp const *y);
26 | void fp_sub2(fp *x, fp const *y);
27 | void fp_mul2(fp *x, fp const *y);
28 | 
29 | void fp_add3(fp *x, fp const *y, fp const *z);
30 | void fp_sub3(fp *x, fp const *y, fp const *z);
31 | void fp_mul3(fp *x, fp const *y, fp const *z);
32 | 
33 | void fp_sq1(fp *x);
34 | void fp_sq2(fp *x, fp const *y);
35 | void fp_inv(fp *x);
36 | bool fp_issquare(fp *x);
37 | void fp_sqrt(fp *x);
38 | 
39 | void fp_random(fp *x);
40 | 
41 | extern long long fp_mul_count;
42 | 
/* Negate x in place: x = 0 - x, computed with fp_sub3 using x as both
   output and second operand. */
static inline void fp_neg1(fp *x)
{
  fp_sub3(x,&fp_0,x);
}
47 | 
/* x = -y, computed as 0 - y with fp_sub3; y is left unchanged. */
static inline void fp_neg2(fp *x,const fp *y)
{
  fp_sub3(x,&fp_0,y);
}
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/include/sqisign.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef SQISIGN_H
 3 | #define SQISIGN_H
 4 | 
 5 | #include <pari/pari.h>
 6 | #include "idiso.h"
 7 | 
// Public key: a single proj value E.
// NOTE(review): presumably the public curve produced by keygen() — confirm.
typedef struct public_key {
  proj E;
} public_key;

// Secret key: three PARI objects (I_large_prime, I_two, I_T) together with
// a special_isogeny phi_T and a two_walk_long phi_two.
// NOTE(review): by their names the GEN fields look like ideals of
// large-prime, power-of-two and T norm, with phi_T / phi_two the isogenies
// matching I_T / I_two — confirm against keygen().
typedef struct secret_key {
  GEN I_large_prime;
  GEN I_two;
  GEN I_T;
  special_isogeny phi_T;
  two_walk_long phi_two;
} secret_key;

// Uncompressed signature: a proj value E_com plus a two_walk_long sigma.
// NOTE(review): E_com is presumably the commitment curve — confirm.
typedef struct signature {
  proj E_com;
  two_walk_long sigma;
} signature;

// Compressed signature: E_com plus a pointer to an array of 64-bit words.
// NOTE(review): zip is presumably allocated by init_compressed_sig() and
// released by free_compressed_sig(); its length is not recorded in this
// struct — confirm both.
typedef struct compressed_signature {
  proj E_com;
  uint64_t *zip;
} compressed_signature;
29 | 
30 | 
31 | void init_compressed_sig(compressed_signature *comp_sigma);
32 | void free_compressed_sig(compressed_signature *comp_sigma);
33 | void keygen(public_key *pk, secret_key *sk);
34 | void commitment(GEN *coeff, GEN *I, odd_isogeny *phi_com);
35 | void challenge(proj *E_cha, const uintbig *m, const proj *E_com, const proj *basis_plus, const proj *basis_minus, GEN *dlog, proj *basis_two);
36 | void response(two_walk_long *sigma, uint64_t *zip, GEN coeff_ker_challenge_commitment, const secret_key *sk, const proj *basis_two, const proj *E_cha);
37 | void sign(compressed_signature *comp_sigma, const secret_key *sk, const public_key *pk, const uintbig *m);
38 | void decompress(two_walk *walk, proj *A, const uint64_t *zip, long len,long last_step);
39 | bool verif(compressed_signature *comp_sigma, const public_key *pk,const uintbig *m);
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/bench/keygen.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | 
 3 | #include <getopt.h>
 4 | #include <inttypes.h>
 5 | #include <stdio.h>
 6 | #include <time.h> 
 7 | #include <pari/pari.h>
 8 | 
 9 | #include "precomputed.h"
10 | #include "sqisign.h"
11 | 
/* Read the CPU time-stamp counter with the x86 "rdtsc" instruction.
   The instruction leaves the low half in EAX and the high half in EDX
   (the "=a" / "=d" output constraints); the two halves are combined into
   one 64-bit value. x86-only. */
static __inline__ uint64_t rdtsc(void)
{
    uint32_t hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return lo | (uint64_t) hi << 32;
}
18 | 
/* Key-generation benchmark.
   Options: -s <samples> (default 100), -r <seed> (default 1). Any other
   option, including -h, prints the usage line to stderr and exits with
   status -1. For every sample one line "<cycles>\t<ms>" is printed, where
   cycles is the rdtsc() difference and ms the clock() difference around a
   single keygen() call. */
int main(int argc, char **argv) {
  int samples = 100, seed = 1;
  
  int opt;
  while ((opt = getopt(argc, argv, "s:r:h")) != -1) {
    switch (opt) {
    case 's':
      samples = atoi(optarg);
      break;
    case 'r':
      seed = atoi(optarg);
      break;
    default:
      /* 'h' is accepted by getopt but has no case of its own, so it
         lands here together with unknown options */
      fprintf(stderr,
	      "Usage: %s [-s samples] [-r seed]\n",
	      argv[0]);
      exit(-1);
    }
  }
  
  pari_init(800000000, 1<<18);
  init_precomputations();
  
  /* seed both generators in use: PARI's and the srand48 family */
  setrand(mkintn(1, seed));
  srand48(seed);

  printf("### Keygen\n");
  printf("# cycles\tms\n");
  for (int i = 0; i < samples; i++) {
    public_key pk;
    secret_key sk;

    /* start from the negated readings so that adding the end readings
       leaves the elapsed amounts */
    clock_t t = -clock();
    uint64_t c = -rdtsc();
    keygen(&pk, &sk);
    c += rdtsc();
    t += clock();

    printf("%" PRIu64 "\t%.3lf\n", c, 1000. * t / CLOCKS_PER_SEC);   
  }
    
  return 0;
}
62 | 


--------------------------------------------------------------------------------
/include/tedwards.h:
--------------------------------------------------------------------------------
 1 | #ifndef TEDWARDS_H
 2 | #define TEDWARDS_H
 3 | 
 4 | #include <pari/pari.h>
 5 | #include "uintbig.h"
 6 | #include "fp2.h"
 7 | #include "mont.h"
 8 | 
 9 | // a*x^2+y^2=1+d*x^2*y^2
10 | 
11 | typedef struct point {
12 |     fp2 x;
13 |     fp2 y;
14 |     fp2 z;
15 |     fp2 t; // t = x*y/z
16 | } point;
17 | 
18 | extern const point ted_0;
19 | 
20 | bool ted_is_on_curve(point const *P, proj const *E);
21 | bool ted_equal(point const *P, point const *Q);
22 | 
23 | void ted_double(point *Q, proj const *E, point const *P);
24 | void ted_add(point *S, proj const *E, point const *P, point const *Q);
25 | void ted_mul(point *res, point const *P, proj const *E, uintbig const *k);
26 | 
27 | void ted_miller_phi(fp2 *phi, proj const *E, point const *P1, point const *P2, point const *Q, bool dou);
28 | void ted_miller(fp2 *res, fp2 *res2, proj const *E, point const *P, point const *Q, point const *Q2, uintbig const *k);
29 | void ted_weil(fp2 *res, proj const *E, point const *P, point const *Q, uintbig const *k);
30 | 
31 | void ted_neg(point *Q, point const *P);
32 | bool ted_iszero(point const *P);
33 | void mont_to_ted(proj *E, proj const *A, bool twist);
34 | void mont_to_ted_point(point *Q, proj const *A, proj const *P);
35 | void ted_to_mont_point(proj *Q, point const *P);
36 | 
37 | bool ted_bidim_log_weil(long *a, long *b, const proj *E, const point *Q, const point *P1, const point *P2, long ell);
38 | bool ted_bidim_log(GEN *a, GEN *b, const proj *E, const point *Q, const point *P1, const point *P2, long ell, long e);
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/test/sqisign.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | 
 3 | #include <stdint.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <time.h>
 7 | #include <pari/pari.h>
 8 | #include <assert.h>
 9 | 
10 | 
11 | #include "ideal.h"
12 | #include "idiso.h"
13 | #include "constants.h"
14 | #include "precomputed.h"
15 | #include "tedwards.h"
16 | #include "isogenies.h"
17 | #include "klpt.h"
18 | #include "toolbox.h"
19 | #include "sqisign.h"
20 | #include "mont.h"
21 | 
// End-to-end test: 10 rounds of keygen / sign / verify.
// argv[1] is the random seed; default = 1
// Each round signs a random 32-byte message, asserts that the signature
// verifies, then overwrites the message with fresh random bytes and
// asserts that verification fails.
// NOTE(review): every check sits inside assert(), so a build with NDEBUG
// would not call verif() at all.
int main(int argc, char *argv[]){
    pari_init(800000000, 1<<18);
    init_precomputations();

    // default seed for both PARI's generator and the srand48 family
    setrand(stoi(1));
    srand48(1);
    if( argc > 1 ) {
      // re-seed both generators from the command line
      setrand(strtoi(argv[1]));
      srand48(atoi(argv[1]));
    }

    for (int i = 0; i < 10; i++) {
      // random message: fills all four limbs of m
      uintbig m;
      randombytes(m.c, 32);

      public_key pk;
      secret_key sk;

      // printf("Key generation\n");
      keygen(&pk, &sk);

      // printf("sk->I_T\n");
      // output(sk.I_T);
      // sk.I_T = gcopy(sk.I_T);
      compressed_signature comp_sigma;
      init_compressed_sig(&comp_sigma);
      // signature Sigma;


      sign(&comp_sigma ,&sk, &pk, &m);

      // the signature must verify for the signed message
      assert(verif(&comp_sigma,&pk,&m));

      // ...and must be rejected for a different (fresh random) message
      randombytes(m.c, 32);
      assert(!verif(&comp_sigma,&pk,&m));
      free_compressed_sig(&comp_sigma);
    }

    printf("    \033[1;32mAll tests passed\033[0m\n");
    exit(0);

    // not reached: exit(0) above already terminates the process
    return 0;
}
66 | 


--------------------------------------------------------------------------------
/test/montxy.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | #include <pari/pari.h>
 6 | 
 7 | #include "mont.h"
 8 | #include "constants.h"
 9 | 
10 | 
/* Test of the xy-coordinate helpers (xLIFT, xyNEG, xyADD, xyDBL, xyMUL)
   on the curve given by A = (0 : 1), over 200 random points. */
int main() {
  srand48(1);
  
  // Only implementing for p=3 mod 4, for the moment
  assert(class_mod_4 == 3);

  proj A = { fp2_0, fp2_1 };

  proj pt;
  proj2 P, Q,R,S;
  uintbig k;
  for (int i = 0; i < 200; i++) {
    // draw random (x:z) until is_on_curve accepts it, then lift to P
    do {
      fp2_random(&pt.x); fp2_random(&pt.z);
    } while (!is_on_curve(&pt,&A));
    P.x = pt.x;
    P.z = pt.z;
    xLIFT(&P.y, &A, &pt);

    assert(xy_is_on_curve(&A, &P));

    // P + (-P) must be the zero point
    xyNEG(&Q, &P);
    xyADD(&R, &A, &P, &Q);

    assert(xy_is_on_curve(&A, &R));
    assert(xy_is_zero(&R));

    // NOTE(review): the two doubling results below are not checked: R is
    // overwritten by the xyADD that follows and S is only reassigned later
    // by xyMUL, so the asserts re-check P + (-P) a second time.
    xyDBL(&R, &A, &P);
    xyDBL(&S, &A, &R);
    xyADD(&R, &A, &P, &Q);

    assert(xy_is_on_curve(&A, &R));
    assert(xy_is_zero(&R));

    // second random point Q, lifted the same way
    do {
      fp2_random(&pt.x); fp2_random(&pt.z);
    } while (!is_on_curve(&pt,&A));
    Q.x = pt.x;
    Q.z = pt.z;
    xLIFT(&Q.y, &A, &pt);

    assert(xy_is_on_curve(&A, &Q));

    // NOTE(review): this sum is overwritten by the next statement without
    // being inspected
    xyADD(&R, &A, &P, &Q);


    // R = 2P, then five additions of P: R = 7P
    xyDBL(&R, &A, &P);
    xyADD(&R, &A, &R, &P);
    xyADD(&R, &A, &R, &P);
    xyADD(&R, &A, &R, &P);
    xyADD(&R, &A, &R, &P);
    xyADD(&R, &A, &R, &P);


    // S = [7]P via scalar multiplication; must match the repeated addition
    uintbig_set(&k, 7);
    xyMUL(&S, &A, &P, &k);

    assert(xy_equal(&R,&S));


  }
  
  printf("    \033[1;32mAll tests passed\033[0m\n");
  exit(0);
}
76 | 


--------------------------------------------------------------------------------
/include/toolbox.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TOOLBOX_H
 3 | #define TOOLBOX_H
 4 | 
 5 | #include <pari/pari.h>
 6 | #include <stdbool.h>
 7 | 
 8 | clock_t tic();
 9 | float tac(); /* time in ms since last tic */
10 | float TAC(const char *str); /* same, but prints it with label 'str' */
11 | float toc(const clock_t t); /* time in ms since t */
12 | float TOC(const clock_t t, const char *str); /* same, but prints it with label 'str' */
13 | 
14 | // computes the factorisation matrix of x1 by x2 given their factorisation matrices f1 and f2
15 | GEN famat_div(GEN f1, GEN f2);
16 | 
17 | // returns the first divisor in f1 (first prime in the list with non-zero exponent)
18 | // if f2 is given, it is set to the updated factorisation, where the returned factor has been removed
19 | GEN famat_pop(GEN f1, GEN* f2);
20 | 
// returns a random divisor of the factorisation matrix f1, with product at least B
// if f2 is given, it is set to f1 divided by the random divisor
// NOTE(review): this prototype has no f2 parameter, so the sentence above
// looks stale — confirm against the definition in toolbox.c
GEN famat_random(GEN f1, GEN B);
24 | 
25 | // returns the product
26 | GEN famat_prod(GEN f);
27 | 
28 | // returns the n-th prime divisor in f, where primes are counted with multiplicity (the 3rd prime of 2*3^2*5 is 3 because 2,3,3,5)
29 | GEN famat_get_ith(GEN f, GEN n);
30 | 
31 | int cornacchia_extended(GEN N, GEN *x, GEN *y);
32 | 
33 | // solve x^2 + y^2 + p(u^2 + v^2) = M, with (u,v) != (0,0)
34 | // when parity != 0, ensures that (x+v) and (y+u) are not both even
// (this means that x + y*i + u*j + v*ji is not in 2*Order(1,i,(1-ji)/2, (i+j)/2))
36 | GEN norm_equation_special(GEN p, GEN M, long parity, bool randomized);
37 | 
38 | GEN lattice_nearest_plane(GEN lat, GEN target, long flag);
39 | 
40 | #endif
41 | 
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/test/mont.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | 
/* Test of xMUL / xISOG on x-only points, 200 rounds.
   The curve A is updated by xISOG inside the loop, so later rounds run
   on the curve left behind by earlier ones. */
int main() {
  srand48(1);

  // Only implementing for p=3 mod 4, for the moment
  assert(class_mod_4 == 3);

  proj A = { fp2_0, fp2_1 };

  proj P, Q, Qk;
  for (int i = 0; i < 200; i++) {
    fp2_random(&P.x); fp2_random(&P.z);
    fp2_random(&Q.x); fp2_random(&Q.z);
    // Qk = [k]Q on the current curve, with k = i+3
    uintbig k, scal, ord;
    uintbig_set(&k, i+3);
    xMUL(&Qk, &A, &Q, &k);



    // pick the factor list and group order matching P:
    // p+1 when is_on_curve accepts P, p-1 otherwise
    const long *factors;
    long len;
    if (is_on_curve(&P, &A)) {
      len = p_plus_len;
      factors = p_plus_fact;
      uintbig_add3(&ord, &p, &uintbig_1);
    } else {
      len = p_minus_len;
      factors = p_minus_fact;
      uintbig_sub3(&ord, &p, &uintbig_1);
    }
    long fact = factors[i % len];

    // Cofactor multiplication
    // ord / fact must be exact; afterwards P = [ord/fact]P
    long rem = uintbig_div3_64(&ord, &ord, fact);
    assert(rem == 0);
    xMUL(&P, &A, &P, &ord);

    // [fact]P must be the point at infinity (z = 0)
    proj Z;
    uintbig_set(&scal, fact);
    xMUL(&Z, &A, &P, &scal);
    assert(fp2_iszero(&Z.z));

    if (!fp2_iszero(&P.z)) {
      // NOTE(review): xISOG presumably replaces its first argument by the
      // codomain curve and its second by the image point, for the isogeny
      // with kernel P of degree fact — confirm in isogenies.h.
      proj AA = A;
      xISOG(&A, &Q, &P, fact);
      xISOG(&AA, &Qk, &P, fact);

      // both calls must produce exactly the same curve coordinates
      fp2_sub2(&AA.x, &A.x);
      fp2_sub2(&AA.z, &A.z);
      assert(fp2_iszero(&AA.x) && fp2_iszero(&AA.z));

      // [k](image of Q) must equal image of [k]Q as projective points:
      // compare by cross-multiplication Q.x*Qk.z - Q.z*Qk.x == 0
      xMUL(&Q, &A, &Q, &k);
      fp2_mul2(&Q.x, &Qk.z);
      fp2_mul2(&Q.z, &Qk.x);
      fp2_sub2(&Q.x, &Q.z);
      assert(fp2_iszero(&Q.x));
    }
  }

  printf("    \033[1;32mAll tests passed\033[0m\n");
  exit(0);
}
70 | 


--------------------------------------------------------------------------------
/include/precomputed.h:
--------------------------------------------------------------------------------
 1 | #ifndef PRECOMPUTED_H
 2 | #define PRECOMPUTED_H
 3 | 
 4 | #include <pari/pari.h>
 5 | #include <stdbool.h>
 6 | #include "constants.h"
 7 | #include "mont.h"
 8 | #include "tedwards.h"
 9 | 
10 | 
11 | // each basis entry is a triple of the form P,Q,P+Q
12 | extern proj torsion_basis[][3];
13 | extern proj torsion_basis_sum[3];
14 | extern point torsion_basis_ted_sum[3];
15 | extern proj torsion_basis_twist[][3];
16 | extern proj torsion_basis_twist_sum[3];
17 | extern point torsion_basis_twist_ted_sum[3];
18 | extern proj torsion_basis_two[3];
19 | 
20 | 
// Global precomputed data, exposed through the single instance
// global_setup declared below this struct.
struct precomp_struct {
    // quaternion data

    GEN p; // the prime
    GEN B; // the quaternion algebra
    GEN qf; // the quadratic form defined by the reduced norm with respect to the standard basis
    GEN O0; // the canonical maximal order
    GEN one;
    GEN i;
    GEN j;
    GEN ji;
    GEN torsion_fm; // factorisation matrix of the available torsion

    // basis of O0
    GEN O0_b1; // 1
    GEN O0_b2; // i
    GEN O0_b3; // (1-ji)/2
    GEN O0_b4; // (i+j)/2

    // NOTE(review): presumably change-of-basis data between the O0 basis
    // above and the standard basis 1, i, j, ji — confirm
    GEN O0_to_standard;
    GEN standard_to_O0;


    // elliptic curve data

    proj E0;

    // NOTE(review): the suffixes _2, _3, _4 presumably refer to the basis
    // elements O0_b2..O0_b4 acting on the torsion bases (curve, twist and
    // two-torsion variants) — confirm
    GEN *action_2, *action_3, *action_4;
    GEN *action_twist_2, *action_twist_3, *action_twist_4;
    GEN action_two_2, action_two_3, action_two_4;

    GEN gen_p_plus_fact, gen_p_minus_fact; // factorisation of p+1 and p-1
    GEN gen_p_plus_primary, gen_p_minus_primary; // primary decomposition (list of prime powers)
    GEN gen_odd_torsion;
};
55 | 
56 | 
57 | extern struct precomp_struct global_setup;
58 | 
59 | void init_precomputations();
60 | 
61 | 
62 | long ell_to_index(long ell, bool *twist);
63 | long ell_to_e(long ell);
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/include/tradeoff.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRADEOFF_H
 2 | #define TRADEOFF_H
 3 | 
 4 | #include <pari/pari.h>
 5 | #include "isogenies.h"
 6 | #include "two_walks.h"
 7 | 
 8 | // Given that F is at most T, the easiest way to include F in the norm of the output is in the strong approximation step.
 9 | //
10 | // Same as klpt_general_power, but the norm of the output must be F * 2^e, where F is the integer whose factorization matrix is fm. In practice F is a divisor of T.
11 | GEN klpt_general_power_T(GEN I, GEN K, GEN l, GEN fm);
12 | 
13 | 
14 | // This translates the output of klpt to the signing isogeny; the slight modification is that the norm is a power of 2 times a divisor of T. This can be accomplished by applying ideal_to_isogeny_two on I1,
15 | // and then translating I2 to an isogeny (which is already a suboperation performed in ideal_to_isogeny_two)
16 | //
17 | // T = global_setup.gen_odd_torsion
18 | // I = I1 * I2 is a left O0-ideal of norm dividing T^3 2^e for some positive integer e, where I1 has norm dividing T^2 2^e and is a compatible input for ideal_to_isogeny_two, and I2 has norm dividing T
19 | // J = I1 + O0*T^2
20 | // K is a left O0-ideal equivalent to J of norm a power of 2
21 | // Finds phi such that phi_I = phi_2 * phi_1 * phi_J
22 | // Finds L equivalent to I1 of norm dividing T^2
23 | void ideal_to_isogeny_two_T(two_walk_long *phi_res2,special_isogeny *phi_resT, GEN *L, special_isogeny *phi_L, GEN I, GEN J, GEN K, const special_isogeny *phi_J, const two_walk_long *phi_K);
24 | 
25 | 
26 | // Then, we need a way to compress the signing isogeny
27 | //
28 | // Compress a 2^e T isogeny to a sequence of integers
29 | void compress_T(uint64_t *zip, const two_walk *walk2, const special_isogeny *walkT, long len);
30 | 
31 | // Inverse of the above
32 | // A is the starting curve
33 | void decompress_T(special_isogeny *walkT,two_walk *walk2, proj *A, const uint64_t *zip, long len);
34 | #endif


--------------------------------------------------------------------------------
/bench/sign.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | 
 3 | #include <getopt.h>
 4 | #include <inttypes.h>
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | #include <pari/pari.h>
 8 | 
 9 | #include "precomputed.h"
10 | #include "sqisign.h"
11 | #include "constants.h"
12 | 
13 | static __inline__ uint64_t rdtsc(void)
14 | { // executes the rdtsc instruction and returns its EDX:EAX output pair as one 64-bit value
15 |     uint32_t low, high;
16 |     __asm__ __volatile__ ("rdtsc" : "=a"(low), "=d"(high));
17 |     return ((uint64_t) high << 32) | low;
18 | }
19 | 
20 | int main(int argc, char **argv) { // signing benchmark: for each of `keys` key pairs, time `samples` signatures of random messages
21 |   int keys = 10, samples = 10, seed = 1;
22 |   // options: -k keys, -s samples, -r seed; anything else (including -h) prints the usage line and exits
23 |   int opt;
24 |   while ((opt = getopt(argc, argv, "k:s:r:h")) != -1) {
25 |     switch (opt) {
26 |     case 'k':
27 |       keys = atoi(optarg);
28 |       break;
29 |     case 's':
30 |       samples = atoi(optarg);
31 |       break;
32 |     case 'r':
33 |       seed = atoi(optarg);
34 |       break;
35 |     default:
36 |       fprintf(stderr,
37 | 	      "Usage: %s [-k keys] [-s samples] [-r seed]\n",
38 | 	      argv[0]);
39 |       exit(-1);
40 |     }
41 |   }
42 | 
43 |   pari_init(800000000, 1<<18);
44 |   init_precomputations();
45 |   // the same seed is handed to PARI (setrand) and to srand48
46 |   setrand(mkintn(1, seed));
47 |   srand48(seed);
48 | 
49 |   printf("### Sign\n");
50 |   printf("# key\tcycles\t\tms\t\tlength\n");
51 |   for (int k = 0; k < keys; k++) {
52 |     public_key pk;
53 |     secret_key sk;
54 |     keygen(&pk, &sk); // key generation is outside the timed region
55 | 
56 |     for (int i = 0; i < samples; i++) {
57 |       // fresh compressed signature and a 32-byte random message for every sample
58 |       compressed_signature comp_sigma;
59 |       init_compressed_sig(&comp_sigma);
60 |       uintbig m;
61 |       randombytes(m.c, 32);
62 |       // start readings are stored negated, so adding the end readings leaves the elapsed amount
63 |       clock_t t = -clock();
64 |       uint64_t c = -rdtsc();
65 |       sign(&comp_sigma ,&sk, &pk, &m);
66 |       c += rdtsc();
67 |       t += clock();
68 |       free_compressed_sig(&comp_sigma);
69 |       // The "length" column is the constant signing_length;
70 |       // no per-signature length is measured here.
71 |       // Columns: key index, cycle count, milliseconds of clock() time, length.
72 | 
73 |       printf("%d\t%" PRIu64 "\t%.3lf\t%ld\n", k, c, 1000. * t / CLOCKS_PER_SEC, signing_length);
74 |     }
75 |   }
76 | 
77 |   return 0;
78 | }
79 | 


--------------------------------------------------------------------------------
/bench/verif.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | 
 3 | #include <getopt.h>
 4 | #include <inttypes.h>
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | #include <pari/pari.h>
 8 | 
 9 | #include "precomputed.h"
10 | #include "sqisign.h"
11 | 
12 | static __inline__ uint64_t rdtsc(void)
13 | { // executes the rdtsc instruction and returns its EDX:EAX output pair as one 64-bit value
14 |     uint32_t low, high;
15 |     __asm__ __volatile__ ("rdtsc" : "=a"(low), "=d"(high));
16 |     return ((uint64_t) high << 32) | low;
17 | }
18 | 
19 | int main(int argc, char **argv) { // verification benchmark: `keys` key pairs x `sigs` signatures x `samples` timed verifications
20 |   int keys = 5, sigs = 5, samples = 10, seed = 1;
21 |   // options: -k keys, -m signatures, -s samples, -r seed; anything else (including -h) prints usage and exits
22 |   int opt;
23 |   while ((opt = getopt(argc, argv, "k:m:s:r:h")) != -1) {
24 |     switch (opt) {
25 |     case 'k':
26 |       keys = atoi(optarg);
27 |       break;
28 |     case 'm':
29 |       sigs = atoi(optarg);
30 |       break;
31 |     case 's':
32 |       samples = atoi(optarg);
33 |       break;
34 |     case 'r':
35 |       seed = atoi(optarg);
36 |       break;
37 |     default:
38 |       fprintf(stderr,
39 |               "Usage: %s [-k keys] [-m signatures] [-s samples] [-r seed]\n",
40 |               argv[0]);
41 |       exit(-1);
42 |     }
43 |   }
44 | 
45 |   pari_init(800000000, 1<<18);
46 |   init_precomputations();
47 |   // the same seed is handed to PARI (setrand) and to srand48
48 |   setrand(mkintn(1, seed));
49 |   srand48(seed);
50 | 
51 |   printf("### Verify\n");
52 |   printf("# key\tmessage\tcycles\t\tms\n"); // one heading per printed column (rows carry no length field)
53 |   for (int k = 0; k < keys; k++) {
54 |     public_key pk;
55 |     secret_key sk;
56 |     keygen(&pk, &sk);
57 | 
58 |     for (int s = 0; s < sigs; s++) {
59 |       uintbig m;
60 |       randombytes(m.c, 32);
61 |       // signing happens once per message, outside the timed region
62 |       compressed_signature comp_sigma;
63 |       init_compressed_sig(&comp_sigma);
64 |       sign( &comp_sigma, &sk, &pk, &m);
65 | 
66 |       for (int i = 0; i < samples; i++) {
67 |         clock_t t = -clock(); // start readings are stored negated, end readings are added below
68 |         uint64_t c = -rdtsc();
69 |         verif(&comp_sigma, &pk, &m); // NOTE(review): whatever verif() reports is not inspected here
70 | 
71 |         c += rdtsc();
72 |         t += clock();
73 | 
74 |         printf("%d\t%d\t%" PRIu64 "\t%.3lf\n", k, s, c, 1000. * t / CLOCKS_PER_SEC);
75 |       }
76 |       free_compressed_sig(&comp_sigma);
77 |     }
78 |   }
79 | 
80 |   return 0;
81 | }
82 | 


--------------------------------------------------------------------------------
/test/fp.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "fp.h"
 7 | #include "fp2.h"
 8 | 
 9 | int main(){ // randomized self-test of the fp and fp2 arithmetic; the first failing assert aborts
10 |   srand48(1);
11 |   fp a, b, c, d, e, f;
12 | 
13 |   // Testing distributivity + commutativity: (b + c)*a - (b*a + c*a) must be zero
14 |   for (int i = 0; i < 100; i++) {
15 |     fp_random(&a);
16 |     fp_random(&b);
17 |     fp_random(&c);
18 | 
19 |     fp_add3(&d, &b, &c);
20 |     fp_mul2(&d, &a);
21 |     fp_mul3(&e, &b, &a);
22 |     fp_mul3(&f, &c, &a);
23 |     fp_add2(&e, &f);
24 |     fp_sub2(&d, &e);
25 | 
26 |     assert(fp_iszero(&d));
27 |     // negation: a + (-a) must be zero
28 |     fp_neg2(&b, &a);
29 |     fp_add2(&a, &b);
30 |     assert(fp_iszero(&a));
31 |   }
32 | 
33 |   // Testing inverse: inv(a) * a is compared word by word against fp_1
34 |   for (int i = 0; i < 100; i++) {
35 |     fp_random(&a);
36 |     b = a;
37 |     fp_inv(&a);
38 |     fp_mul3(&c, &a, &b);
39 | 
40 |     assert(c.x.c[0] == fp_1.x.c[0]);
41 |     assert(c.x.c[1] == fp_1.x.c[1]);
42 |     assert(c.x.c[2] == fp_1.x.c[2]);
43 |     assert(c.x.c[3] == fp_1.x.c[3]);
44 |     assert(fp_issquare(&a) == fp_issquare(&b)); // an element and its inverse must agree on fp_issquare
45 |   }
46 | 
47 |   /* Same for GF(p^2) */
48 |   fp2 A, B, C, D, E, F;
49 | 
50 |   // Testing distributivity + commutativity
51 |   for (int i = 0; i < 100; i++) {
52 |     fp2_random(&A);
53 |     fp2_random(&B);
54 |     fp2_random(&C);
55 | 
56 |     fp2_add3(&D, &B, &C);
57 |     fp2_mul2(&D, &A);
58 |     fp2_mul3(&E, &B, &A);
59 |     fp2_mul3(&F, &C, &A);
60 |     fp2_add2(&E, &F);
61 |     fp2_sub2(&D, &E);
62 | 
63 |     assert(fp2_iszero(&D));
64 |     
65 |     fp2_neg2(&B, &A);
66 |     fp2_add2(&A, &B);
67 |     assert(fp2_iszero(&A));
68 |   }
69 | 
70 |   // Testing inverse: the real part must equal fp_1 and the imaginary part must be zero
71 |   for (int i = 0; i < 100; i++) {
72 |     fp2_random(&A);
73 |     B = A;
74 |     fp2_inv(&A);
75 |     fp2_mul3(&C, &A, &B);
76 | 
77 |     assert(C.re.x.c[0] == fp_1.x.c[0]);
78 |     assert(C.re.x.c[1] == fp_1.x.c[1]);
79 |     assert(C.re.x.c[2] == fp_1.x.c[2]);
80 |     assert(C.re.x.c[3] == fp_1.x.c[3]);
81 |     assert(fp_iszero(&C.im));
82 |   }
83 |   
84 |   printf("    \033[1;32mAll tests passed\033[0m\n");
85 |   exit(0);
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/test/isom.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "isomorphism.h"
 7 | #include "isogenies.h"
 8 | #include "constants.h"
 9 | 
10 | static isog_degree mont_order(const proj *P, const proj *A, bool twist) {
11 |   // Computes the order of P as an isog_degree over the factor tables selected by
12 |   // `twist`: the p-1 tables when twist is true, the p+1 tables otherwise.
13 |   const long *primes = twist ? p_minus_fact : p_plus_fact;
14 |   const long *exps = twist ? p_minus_mult : p_plus_mult;
15 |   long count = twist ? p_minus_len : p_plus_len;
16 | 
17 |   proj Q;
18 |   uintbig k;
19 |   // Starting value: degree_co applied to the all-zero degree and the multiplicities.
20 |   isog_degree order = degree_co((isog_degree){ 0 }, exps, count);
21 | 
22 |   for (int idx = 0; idx < count; idx++) {
23 |     // Multiply P by the current degree with entry idx cleared ...
24 |     degree_unset(&order, idx);
25 |     degree_to_uint(&k, order, primes, count);
26 |     xMUL(&Q, A, P, &k);
27 | 
28 |     // ... then count the multiplications by primes[idx] needed to reach zero;
29 |     // that count becomes entry idx of the result.
30 |     uintbig_set(&k, primes[idx]);
31 |     uint8_t steps = 0;
32 |     while (!mont_iszero(&Q)) {
33 |       xMUL(&Q, A, &Q, &k);
34 |       steps++;
35 |     }
36 |     degree_set(&order, idx, steps);
37 |   }
38 | 
39 |   return order;
40 | }
41 | 
42 | int main() { // checks that isomorphisms preserve j-invariant, curve/twist membership and point order
43 |   srand48(1);
44 |   
45 |   isomorphism isom1, isom2;
46 |   proj A = {fp2_0, fp2_1}; // coefficient (0 : 1), i.e. a/c = 0 in the curve form documented in mont.h
47 |   proj B, P, Q, j1, j2;
48 | 
49 |   for (int i = 0; i < 10; i++) {
50 |     fp2_random(&P.x); P.z = fp2_1;
51 |     // a random x lands either on the curve or on its twist; pick the matching cofactor
52 |     bool oncurve = is_on_curve(&P, &A);
53 | 
54 |     if (oncurve) {
55 |       xMUL(&P, &A, &P, &p_plus_odd_cofactor);
56 |     } else {
57 |       xMUL(&P, &A, &P, &p_minus_odd_cofactor);
58 |     }
59 |     
60 |     isog_degree deg = mont_order(&P, &A, !oncurve);
61 |     B = A;
62 |     rand_isom(&isom1, &B); // B is passed by pointer and compared with A below, so it is presumably rewritten — TODO confirm
63 |     jinv256(&j1, &A);
64 |     jinv256(&j2, &B);
65 |     assert(mont_equal(&j1,&j2));
66 |     // image of P under the random isomorphism: same side (curve/twist), same order
67 |     Q = P;
68 |     mont_isom_apply(&isom1, &Q);
69 |     assert(is_on_curve(&Q, &B) == oncurve);
70 |     assert(deg.val == mont_order(&Q, &B, !oncurve).val);
71 |     
72 |     mont_isom(&isom2, &A, &B); // same checks with the isomorphism built by mont_isom from A and B
73 |     mont_isom_apply(&isom2, &P);
74 |     assert(is_on_curve(&P, &B) == oncurve);
75 |     assert(deg.val == mont_order(&P, &B, !oncurve).val);
76 |   }
77 | 
78 |   printf("    \033[1;32mAll tests passed\033[0m\n");
79 |   exit(0);
80 | }
81 | 


--------------------------------------------------------------------------------
/test/mitm.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | #include "two_walks.h"
 9 | 
10 | int main() { // builds random 2-isogeny walks and checks that MITM finds a walk to the same j-invariant
11 |   srand48(1);
12 |   
13 |   // Only implementing for p=3 mod 4, for the moment
14 |   assert(class_mod_4 == 3);
15 | 
16 |   two_walk phi;
17 |   phi.A = (proj){ fp2_0, fp2_1 };
18 |   proj B, P4, tmp;
19 | 
20 |   for (int i = 0; i < 100; i++) {
21 |   phi.A = (proj){ fp2_0, fp2_1 };
22 |     phi.len = 4 + i % 7; // walk lengths cycle through 4..10
23 | 
24 |     // sample point on curve
25 |     while (true) {
26 |       fp2_random(&phi.ker.x); fp2_random(&phi.ker.z);
27 |       if (!is_on_curve(&phi.ker, &phi.A))
28 | 	continue;
29 |       // multiply by cofactor
30 |       xMUL(&phi.ker, &phi.A, &phi.ker, &p_even_cofactor);
31 |       for (int i = 0; i < two_tors_height - phi.len - 2; i++)
32 | 	xDBL(&phi.ker, &phi.A, &phi.ker);
33 |       P4 = phi.ker; // P4 is kept two doublings above the kernel generator
34 |       xDBL(&phi.ker, &phi.A, &phi.ker);
35 |       xDBL(&phi.ker, &phi.A, &phi.ker);
36 |       // check order: [2^(len-1)]ker must be neither zero nor a point with x = 0
37 |       tmp = phi.ker;
38 |       for (int i = 1; i < phi.len; i++)
39 | 	xDBL(&tmp, &phi.A, &tmp);
40 |       if (!fp2_iszero(&tmp.x) && !mont_iszero(&tmp))
41 | 	break;
42 |     }
43 |     
44 |     // Evaluate isogeny
45 |     eval_walk(&phi, &B, &P4);
46 | 
47 |     // Change orientation of B (the j-invariant is asserted to be unchanged afterwards)
48 |     fp2 a, b;
49 |     proj j1, j2, inf;
50 |     jinv256(&j1, &B);
51 |     xDBL(&tmp, &B, &P4);
52 |     xDBL(&inf, &B, &tmp);
53 |     assert(mont_iszero(&inf)); // doubling the image of P4 twice must give zero
54 |     fp2_add3(&a, &tmp.x, &tmp.x);
55 |     fp2_add2(&a, &tmp.x);
56 |     fp2_mul2(&a, &B.z);
57 |     fp2_mul2(&B.x, &tmp.z);
58 |     fp2_add2(&B.x, &a);
59 |     fp2_mul2(&B.x, &P4.z);
60 |     fp2_mul3(&a, &P4.x, &tmp.z);
61 |     fp2_mul3(&b, &P4.z, &tmp.x);
62 |     fp2_sub2(&a, &b);
63 |     fp2_mul2(&B.z, &a);
64 |     jinv256(&j2, &B);
65 |     assert(mont_equal(&j1, &j2));
66 | 
67 |     // MITM
68 |     assert(MITM(&phi, &phi.A, &B, phi.len));
69 |     // check: the walk written into phi must end on a curve with the same j-invariant
70 |     eval_walk(&phi, &B, &P4);
71 |     jinv256(&j2, &B);
72 |     assert(mont_equal(&j1, &j2));
73 |     
74 |     phi.A = B; // NOTE(review): overwritten by the reset at the top of the next iteration
75 |   }
76 |   
77 |   printf("    \033[1;32mAll tests passed\033[0m\n");
78 |   exit(0);
79 | }
80 | 


--------------------------------------------------------------------------------
/include/constants.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONSTANTS_H
 2 | #define CONSTANTS_H
 3 | 
 4 | #include "uintbig.h"
 5 | #include <assert.h>
 6 | 
 7 | extern const long class_mod_4;
 8 | extern const long two_tors_height;
 9 | // The cofactor of 2^two_tors_height in p±1
10 | extern const uintbig p_even_cofactor;
11 | extern const long security_level;
12 | 
13 | // the signing isogeny has degree 2^signing_length
14 | extern const long signing_length;
15 | // we have the equality signing_length = two_tors_height * (signing_length_two_tors_height_step - 1) + last_step_length
16 | extern const long signing_length_two_tors_height_step;
17 | extern const long last_step_length;
18 | 
19 | 
20 | // The useful odd factors in p-1
21 | extern const long p_minus_len;
22 | extern const long p_minus_fact[];
23 | extern const long p_minus_mult[];
24 | // The cofactor of the useful odd torsion in p-1
25 | extern const uintbig p_minus_odd_cofactor;
26 | 
27 | // The useful odd factors in p+1
28 | extern const long p_plus_len;
29 | extern const long p_plus_fact[];
30 | extern const long p_plus_mult[];
31 | // The cofactor of the useful odd torsion in p+1
32 | extern const uintbig p_plus_odd_cofactor;
33 | 
34 | // the multiplicities to take to obtain log2(p) bits of torsion (for commitment)
35 | extern const long p_minus_mult_com[];
36 | extern const long p_plus_mult_com[];
37 | 
38 | // the multiplicities to take to obtain log2(p)/2 bits of torsion (for challenge)
39 | extern const long p_minus_mult_cha[];
40 | extern const long p_plus_mult_cha[];
41 | 
42 | // Inverse mapping of p_plus_fact and p_minus_fact: returns the index of ell and sets *twist (false: p_plus_fact,
43 | // true: p_minus_fact). Both scans stay inside the tables; if ell is absent it asserts (returns 0 under NDEBUG).
44 | static inline long ell_to_index(long ell, bool *twist) {
45 |   *twist = false;
46 |   for (long k = 0; k < p_plus_len && p_plus_fact[k] <= ell; k++) // early exit keeps the original scan order
47 |     if (p_plus_fact[k] == ell)
48 |       return k;
49 |   *twist = true;
50 |   for (long k = 0; k < p_minus_len && p_minus_fact[k] <= ell; k++)
51 |     if (p_minus_fact[k] == ell)
52 |       return k;
53 |   assert(0);
54 |   return(0);
55 | }
56 | static inline long ell_to_e(long ell) { // multiplicity recorded for ell: two_tors_height for 2, else the table entry
57 |   if (ell == 2)
58 |     return two_tors_height;
59 |   bool twist;
60 |   long index = ell_to_index(ell, &twist); // kept as long, the type ell_to_index returns
61 |   return (twist ? p_minus_mult : p_plus_mult)[index];
62 | }
63 | 
64 | #endif
65 | 


--------------------------------------------------------------------------------
/src/p6983/constants.c:
--------------------------------------------------------------------------------
 1 | // p = 73743043621499797449074820543863456997944695372324032511999999999999999999999
 2 | //
 3 | // p-1 = 2 * 3^53 * 43 * 103^2 * 109 * 199 * 227 * 419 * 491 * 569 * 631 * 677 * 857 * 859 * 883 * 1019 * 1171 * 1879 * 2713 * 4283
 4 | // p+1 = 2^33 * 5^21 * 7^2 * 11 * 31 * 83 * 107 * 137 * 751 * 827 * 3691 * 4019 * 6983 * 517434778561 * 26602537156291
 5 | 
 6 | #include "constants.h"
 7 | 
 8 | const long class_mod_4 = 3;
 9 | const long two_tors_height = 33;
10 | 
11 | const long security_level = 128;
12 | // signing_length = two_tors_height * (signing_length_two_tors_height_step - 1) + last_step_length = 33 * 30 + 10
13 | const long signing_length=1000 ;
14 | 
15 | const long signing_length_two_tors_height_step = 31;
16 | const long last_step_length = 10;
17 | // cofactors as four 64-bit words, least significant word first
18 | const uintbig p_plus_odd_cofactor = { 0x68cd740600000000, 0x0016c5bcbd22f015, 0, 0 };
19 | const uintbig p_minus_odd_cofactor = { 2, 0, 0, 0 };
20 | const uintbig p_even_cofactor = { 0xa52ca964a8652149, 0x1bb9479de8d8027c,
21 | 				  0xdb3c54c8592e3b52, 0x51848ab2 };
22 | // odd prime factors of p-1 (see the factorisation at the top of this file) and their multiplicities
23 | #define M_LEN 19
24 | const long p_minus_len = M_LEN;
25 | const long p_minus_fact[M_LEN] =
26 |   {  3, 43, 103, 109, 199, 227, 419, 491, 569, 631, 677, 857, 859, 883,
27 |     1019, 1171, 1879, 2713, 4283 };
28 | const long p_minus_mult[M_LEN] =
29 |   { 53,  1,   2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
30 |        1,    1,    1,    1,    1 };
31 | 
32 | 
33 | // odd prime factors of p+1 and their multiplicities; the two large primes in the
34 | // factorisation at the top of this file are not part of this table
35 | #define P_LEN 12
36 | const long p_plus_len = P_LEN;
37 | const long p_plus_fact[P_LEN] =
38 |   {  5, 7, 11, 31, 83, 107, 137, 751, 827, 3691, 4019, 6983 };
39 | const long p_plus_mult[P_LEN] =
40 |   { 21, 2,  1,  1,  1,   1,   1,   1,   1,    1,    1,    1 };
41 | 
42 | 
43 | // the multiplicities to take to obtain log2(p) bits of torsion (for commitment)
44 | const long p_minus_mult_com[M_LEN] =
45 |   { 0,  1,   2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
46 |        1,    1,    1,    1,    1 };
47 | const long p_plus_mult_com[P_LEN] =
48 |   { 0, 2,  1,  1,  1,   1,   1,   1,   1,    1,    1,    1 };
49 | 
50 | 
51 | // the multiplicities to take to obtain log2(p)/2 bits of torsion (for challenge)
52 | const long p_minus_mult_cha[M_LEN] =
53 |   { 53,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
54 |        0,    0,    0,    0,    0 };
55 | const long p_plus_mult_cha[P_LEN] =
56 |   { 21, 0,  0,  0,  0,   0,   0,   0,   0,    0,    0,    0 };
57 | 


--------------------------------------------------------------------------------
/test/two_walks.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | #include "two_walks.h"
 9 | 
10 | int main() { // checks eval_walk, eval_dual and dual_walk against each other on random 2-power walks
11 |   srand48(1);
12 |   // Only implementing for p=3 mod 4, for the moment
13 |   assert(class_mod_4 == 3);
14 | 
15 |   two_walk phi;
16 |   phi.A = (proj){ fp2_0, fp2_1 };
17 |   proj A, B, C, P, Pbak, Pbbak;
18 | 
19 |   for (int i = 0; i < 100; i++) {
20 |     // sample point on curve
21 |     while (true) {
22 |       fp2_random(&phi.ker.x); fp2_random(&phi.ker.z);
23 |       if (!is_on_curve(&phi.ker, &phi.A))
24 | 	continue;
25 |       // multiply by cofactor
26 |       xMUL(&phi.ker, &phi.A, &phi.ker, &p_even_cofactor);
27 |       // compute order: phi.len counts the doublings until zero or a point with x = 0 is reached
28 |       P = phi.ker;
29 |       for (phi.len = 0; !mont_iszero(&P) && !fp2_iszero(&P.x); phi.len++)
30 | 	xDBL(&P, &phi.A, &P);
31 |       if (mont_iszero(&P) && phi.len > 0)
32 | 	break;
33 |     }
34 |     
35 |     // Sample point (may be on the curve or not; `oncurve` must be preserved by every map below)
36 |     fp2_random(&P.x); fp2_random(&P.z);
37 |     bool oncurve = is_on_curve(&P, &phi.A);
38 |     
39 |     // Evaluate isogeny
40 |     A = phi.A;
41 |     Pbak = P;
42 |     eval_walk(&phi, &B, &P);
43 |     Pbbak = P;
44 |     assert(is_on_curve(&P, &B) == oncurve);
45 |     
46 |     // Evaluate dual
47 |     eval_dual(&phi, &B, &P);
48 |     // Check dual isogeny equation: dual(phi(P)) must equal [2^len]P
49 |     for (int i = 0; i < phi.len; i++)
50 |       xDBL(&Pbak, &phi.A, &Pbak);
51 |     assert(mont_equal(&Pbak, &P));
52 | 
53 |     // Double check dual
54 |     isomorphism isom;
55 |     proj j1, j2;
56 |     // construct dual
57 |     dual_walk(&phi);
58 |     jinv256(&j1, &B);
59 |     jinv256(&j2, &phi.A);
60 |     assert(mont_equal(&j1, &j2));
61 |     // move Pbbak to new curve
62 |     mont_isom(&isom, &B, &phi.A);
63 |     mont_isom_apply(&isom, &Pbbak);
64 |     assert(is_on_curve(&Pbbak, &phi.A) == oncurve);
65 |     // evaluate dual
66 |     eval_walk(&phi, &C, &Pbbak);
67 |     assert(is_on_curve(&Pbbak, &C) == oncurve);
68 |     // check consistency
69 |     jinv256(&j1, &C);
70 |     jinv256(&j2, &A);    
71 |     assert(mont_equal(&j1, &j2));
72 |     mont_isom(&isom, &C, &A);
73 |     mont_isom_apply(&isom, &Pbbak);
74 |     assert(mont_equal(&Pbbak, &P));
75 |   }
76 |   
77 |   printf("    \033[1;32mAll tests passed\033[0m\n");
78 |   exit(0);
79 | }
80 | 


--------------------------------------------------------------------------------
/include/poly.h:
--------------------------------------------------------------------------------
 1 | #ifndef POLY_H
 2 | #define POLY_H
 3 | 
 4 | #include "fp2.h"
 5 | 
 6 | void poly_mul(fp2 *c,const fp2 *a,long long alen,const fp2 *b,long long blen);
 7 | 
 8 | /* assumes !alen or !blen or clen <= alen+blen-1 */
 9 | void poly_mul_low(fp2 *c,long long clen,const fp2 *a,long long alen,const fp2 *b,long long blen);
10 | 
11 | /* assumes !alen or !blen or cstart <= alen+blen-1 */
12 | void poly_mul_high(fp2 *c,long long cstart,const fp2 *a,long long alen,const fp2 *b,long long blen);
13 | 
14 | /* assumes !alen or !blen or: 0 <= cstart; 0 <= clen; cstart+clen <= alen+blen-1 */
15 | void poly_mul_mid(fp2 *c,long long cstart,long long clen,const fp2 *a,long long alen,const fp2 *b,long long blen);
16 | 
17 | /* input (and output) polynomials are self-reciprocal */
18 | void poly_mul_selfreciprocal(fp2 *c,const fp2 *a,long long alen,const fp2 *b,long long blen);
19 | 
20 | /* input: T[0...3n-1] has n 3-coeff polys */
21 | /* output: T[0...2n] has 1 (2n+1)-coeff poly */
22 | /* namely the product of the original polys */
23 | void poly_multiprod2(fp2 *T,long long n);
24 | 
25 | /* poly_multiprod2 with polys guaranteed to be self-reciprocal */
26 | void poly_multiprod2_selfreciprocal(fp2 *T,long long n);
27 | 
28 | /* XXX: should integrate this into multieval_precompute */
29 | /* input: P[0...2n-1] has n 2-coeff polys */
30 | /* output: number of coeffs in product tree (minus n) */
31 | /* tree itself (without P) is stored in T */
32 | /* for n>=2, product is stored in final n+1 coeffs of T */
33 | long long poly_tree1(fp2 *T,const fp2 *P,long long n);
34 | 
35 | long long poly_tree1size(long long n);
36 | 
37 | /* input: polynomial f with flen>0 coeffs */
38 | /* output: n scaled values v[0],...,v[n-1] of f */
39 | /* evaluation points: roots of the n 2-coeff polys in P */
40 | /* another input: T from poly_tree1 */
41 | /* scaling: v[i] is value multiplied by a function of (P,i) */
42 | /* namely a product of powers of leading coefficients from P */
43 | void poly_multieval(fp2 *v,long long n,const fp2 *f,long long flen,const fp2 *P,const fp2 *T);
44 | 
45 | void poly_multieval_precompute(fp2 *precomp,long long n,long long flen,const fp2 *P,const fp2 *T);
46 | 
47 | long long poly_multieval_precomputesize(long long n,long long flen);
48 | 
49 | void poly_multieval_postcompute(fp2 *v,long long n,const fp2 *f,long long flen,const fp2 *P,const fp2 *T,const fp2 *precomp);
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/include/mont.h:
--------------------------------------------------------------------------------
 1 | #ifndef MONT_H
 2 | #define MONT_H
 3 | 
 4 | #include "uintbig.h"
 5 | #include "fp2.h"
 6 | 
 7 | // curve of the form y^2 = x^3 + (a/c)*x^2 + x
 8 | // twist of the form B*y^2 = x^3 + (a/c)*x^2 + x where B = Fp2_inv(fp2_non_residue());
 9 | 
10 | /* P^1 over fp2: a projective pair (x : z). The functions below take both points and curve arguments as proj. */
11 | typedef struct proj {
12 |     fp2 x;
13 |     fp2 z;
14 | } proj;
15 | 
16 | /* P^2 over fp2: a projective triple (x : y : z), the point type taken by the xy* functions below. */
17 | typedef struct proj2 {
18 |     fp2 x;
19 |     fp2 y;
20 |     fp2 z;
21 | } proj2;
22 | 
23 | void xDBL(proj *Q, proj const *A, proj const *P);
24 | void xADD(proj *S, proj const *P, proj const *Q, proj const *PQ);
25 | void xDBLADD(proj *R, proj *S, proj const *P, proj const *Q, proj const *PQ, proj const *A);
26 | void xMUL(proj *Q, proj const *A, proj const *P, uintbig const *k);
27 | void xBIDIM(proj *S, proj const *A, proj const *P, uintbig const *k, proj const *Q, uintbig const *l, proj const *PQ);
28 | void xISOG_many(proj *A, proj *P, int n, proj const *K, long long k);
29 | static inline void xISOG(proj *A, proj *P, proj const *K, long long k) { /* xISOG_many with n fixed to 1 */ const int n = 1; xISOG_many(A, P, n, K, k); }
30 | void xISOG_old(proj *A, proj *P, proj const *K, long long k);
31 | 
32 | bool is_on_curve(const proj *P, const proj *A);
33 | bool mont_equal(proj const *P1, proj const *P2);
34 | bool mont_iszero(proj const *P);
35 | 
36 | 
37 | void normalize_proj(proj *A);
38 | 
39 | // returns false if it is on the curve, true if it is on the twist
40 | bool xLIFT(fp2 *y, const proj *A, const proj *P);
41 | 
42 | // Given x(P) and x(Q) both in A or both not in A, computes x(P±Q)
43 | void xBILIFT(proj *PQ1, proj *PQ2, const proj *P, const proj *Q, const proj *A);
44 | 
45 | // computes P1+P2 or P1-P2
46 | // slow, should not be used for fast arithmetic, only when xADD cannot be used
47 | void mont_add(proj *Q, proj const *A, proj const *P1, proj const *P2);
48 | 
49 | 
50 | void xyADD(proj2 *Q, proj const *A, proj2 const *P1, proj2 const *P2);
51 | void xyDBL(proj2 *Q, proj const *A, proj2 const *P1);
52 | void xyNEG(proj2 *Q, proj2 const *P1);
53 | void xyMUL(proj2 *Q, proj const *A, proj2 const *P, uintbig const *k);
54 | bool xy_is_on_curve(const proj *A, const proj2 *P);
55 | bool xy_is_zero(const proj2 *P);
56 | bool xy_equal(const proj2 *P1, const proj2 *P2);
57 | void xtoxy(proj2 *Q, const proj *A, const proj *P);
58 | void xytox(proj *Q, const proj2 *P);
59 | 
60 | //perform DLP on points of order 2^e, not very optimized but works
61 | bool mont_two_DLP(uint64_t *a,const proj *A, const proj *Q, const proj *P,const proj *PQ,long e);
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/src/uint.s:
--------------------------------------------------------------------------------
  1 | .intel_syntax noprefix
  2 | 
  3 | .global uintbig_1  # the constant 1 as four 64-bit words, lowest word first
  4 | .global _uintbig_1  # same object under an underscore-prefixed name (presumably for platforms that prefix C symbols — confirm)
  5 | uintbig_1: _uintbig_1:  # NOTE(review): emitted before the .text directive below, so it lands in whatever section is current here
  6 |     .quad 1, 0, 0, 0
  7 | 
  8 | 
  9 | .text
 10 | 
 11 | .global uintbig_set  # stores rsi as a four-word value at [rdi]: word 0 = rsi, words 1..3 = 0
 12 | .global _uintbig_set  # (presumably called from C as uintbig_set(x, y) with x in rdi and y in rsi — confirm against uintbig.h)
 13 | uintbig_set: _uintbig_set:
 14 |     cld  # clear the direction flag so stosq moves rdi upward
 15 |     mov rax, rsi
 16 |     stosq  # [rdi] <- rax, rdi += 8
 17 |     xor rax, rax
 18 |     mov rcx, 3
 19 |     rep stosq  # zero the remaining three words
 20 |     ret
 21 | 
 22 | 
 23 | .global uintbig_bit  # returns in rax bit number rsi of the word array at rdi (0 or 1)
 24 | .global _uintbig_bit  # no range check is made on rsi
 25 | uintbig_bit: _uintbig_bit:
 26 |     mov rcx, rsi
 27 |     and rcx, 0x3f  # bit position inside the word = rsi mod 64
 28 |     shr rsi, 6  # word index = rsi / 64
 29 |     mov rax, [rdi + 8*rsi]
 30 |     shr rax, cl
 31 |     and rax, 1
 32 |     ret
 33 | 
 34 | 
 35 | .global uintbig_add3  # [rdi] <- [rsi] + [rdx] over four 64-bit words; rax <- carry out of the top word
 36 | .global _uintbig_add3
 37 | uintbig_add3: _uintbig_add3:
 38 |     mov rax, [rsi +  0]
 39 |     add rax, [rdx +  0]  # lowest word: plain add starts the carry chain
 40 |     mov [rdi +  0], rax  # mov leaves the flags alone, so the carry survives to the next adc
 41 |     .set k, 1
 42 |     .rept 3  # words 1..3, unrolled by the assembler
 43 |         mov rax, [rsi + 8*k]
 44 |         adc rax, [rdx + 8*k]
 45 |         mov [rdi + 8*k], rax
 46 |         .set k, k+1
 47 |     .endr
 48 |     setc al  # final carry flag -> return value
 49 |     movzx rax, al
 50 |     ret
 51 | 
 52 | .global uintbig_sub3  # [rdi] <- [rsi] - [rdx] over four 64-bit words; rax <- borrow out of the top word
 53 | .global _uintbig_sub3
 54 | uintbig_sub3: _uintbig_sub3:
 55 |     mov rax, [rsi +  0]
 56 |     sub rax, [rdx +  0]  # lowest word: plain sub starts the borrow chain
 57 |     mov [rdi +  0], rax
 58 |     .set k, 1
 59 |     .rept 3  # words 1..3, unrolled by the assembler
 60 |         mov rax, [rsi + 8*k]
 61 |         sbb rax, [rdx + 8*k]
 62 |         mov [rdi + 8*k], rax
 63 |         .set k, k+1
 64 |     .endr
 65 |     setc al  # final carry flag (borrow) -> return value
 66 |     movzx rax, al
 67 |     ret
 68 | 
 69 | 
 70 | .global uintbig_mul3_64  # [rdi] <- low four words of ([rsi] * rdx); rdx is the implicit mulx operand
 71 | .global _uintbig_mul3_64
 72 | uintbig_mul3_64: _uintbig_mul3_64:
 73 |     # each mulx writes high:low of rdx * word into (r10 or r11):rax without touching the flags
 74 |     mulx r10, rax, [rsi +  0]
 75 |     mov [rdi +  0], rax
 76 | 
 77 |     mulx r11, rax, [rsi +  8]
 78 |     add  rax, r10  # add the high half of the previous product; starts the carry chain
 79 |     mov [rdi +  8], rax
 80 | 
 81 |     mulx r10, rax, [rsi + 16]
 82 |     adcx rax, r11  # previous high half plus carry
 83 |     mov [rdi + 16], rax
 84 | 
 85 |     mulx r11, rax, [rsi + 24]
 86 |     adcx rax, r10
 87 |     mov [rdi + 24], rax
 88 |     # the last high half (r11) and the last carry are not stored; rax is left holding the top stored word
 89 |     ret
 90 | 
 91 | .global uintbig_div3_64  # [rdi] <- [rsi] / rdx (four-word quotient); rax <- remainder
 92 | .global _uintbig_div3_64
 93 | uintbig_div3_64: _uintbig_div3_64:
 94 |     mov r10, rdx  # keep the divisor; div uses rdx:rax as the dividend
 95 |     mov rdx, 0  # the running remainder starts at 0
 96 |     # schoolbook division from the top word down: each div leaves the remainder in rdx for the next word
 97 |     mov rax, [rsi + 24]
 98 |     div r10
 99 |     mov [rdi + 24], rax
100 | 
101 |     mov rax, [rsi + 16]
102 |     div r10
103 |     mov [rdi + 16], rax
104 | 
105 |     mov rax, [rsi +  8]
106 |     div r10
107 |     mov [rdi +  8], rax
108 | 
109 |     mov rax, [rsi +  0]
110 |     div r10
111 |     mov [rdi +  0], rax
112 | 
113 |     mov rax, rdx  # final remainder is the return value
114 | 
115 |     ret
116 | 


--------------------------------------------------------------------------------
/include/two_walks.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWO_WALKS_H
 2 | #define TWO_WALKS_H
 3 | 
 4 | #include "mont.h"
 5 | #include "isomorphism.h"
 6 | #include "constants.h"
 7 | 
 8 | // An isogeny of degree 2^len, described by its domain curve and a kernel generator.
 9 | //
10 | // A is the domain curve
11 | // ker is the kernel of order 2^len
12 | //
13 | // Condition: [2^(len-1)]ker must not be the (0,0) point.
14 | typedef struct two_walk {
15 |   proj A; // domain curve
16 |   proj ker; // kernel generator, a point on A
17 |   long len; // exponent of the degree 2^len
18 | } two_walk;
19 | 
20 | // Evaluate 2-isogeny of kernel K at P
21 | void two_isog(const proj *K, proj *P);
22 | // The dual of the above
23 | void two_isog_dual(const proj *K, proj *P);
24 | 
25 | // Evaluate 2-isogeny walk phi : A -> B at point P.
26 | // B is set to the image curve, and P to the image point.
27 | void eval_walk(const two_walk *phi, proj *B, proj *P);
28 | 
29 | // same but P is an array of length cardinality
30 | void eval_walk_mult(const two_walk *phi, proj *B, proj *P, long cardinality);
31 | 
32 | // Internal function. You probably don't want to call this.
33 | void eval_walk_rec(proj *A, proj *K, long len, bool advance, proj *P, long stacklen);
34 | 
35 | // Compute the dual walk to phi : A -> ?? ≃ B.
36 | //
37 | // P ∈ B is first converted to an element of the image of phi using an
38 | // isomorphism, and then pushed through the dual
39 | void eval_dual(const two_walk *phi, const proj *B, proj *P);
40 | 
41 | // Compute the dual of phi
42 | void dual_walk(two_walk* phi);
43 | 
44 | // finds isom such that phi*(isom inverse) can be evaluated
45 | // set phi_new phi*(isom inverse)
46 | // then set P to phi_new(isom(P)), and A to the target curve
47 | void eval_walk_isom(isomorphism *isom, two_walk *phi_new, proj *B, proj *R, const two_walk *phi, const proj *P);
48 | 
49 | void eval_walk_isom_mult(isomorphism *isom, two_walk *phi_new, proj *B, const two_walk *phi, proj *P, long cardinality);
50 | 
51 | // Find a walk phi : from -> to of length len by MITM.
52 | // It uses a hash table to accommodate 2^tab_size entries.
53 | //
54 | // It assumes that the walk from both ends does *not* start with the
55 | // isogeny of kernel (0,0).
56 | //
57 | // returns true if walk is found, false otherwise
58 | //
59 | // Do not call with tab_size = 0
60 | bool MITM_cutoff(two_walk *phi, const proj *from, const proj *to, long len, long tab_size);
61 | static inline bool MITM(two_walk *phi, const proj *from, const proj *to, long len) { // MITM_cutoff with tab_size = len/2
62 |   return MITM_cutoff(phi, from, to, len, len >= 2 ? len / 2 : 1); // MITM_cutoff must never be given tab_size = 0
63 | }
64 | bool MITM2(two_walk *eta, const proj *from, const proj *to, long length);
65 | 
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/include/fp2.h:
--------------------------------------------------------------------------------
 1 | #ifndef FP2_H
 2 | #define FP2_H
 3 | 
 4 | #include "fp.h"
 5 | #include "rng.h"
 6 | 
 7 | typedef struct fp2 { // pair of fp values
 8 |   fp re, im; // components in that order: fp2_1 is (fp_1, fp_0) and fp2_i is (fp_0, fp_1)
 9 | } fp2;
10 | 
11 | extern const fp2 fp2_0;
12 | #define fp2_1 ((fp2){ fp_1, fp_0 })
13 | #define fp2_i ((fp2){ fp_0, fp_1 })
14 | 
15 | // Return 3 + 2i (built from fp_1 by additions only; no arguments)
16 | static inline fp2 fp2_non_residue(void) {
17 |   fp2 res;
18 |   fp_add3(&res.im, &fp_1, &fp_1); // im = 1 + 1
19 |   fp_add3(&res.re, &res.im, &fp_1); // re = im + 1
20 |   return res;
21 | }
22 | 
23 | static inline bool fp2_iszero(const fp2 *a) { // true iff fp_iszero holds for both re and im
24 |   return fp_iszero(&a->re) && fp_iszero(&a->im);
25 | }
26 | 
27 | static inline bool fp2_equal(const fp2 *a, const fp2 *b) { // compares the stored uintbig words of re and im; NOTE(review): assumes a unique representation per value -- confirm
28 |   return (uintbig_equal(&a->re.x,&b->re.x)) && (uintbig_equal(&a->im.x,&b->im.x));
29 | }
30 | 
31 | void fp2_frob2(fp2 *x, const fp2 *y);
32 | static inline void fp2_frob1(fp2 *x) { fp2_frob2(x, x); } // in-place form of fp2_frob2
33 | 
34 | static inline void fp2_set(fp2 *x, uint64_t y) { // x <- (y, 0): re set from y, im set to 0
35 |   fp_set(&x->re, y);
36 |   fp_set(&x->im, 0);
37 | }
38 | static inline void fp2_cswap(fp2 *x, fp2 *y, bool c) { // forwards condition c to fp_cswap on each coordinate
39 |   fp_cswap(&x->re, &y->re, c);
40 |   fp_cswap(&x->im, &y->im, c);
41 | }
42 | 
43 | static inline void fp2_add2(fp2 *x, fp2 const *y) { // x += y, coordinate-wise
44 |   fp_add2(&x->re, &y->re);
45 |   fp_add2(&x->im, &y->im);
46 | }
47 | static inline void fp2_sub2(fp2 *x, fp2 const *y) { // x -= y, coordinate-wise
48 |   fp_sub2(&x->re, &y->re);
49 |   fp_sub2(&x->im, &y->im);
50 | }
51 | 
52 | static inline void fp2_add3(fp2 *x, fp2 const *y, fp2 const *z) { // x <- y + z, coordinate-wise
53 |   fp_add3(&x->re, &y->re, &z->re);
54 |   fp_add3(&x->im, &y->im, &z->im);
55 | }
56 | static inline void fp2_sub3(fp2 *x, fp2 const *y, fp2 const *z) { // x <- y - z, coordinate-wise
57 |   fp_sub3(&x->re, &y->re, &z->re);
58 |   fp_sub3(&x->im, &y->im, &z->im);
59 | }
60 | 
61 | void fp2_mul3(fp2 *x, fp2 const *y, fp2 const *z);
62 | static inline void fp2_mul2(fp2 *x, fp2 const *y) { fp2_mul3(x, x, y); } // x *= y; passes x as both output and first operand of fp2_mul3
63 | 
64 | void fp2_sq2(fp2 *x, fp2 const *y);
65 | static inline void fp2_sq1(fp2 *x) { fp2_sq2(x, x); } // x <- x^2 in place; passes x as both output and input of fp2_sq2
66 | 
67 | void fp2_inv(fp2 *x);
68 | bool fp2_issquare(fp2 *x);
69 | 
70 | static inline void fp2_random(fp2 *x) { // fills re and im with two independent fp_random draws
71 |   fp_random(&x->re);
72 |   fp_random(&x->im);
73 | }
74 | 
75 | 
76 | static inline void fp2_neg1(fp2 *x) // x <- -x in place
77 | {
78 |   fp_neg1(&x->re);
79 |   fp_neg1(&x->im);
80 | }
81 | 
82 | static inline void fp2_neg2(fp2 *x, const fp2 *y) // x <- -y
83 | {
84 |   fp_neg2(&x->re, &y->re);
85 |   fp_neg2(&x->im, &y->im);
86 | }
87 | 
88 | void fp2_exp(fp2 *res, fp2 const *x, uintbig const *k);
89 | 
90 | void fp2_sqrt(fp2 *x);
91 | 
92 | bool fp2_dlp_naive(long *res, const fp2 *h, const fp2 *g, long ell);
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/test/mitm2.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | #include "two_walks.h"
 9 | #include "toolbox.h"
10 | 
11 | 
12 | 
13 | int main() {
14 |   srand48(1);
15 | 
16 |   clock_t t;
17 |   float accumulated_time_ms = 0.;
18 |   int repetitions = 100;
19 |   
20 |   // Only implementing for p=3 mod 4, for the moment
21 |   assert(class_mod_4 == 3);
22 | 
23 |   two_walk phi;
24 |   phi.A = (proj){ fp2_0, fp2_1 };
25 |   proj B, P4, tmp;
26 | 
27 |   for (int i = 0; i < repetitions; i++) {
28 |     phi.A = (proj){ fp2_0, fp2_1 };  // every iteration restarts from the curve (0 : 1)
29 |     phi.len = (i%19)+1;              // walk lengths cycle through 1..19
30 | 
31 |     // sample point on curve
32 |     while (true) {
33 |       fp2_random(&phi.ker.x); fp2_random(&phi.ker.z);
34 |       if (!is_on_curve(&phi.ker, &phi.A))
35 |         continue;
36 |       // multiply by cofactor
37 |       xMUL(&phi.ker, &phi.A, &phi.ker, &p_even_cofactor);
38 |       for (int d = 0; d < two_tors_height - phi.len - 2; d++)  // own counter: does not shadow the outer i
39 |         xDBL(&phi.ker, &phi.A, &phi.ker);
40 |       P4 = phi.ker;  // kept two doublings above the kernel generator
41 |       xDBL(&phi.ker, &phi.A, &phi.ker);
42 |       xDBL(&phi.ker, &phi.A, &phi.ker);
43 |       // check order
44 |       tmp = phi.ker;
45 |       for (int d = 1; d < phi.len; d++)
46 |         xDBL(&tmp, &phi.A, &tmp);
47 |       if (!fp2_iszero(&tmp.x) && !mont_iszero(&tmp))  // accept only if (len-1) doublings give neither x = 0 nor a zero point
48 |         break;
49 |     }
50 |     
51 |     // Evaluate isogeny
52 |     eval_walk(&phi, &B, &P4);
53 | 
54 |     // Change orientation of B
55 |     fp2 a, b;
56 |     proj j1, j2, inf;
57 |     jinv256(&j1, &B);
58 |     xDBL(&tmp, &B, &P4);
59 |     xDBL(&inf, &B, &tmp);
60 |     assert(mont_iszero(&inf));  // two doublings of the pushed P4 must vanish on B
61 |     fp2_add3(&a, &tmp.x, &tmp.x);
62 |     fp2_add2(&a, &tmp.x);
63 |     fp2_mul2(&a, &B.z);
64 |     fp2_mul2(&B.x, &tmp.z);
65 |     fp2_add2(&B.x, &a);
66 |     fp2_mul2(&B.x, &P4.z);
67 |     fp2_mul3(&a, &P4.x, &tmp.z);
68 |     fp2_mul3(&b, &P4.z, &tmp.x);
69 |     fp2_sub2(&a, &b);
70 |     fp2_mul2(&B.z, &a);
71 |     jinv256(&j2, &B);
72 |     assert(mont_equal(&j1, &j2));  // the rewritten B must keep the same j-invariant
73 | 
74 |     // MITM
75 | 
76 |     t = tic();
77 |     bool found = MITM2(&phi, &phi.A, &B, phi.len);
78 |     assert(found);
79 |     accumulated_time_ms += toc(t);
80 | 
81 |     // check
82 |     eval_walk(&phi, &B, &P4);
83 |     jinv256(&j2, &B);
84 |     assert(mont_equal(&j1, &j2));
85 |     
86 |     phi.A = B;
87 |   }
88 |   //printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
89 |   printf("    \033[1;32mAll tests passed\033[0m\n");
90 |   exit(0);
91 | }
92 | 


--------------------------------------------------------------------------------
/test/isogenies_mult.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | #include "isogenies.h"
 9 | #include "toolbox.h"
10 | 
11 | static isog_degree mont_order(const proj *P, const proj *A, const long *fact, const long *mult, long len) { // order of P on A, as per-factor valuations
12 |   proj tmp;
13 |   uintbig cof;
14 |   isog_degree deg = degree_co((isog_degree){ 0 }, mult, len); // start from the full multiplicities mult[]
15 |   for (int j = 0; j < len; j++) {
16 |     degree_unset(&deg, j);                 // drop factor j from the cofactor
17 |     degree_to_uint(&cof, deg, fact, len);  // cof = product over the other factors, at their current valuations
18 |     xMUL(&tmp, A, P, &cof);
19 |     uintbig_set(&cof, fact[j]);
20 |     uint8_t v = 0;
21 |     for ( ; !mont_iszero(&tmp); v++) {     // v counts multiplications by fact[j] until tmp becomes zero
22 |       xMUL(&tmp, A, &tmp, &cof);
23 |     }
24 |     degree_set(&deg, j, v);                // record the valuation found for factor j
25 |   }
26 |   return deg;
27 | }
28 | 
29 | int main() {
30 |   srand48(1);
31 |   // Only implementing for p=3 mod 4, for the moment
32 |   assert(class_mod_4 == 3);
33 | 
34 | 
35 |   float accumulated_time_ms = 0.;
36 |   int repetitions = 50;
37 |   clock_t t;
38 |   
39 |   proj A = { fp2_0, fp2_1 };
40 | 
41 |   odd_isogeny phi;
42 |   proj P[2];
43 | 
44 |     
45 |     for (int i = 0; i < repetitions; i++) {
46 |       // sample point on curve
47 |       do {
48 |        fp2_random(&phi.kernel_plus.x); fp2_random(&phi.kernel_plus.z);
49 |       } while (!is_on_curve(&phi.kernel_plus, &A));
50 |       // multiply by cofactor
51 |       xMUL(&phi.kernel_plus, &A, &phi.kernel_plus, &p_plus_odd_cofactor);
52 |       // compute order
53 |       phi.deg_plus = mont_order(&phi.kernel_plus, &A, p_plus_fact, p_plus_mult, p_plus_len);
54 |     
55 | 
56 |       do { // sample a point for which is_on_curve is false
57 |        fp2_random(&phi.kernel_minus.x); fp2_random(&phi.kernel_minus.z);
58 |       } while (is_on_curve(&phi.kernel_minus, &A));
59 |       // multiply by cofactor
60 |       xMUL(&phi.kernel_minus, &A, &phi.kernel_minus, &p_minus_odd_cofactor);
61 |       // compute order
62 |       phi.deg_minus = mont_order(&phi.kernel_minus, &A, p_minus_fact, p_minus_mult, p_minus_len);
63 |     
64 | 
65 | 
66 |       // Sample point
67 |      fp2_random(&P[0].x); fp2_random(&P[0].z);
68 |      fp2_random(&P[1].x); fp2_random(&P[1].z);
69 | 
70 | 
71 |       t = tic();
72 |       eval_mult(&A, &phi, P, 2); // only this call is timed; A is not reset between iterations
73 |       //dual(&A, &phi);
74 |       //fp2_sqrt(&P[0].x);
75 |       accumulated_time_ms += toc(t);
76 |     }
77 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
78 |     accumulated_time_ms = 0.;
79 | 
80 | 
81 | 
82 | 
83 |   
84 |   printf("    \033[1;32mAll tests passed\033[0m\n"); // NOTE(review): nothing above asserts on the results; this is a timing run
85 |   exit(0);
86 | }
87 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SQISign: compact post-quantum signatures from quaternions and isogenies
 2 | 
 3 | This code implements the original version of the isogeny-based signature scheme SQISign. For details see this [paper](https://eprint.iacr.org/2020/1240).
 4 | 
 5 | An updated and optimized version can be found [here](https://github.com/SQISign/sqisign-ec23).
 6 | 
 7 | (C) 2020, The SQISign team. MIT license.
 8 | 
 9 | ## Dependencies
10 | 
11 | The code depends on the latest stable version of the [PARI/GP
12 | library](http://pari.math.u-bordeaux.fr/), 2.11.4.
13 | 
14 | The code has an optional dependency on [GMP](https://gmplib.org/),
15 | which is also an optional dependency of PARI/GP and is typically
16 | installed along with it.
17 | 
18 | ## Supported platforms
19 | 
20 | The code compiles and runs on Linux and MacOS.
21 | 
22 | It contains two implementations of the low-level arithmetic functions:
23 | 
24 | - One based on handwritten assembly for the x86-64 platform,
25 |   starting from Broadwell architectures.
26 | 
27 | - One based on GMP.
28 | 
29 | By default, both versions are compiled and tested.
30 | 
31 | ## Compile
32 | 
33 | To compile and test the code, run
34 | 
35 | ```
36 | make
37 | make check
38 | ```
39 | 
40 | The tests typically take 2-3 minutes.
41 | 
42 | To only compile and test the assembly version:
43 | 
44 | ```
45 | make asm
46 | make check_asm
47 | ```
48 | 
49 | To only compile and test the GMP version:
50 | 
51 | ```
52 | make gmp
53 | make check_gmp
54 | ```
55 | 
56 | ## Tuning
57 | 
58 | The algorithms have already been tuned on modern processors, and the
59 | default values should be fine. However, should you wish to fine-tune
60 | for your machine, run:
61 | 
62 | ```
63 | make tune
64 | make
65 | ```
66 | 
67 | Be patient, tuning takes several minutes.
68 | 
69 | ## Run benchmarks
70 | 
71 | To run benchmarks type
72 | 
73 | ```
74 | make benchmark
75 | ```
76 | 
77 | Allow a few minutes for the benchmarks to complete.  This produces
78 | files named `bench_xxx.tsv` containing timing information on the
79 | various parts of the signature.
80 | 
81 | By default, only benchmarks for the assembly version are produced. If
82 | you want benchmarks for the GMP version, run
83 | 
84 | ```
85 | make benchmark_gmp
86 | ```
87 | 
88 | ### Timings
89 | 
90 | The following are timings (medians) obtained running the benchmarks
91 | above on an Intel Core i7-6700 CPU @ 3.40GHz with Turbo Boost
92 | disabled.
93 | 
94 | |        | Mcycles |    ms |
95 | |:-------|--------:|------:|
96 | | keygen |   1,959 |   575 |
97 | | sign   |   7,767 | 2,279 |
98 | | verify |     142 |    42 |
99 | 


--------------------------------------------------------------------------------
/include/isogenies.h:
--------------------------------------------------------------------------------
 1 | #ifndef ISOGENIES_H
 2 | #define ISOGENIES_H
 3 | 
 4 | #include "constants.h"
 5 | #include "uintbig.h"
 6 | #include "mont.h"
 7 | 
 8 | // A packed struct representing the degree of an isogeny as a bitfield
 9 | // of valuations corresponding to p_minus_fact or p_plus_fact (see
10 | // constants.c)
11 | typedef struct isog_degree {
12 |   uint64_t val;    // The first byte is used for the valuation of 3 or
13 |                    // 5; the rest is used for all other valuations, 2
14 |                    // bits per factor.
15 | 
16 |                    // 64 bits therefore hold at most 29 factors (8 + 28*2 bits)
17 | } isog_degree;
18 | 
19 | /* Getters and setters for isog_degree */
20 | 
21 | static inline uint8_t degree_get(isog_degree deg, int i) { // valuation of factor i: low 8 bits for i == 0, else the 2 bits at offset i*2 + 6
22 |   return i == 0 ? deg.val & 0xff : (deg.val >> (i*2 + 6)) & 0x3;
23 | }
24 | // Set degree to 1 (all valuations cleared)
25 | static inline void degree_one(isog_degree *deg) { deg->val = 0; }
26 | // Set valuation of i-th factor to 0 (unsigned masks: 3 << 62 is not representable in signed long long)
27 | static inline void degree_unset(isog_degree *deg, int i) {
28 |   deg->val &= ~(i == 0 ? UINT64_C(0xff) : (UINT64_C(3) << (i*2 + 6)));
29 | }
30 | // Set valuation of i-th factor. Do call degree_one or degree_unset
31 | // before this! (val is OR-ed into the field, not masked or range-checked.)
32 | static inline void degree_set(isog_degree *deg, int i, uint64_t val) {
33 |   deg->val |= val << (i*2 + !!i * 6); // shift is 0 for i == 0, i*2 + 6 otherwise
34 | }
35 | 
36 | /*
37 |   Operations on the degree that depend on parameters.
38 |   
39 |   These expect parameters from constants.c, such as p_minus_fact, etc.
40 |  */
41 | 
42 | // Return the complement of the degree, i.e. (p ± 1)/deg
43 | static inline isog_degree degree_co(isog_degree deg, const long *mult, long len) {
44 |   isog_degree res = { 0 };
45 |   for (int i = 0; i < len; i++) {
46 |     degree_set(&res, i, mult[i] - degree_get(deg, i)); // remaining valuation of factor i; presumably never negative -- confirm
47 |   }
48 |   return res;
49 | }
50 | static inline void degree_to_uint(uintbig *res, isog_degree deg, const long *fact, long len) { // res <- product of fact[i]^valuation(i)
51 |   *res = uintbig_1;
52 |   for (int i = 0; i < len; i++) {
53 |     uint8_t val = degree_get(deg, i);
54 |     for (int j = 0; j < val; j++)         // one 64-bit multiplication per unit of valuation
55 |       uintbig_mul3_64(res, res, fact[i]);
56 |   }
57 | }
58 | 
59 | // An isogeny of odd degree dividing (p²-1): two kernel points with their degrees
60 | typedef struct odd_isogeny {
61 |   proj kernel_plus, kernel_minus;   // presumably generators for the p+1 and p-1 parts -- confirm
62 |   isog_degree deg_plus, deg_minus;  // degrees associated with kernel_plus / kernel_minus
63 | } odd_isogeny;
64 | 
65 | // Evaluate isogeny phi : A -> ?? at point P.
66 | // A is set to the image curve, and P to the image point.
67 | void eval(proj *A, const odd_isogeny *phi, proj *P);
68 | 
69 | void eval_mult(proj *A, const odd_isogeny *phi, proj *P, int n);
70 | 
71 | 
72 | // Compute the dual of phi : A -> ??.
73 | // phi is set to the dual and A is set to the new domain.
74 | void dual(proj *A, odd_isogeny *phi);
75 | 
76 | 
77 | 
78 | #endif
79 | 


--------------------------------------------------------------------------------
/src/p6983/steps_tunecycles.c:
--------------------------------------------------------------------------------
 1 | int steps_guess(long long *bs,long long *gs,long long l)
 2 | { /* threshold table: the first range containing l sets (*bs, *gs) and returns 1; returns 0 when l > 6983 */
 3 |   /* l=3: bs=0 gs=0 bench=10898 baseline=10866 */
 4 |   /* l=5: bs=0 gs=0 bench=13996 baseline=13874 */
 5 |   /* l=7: bs=0 gs=0 bench=19770 baseline=19650 */
 6 |   /* l=11: bs=0 gs=0 bench=30392 baseline=30060 */
 7 |   /* l=31: bs=0 gs=0 bench=81956 baseline=81678 */
 8 |   /* l=43: bs=0 gs=0 bench=111358 baseline=110954 */
 9 |   /* l=83: bs=0 gs=0 bench=210558 baseline=210460 */
10 |   /* l=103: bs=0 gs=0 bench=260916 baseline=277756 */
11 |   /* l=107: bs=0 gs=0 bench=271658 baseline=271148 */
12 |   /* l=109: bs=0 gs=0 bench=278950 baseline=275532 */
13 |   /* l=137: bs=0 gs=0 bench=344948 baseline=342378 */
14 |   /* l=199: bs=0 gs=0 bench=499932 baseline=496918 */
15 |   /* l=227: bs=0 gs=0 bench=567720 baseline=566702 */
16 |   if (l <= 227) { *bs = 0; *gs = 0; return 1; }
17 |   /* l=419: bs=14 gs=7 bench=927284 baseline=1039492 */
18 |   if (l <= 419) { *bs = 14; *gs = 7; return 1; }
19 |   /* l=491: bs=14 gs=8 bench=1048384 baseline=1222546 */
20 |   if (l <= 491) { *bs = 14; *gs = 8; return 1; }
21 |   /* l=569: bs=14 gs=10 bench=1164240 baseline=1432688 */
22 |   if (l <= 569) { *bs = 14; *gs = 10; return 1; }
23 |   /* l=631: bs=14 gs=11 bench=1292240 baseline=1569390 */
24 |   if (l <= 631) { *bs = 14; *gs = 11; return 1; }
25 |   /* l=677: bs=16 gs=10 bench=1354698 baseline=1699296 */
26 |   if (l <= 677) { *bs = 16; *gs = 10; return 1; }
27 |   /* l=751: bs=16 gs=11 bench=1491070 baseline=1880982 */
28 |   if (l <= 751) { *bs = 16; *gs = 11; return 1; }
29 |   /* l=827: bs=16 gs=12 bench=1627010 baseline=2079036 */
30 |   if (l <= 827) { *bs = 16; *gs = 12; return 1; }
31 |   /* l=857: bs=16 gs=13 bench=1664550 baseline=2171694 */
32 |   /* l=859: bs=16 gs=13 bench=1666950 baseline=2165976 */
33 |   if (l <= 859) { *bs = 16; *gs = 13; return 1; }
34 |   /* l=883: bs=20 gs=11 bench=1714210 baseline=2206862 */
35 |   if (l <= 883) { *bs = 20; *gs = 11; return 1; }
36 |   /* l=1019: bs=18 gs=14 bench=1914868 baseline=2571866 */
37 |   if (l <= 1019) { *bs = 18; *gs = 14; return 1; }
38 |   /* l=1171: bs=22 gs=13 bench=2141964 baseline=2909050 */
39 |   if (l <= 1171) { *bs = 22; *gs = 13; return 1; }
40 |   /* l=1879: bs=30 gs=15 bench=3003372 baseline=4850172 */
41 |   if (l <= 1879) { *bs = 30; *gs = 15; return 1; }
42 |   /* l=2713: bs=32 gs=21 bench=3999778 baseline=6837360 */
43 |   if (l <= 2713) { *bs = 32; *gs = 21; return 1; }
44 |   /* l=3691: bs=38 gs=24 bench=5223138 baseline=9152296 */
45 |   if (l <= 3691) { *bs = 38; *gs = 24; return 1; }
46 |   /* l=4019: bs=40 gs=25 bench=5592828 baseline=9977994 */
47 |   if (l <= 4019) { *bs = 40; *gs = 25; return 1; }
48 |   /* l=4283: bs=38 gs=28 bench=5876690 baseline=10815114 */
49 |   if (l <= 4283) { *bs = 38; *gs = 28; return 1; }
50 |   /* l=6983: bs=62 gs=28 bench=8551860 baseline=17354004 */
51 |   if (l <= 6983) { *bs = 62; *gs = 28; return 1; }
52 |   return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/test/isogenies.c:
--------------------------------------------------------------------------------
 1 | #define _XOPEN_SOURCE
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | 
 6 | #include "mont.h"
 7 | #include "constants.h"
 8 | #include "isogenies.h"
 9 | 
10 | isog_degree order(const proj *P, const proj *A, const long *fact, const long *mult, long len) { // order of P on A, as per-factor valuations
11 |   proj tmp;
12 |   uintbig cof;
13 |   isog_degree deg = degree_co((isog_degree){ 0 }, mult, len); // start from the full multiplicities mult[]
14 |   for (int j = 0; j < len; j++) {
15 |     degree_unset(&deg, j);                 // drop factor j from the cofactor
16 |     degree_to_uint(&cof, deg, fact, len);  // cof = product over the other factors, at their current valuations
17 |     xMUL(&tmp, A, P, &cof);
18 |     uintbig_set(&cof, fact[j]);
19 |     uint8_t v = 0;
20 |     for ( ; !mont_iszero(&tmp); v++) {     // v counts multiplications by fact[j] until tmp becomes zero
21 |       xMUL(&tmp, A, &tmp, &cof);
22 |     }
23 |     degree_set(&deg, j, v);                // record the valuation found for factor j
24 |   }
25 |   return deg;
26 | }
27 | 
28 | int main() {
29 |   srand48(1);
30 |   // Only implementing for p=3 mod 4, for the moment
31 |   assert(class_mod_4 == 3);
32 | 
33 |   proj A = { fp2_0, fp2_1 }, Abak;
34 | 
35 |   odd_isogeny phi;
36 |   proj P, Pbak;
37 |   for (int oncurve = 0; oncurve < 2; oncurve++) { // pass 0 uses the p_minus data, pass 1 the p_plus data
38 |     proj *kernel, *nkernel;
39 |     isog_degree *deg_k, *deg_nk;
40 |     const uintbig *cofactor;
41 |     const long *fact, *mult;
42 |     long len;
43 | 
44 |     if (oncurve) {
45 |       kernel = &phi.kernel_plus; nkernel =  &phi.kernel_minus;
46 |       deg_k = &phi.deg_plus; deg_nk = &phi.deg_minus;
47 |       cofactor = &p_plus_odd_cofactor;
48 |       fact = p_plus_fact; mult = p_plus_mult;
49 |       len = p_plus_len;
50 |     } else {
51 |       kernel = &phi.kernel_minus; nkernel =  &phi.kernel_plus;
52 |       deg_k = &phi.deg_minus; deg_nk = &phi.deg_plus;
53 |       cofactor = &p_minus_odd_cofactor;
54 |       fact = p_minus_fact; mult = p_minus_mult;
55 |       len = p_minus_len;
56 |     }
57 | 
58 |     for (int i = 0; i < 10; i++) {
59 |       // sample kernel point with is_on_curve(kernel) == oncurve
60 |       do {
61 | 	fp2_random(&kernel->x); fp2_random(&kernel->z);
62 |       } while (is_on_curve(kernel, &A) != oncurve);
63 |       // multiply by cofactor
64 |       xMUL(kernel, &A, kernel, cofactor);
65 |       // compute order
66 |       *deg_k = order(kernel, &A, fact, mult, len);
67 | 
68 |       // trivial kernel on the other side
69 |       nkernel->x = fp2_1;
70 |       nkernel->z = fp2_0;
71 |       degree_one(deg_nk);
72 | 
73 |       // Sample evaluation point with is_on_curve(P) != oncurve
74 |       do {
75 | 	fp2_random(&P.x); fp2_random(&P.z);
76 |       } while (is_on_curve(&P, &A) == oncurve);
77 | 
78 |       // Evaluate isogeny
79 |       Abak = A; Pbak = P;
80 |       eval(&A, &phi, &P);
81 |       // Compute and evaluate dual, check image equality
82 |       A = Abak;
83 |       dual(&A, &phi);
84 |       eval(&A, &phi, &P);
85 |       assert(mont_equal(&A, &Abak));
86 |       // Check dual isogeny equation: dual(phi(P)) must equal [deg] P
87 |       uintbig ord;
88 |       degree_to_uint(&ord, *deg_k, fact, len);
89 |       xMUL(&Pbak, &A, &Pbak, &ord);
90 |       assert(mont_equal(&Pbak, &P));
91 |     }
92 |   }
93 | 
94 |   printf("    \033[1;32mAll tests passed\033[0m\n");
95 |   exit(0);
96 | }
97 | 


--------------------------------------------------------------------------------
/src/p6983/fp2.c:
--------------------------------------------------------------------------------
  1 | #include "fp2.h"
  2 | 
  3 | #define FP_LIMBS (4 * 64 / GMP_LIMB_BITS)
  4 | 
  5 | const fp2 fp2_0 = { { 0, 0, 0, 0 }, { 0, 0, 0, 0 } };
  6 | 
  7 | /* Arithmetic modulo X^2 + 1 */
  8 | 
  9 | void fp2_mul3(fp2 *x, fp2 const *y, fp2 const *z) { // x <- y * z using three fp multiplications
 10 |   fp xsum, xim;
 11 |   fp_add3(&xsum, &y->re, &y->im);
 12 |   fp_add3(&xim, &z->re, &z->im);
 13 |   fp_mul2(&xsum, &xim);               // (y.re + y.im) * (z.re + z.im)
 14 |   fp_mul3(&xim, &y->im, &z->im);      // y.im * z.im
 15 |   fp_mul3(&x->re, &y->re, &z->re);    // y.re * z.re; y and z are not read after this line
 16 |   fp_sub3(&x->im, &xsum, &xim);
 17 |   fp_sub2(&x->im, &x->re);            // im = cross terms y.re*z.im + y.im*z.re
 18 |   fp_sub2(&x->re, &xim);              // re = y.re*z.re - y.im*z.im
 19 | }
 20 | 
 21 | void fp2_sq2(fp2 *x, fp2 const *y) { // x <- y^2
 22 |   fp sum, diff;
 23 |   fp_add3(&sum, &y->re, &y->im);
 24 |   fp_sub3(&diff, &y->re, &y->im);
 25 |   fp_mul3(&x->im, &y->re, &y->im);
 26 |   fp_add2(&x->im, &x->im);          // im = 2 * y.re * y.im
 27 |   fp_mul3(&x->re, &sum, &diff);     // re = (y.re + y.im)(y.re - y.im), from temporaries only
 28 | }
 29 | 
 30 | void fp2_inv(fp2 *x) { // x <- 1/x in place: conjugate divided by re^2 + im^2
 31 |   fp inorm, im2;
 32 |   fp_sq2(&inorm, &x->re);
 33 |   fp_sq2(&im2, &x->im);
 34 |   fp_add2(&inorm, &im2);      // inorm = re^2 + im^2
 35 |   fp_inv(&inorm);             // no check for x == 0 here
 36 |   fp_mul2(&x->re, &inorm);
 37 |   fp_mul2(&x->im, &inorm);
 38 |   fp_neg1(&x->im);
 39 | }
 40 | 
 41 | bool fp2_issquare(fp2 *x) { // returns fp_issquare(re^2 + im^2); WARNING: overwrites *x
 42 |   fp_sq1(&x->re);
 43 |   fp_sq1(&x->im);
 44 |   fp_add2(&x->re, &x->im);    // x->re now holds re^2 + im^2, x->im holds im^2
 45 |   return fp_issquare(&x->re);
 46 | }
 47 | 
 48 | void fp2_frob2(fp2 *x, const fp2 *y) { // x <- (y.re, -y.im)
 49 |   x->re = y->re;
 50 |   fp_neg2(&x->im, &y->im);
 51 | }
 52 | 
 53 | void fp2_exp(fp2 *res, fp2 const *x, uintbig const *k) // res <- x^k by square-and-multiply, most significant bit first
 54 | {
 55 |     if (fp2_iszero(x)) { *res = *x; return; } // zero base yields zero for every k, including k == 0
 56 |     const fp2 xcopy = *x;                     // copy, so res may alias x
 57 |     *res = fp2_1;
 58 | 
 59 |     unsigned long i = BITS;
 60 |     while (--i && !uintbig_bit(k, i));        // i stops at the highest set bit of k, or at 0
 61 |     do {
 62 |         fp2_sq1(res);
 63 |         if (uintbig_bit(k, i)) {
 64 |             fp2_mul2(res, &xcopy);
 65 |         }
 66 |     } while (i--);                            // processes bits i, i-1, ..., 0
 67 | }
 68 | 
 69 | 
 70 | 
 71 | // dlp of h in basis g, which has order ell, naive implementation
 72 | bool fp2_dlp_naive(long *res, const fp2 *h, const fp2 *g, long ell) {
 73 |     // x runs through g^0, g^1, ..., g^(ell-1). The exponent is a long, like
 74 |     // ell, so the counter cannot overflow before it reaches the bound.
 75 |     fp2 x = fp2_1;
 76 |     for (long e = 0; e < ell; ++e) {
 77 |         if (fp2_equal(h,&x)) { *res = e; return true;}
 78 |         fp2_mul2(&x,g);
 79 |     }
 80 | 
 81 |     // h matched none of the ell powers; *res is left untouched
 82 |     return false;
 83 | }
 84 | 
 85 | 
 86 | void fp2_sqrt(fp2 *x) { // x <- a square root of x, in place; no check that x is a square
 87 |     if (fp_iszero(&x->im)) { // x has im == 0
 88 |         fp x_re_copy = x->re; // fp_issquare takes a non-const pointer, so test a copy
 89 | 
 90 |         if (fp_issquare(&x_re_copy)) {
 91 |             fp_sqrt(&x->re); // root is (sqrt(re), 0)
 92 |             return;
 93 |         }
 94 |         else {
 95 |             fp_neg2(&x->im, &x->re);
 96 |             fp_sqrt(&x->im);
 97 |             fp_set(&x->re, 0); // root is (0, sqrt(-re))
 98 |             return;
 99 |         }
100 |     }
101 | 
102 |     fp sdelta, re, tmp1, tmp2, inv2, im;
103 | 
104 |     // sdelta = sqrt(re^2 + im^2)
105 |     fp_sq2(&sdelta, &x->re);
106 |     fp_sq2(&tmp1, &x->im);
107 |     fp_add2(&sdelta, &tmp1);
108 | 
109 |     fp_sqrt(&sdelta);
110 | 
111 |     fp_set(&inv2,2);
112 |     fp_inv(&inv2); // inv2 = 1/2, recomputed on every call
113 | 
114 |     fp_add3(&re,&x->re,&sdelta);
115 |     fp_mul2(&re,&inv2); // candidate (re + sdelta) / 2
116 |     tmp2 = re;
117 | 
118 |     if (!fp_issquare(&tmp2)) { // otherwise use (re - sdelta) / 2
119 |         fp_sub3(&re,&x->re,&sdelta);
120 |         fp_mul2(&re,&inv2);
121 |     }
122 | 
123 |     fp_sqrt(&re); // real part of the root
124 | 
125 |     im = re;
126 | 
127 |     fp_inv(&im);
128 |     fp_mul2(&im,&inv2);
129 |     fp_mul2(&im,&x->im); // imaginary part = x.im / (2 * re)
130 | 
131 |     x->re = re;
132 |     x->im = im;
133 | }
134 | 
135 | 


--------------------------------------------------------------------------------
/src/tunecycles.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include "isogenies.h"
  5 | #include "steps.h"
  6 | #include "cycle.h"
  7 | #include "constants.h"
  8 | 
  9 | int comparelonglong(const void *uptr,const void *vptr) /* qsort comparator: ascending order of long long */
 10 | {
 11 |   long long u = *(const long long *) uptr;
 12 |   long long v = *(const long long *) vptr;
 13 |   if (u < v) return -1; /* explicit comparisons instead of u - v, which could overflow */
 14 |   if (u > v) return 1;
 15 |   return 0;
 16 | }
 17 | 
 18 | long long median(long long *x,long long xlen) /* median of x[0..xlen-1]; sorts x in place */
 19 | {
 20 |   if (xlen <= 0) return 0; /* empty input: 0 by convention */
 21 |   qsort(x,xlen,sizeof(long long),comparelonglong);
 22 |   if (xlen&1) return x[xlen/2]; /* odd length: middle element */
 23 |   return (x[xlen/2-1]+x[xlen/2])/2; /* even length: integer mean of the two middle elements */
 24 | }
 25 | 
 26 | void isog_setup(proj *A, proj *P, proj *K, /* fills A, K and P with inputs for one timing run */
 27 | 		isog_degree deg,
 28 | 		const uintbig *cof, const long *fact, const long *mult, long len,
 29 | 		bool twist)
 30 | {
 31 |   A->x = fp2_0;
 32 |   A->z = fp2_1;
 33 | 
 34 |   for (;;) { /* retry until K is non-zero after clearing the cofactors */
 35 |     do {
 36 |       fp2_random(&K->x); fp2_random(&K->z);
 37 |     } while (is_on_curve(K, A) == twist); /* keep K only when is_on_curve(K, A) != twist */
 38 |     xMUL(K,A,K,cof);
 39 |     uintbig cof2;
 40 |     degree_to_uint(&cof2, degree_co(deg, mult, len), fact, len); /* complement of deg w.r.t. mult */
 41 |     xMUL(K,A,K,&cof2);
 42 | 
 43 |     if (!mont_iszero(K)) break;
 44 |   }
 45 | 
 46 |   uintbig cof2;
 47 |   degree_to_uint(&cof2, deg, fact, len);
 48 |   xMUL(P,A,K,&cof2);
 49 |   if (!mont_iszero(P)) abort(); /* sanity check: [deg]K must be zero */
 50 | 
 51 |   fp2_random(&P->x); /* P is then overwritten with random coordinates */
 52 |   fp2_random(&P->z);
 53 | }
 54 | 
 55 | #define TIMINGS 31
 56 | 
 57 | int main() /* for each prime factor l: time xISOG_old, then search (bs, gs) minimising the median xISOG time */
 58 | {
 59 |   proj A[TIMINGS];
 60 |   proj P[TIMINGS];
 61 |   proj K[TIMINGS];
 62 |   long long baseline[TIMINGS];
 63 |   long long bench[TIMINGS];
 64 | 
 65 |   long len = p_minus_len;
 66 |   const long *fact = p_minus_fact,
 67 |     *mult = p_minus_mult;
 68 |   const uintbig *cofactor = &p_minus_odd_cofactor;
 69 |   int twist = 1; /* first pass: p_minus data with twist = 1; second pass: p_plus data with twist = 0 */
 70 | 
 71 |   do {
 72 |     for (long long lpos = 0;lpos < len;++lpos) {
 73 |       long long l = fact[lpos];
 74 |       isog_degree deg = { 0 };
 75 |       degree_set(&deg, lpos, 1); /* degree = l^1 */
 76 | 
 77 |       for (long long t = 0;t < TIMINGS;++t)
 78 | 	isog_setup(&A[t],&P[t],&K[t],deg,cofactor,fact,mult,len,twist);
 79 |       for (long long t = 0;t < TIMINGS;++t) {
 80 | 	baseline[t] = getticks();
 81 | 	xISOG_old(&A[t],&P[t],&K[t],l);
 82 | 	baseline[t] = getticks() - baseline[t];
 83 |       }
 84 |       long long baselinemedian = median(baseline,TIMINGS);
 85 | 
 86 |       long long bestbs = 0;
 87 |       long long bestgs = 0;
 88 |       long long bestbenchmedian = -1; /* -1 = no candidate measured yet */
 89 | 
 90 |       for (long long bs = 0;bs <= 100;bs += 2) {
 91 | 	for (long long gs = 0;;++gs) {
 92 | 	  if (!gs) if (bs) continue; /* skip gs == 0 unless bs == 0 */
 93 | 	  if (!bs) if (gs) break;    /* bs == 0 is only tried with gs == 0 */
 94 | 	  if (2*bs*gs > (l-1)/2) break;
 95 | 	  if (gs > bs*2) continue;
 96 | 	  if (bs > gs*3) continue;
 97 | 
 98 | 	  steps_override(bs,gs);
 99 | 
100 | 	  for (long long t = 0;t < TIMINGS;++t)
101 | 	    isog_setup(&A[t],&P[t],&K[t],deg,cofactor,fact,mult,len,twist);
102 | 
103 | 	  for (long long t = 0;t < TIMINGS;++t) {
104 | 	    bench[t] = getticks();
105 | 	    xISOG(&A[t],&P[t],&K[t],l);
106 | 	    bench[t] = getticks() - bench[t];
107 | 	  }
108 | 	  /* XXX: check for stability, re-run if necessary */
109 | 
110 | 	  long long benchmedian = median(bench,TIMINGS);
111 | 
112 | 	  if (benchmedian > 0)
113 | 	    if (bestbenchmedian < 0 || benchmedian < bestbenchmedian) {
114 | 	      bestbs = bs;
115 | 	      bestgs = gs;
116 | 	      bestbenchmedian = benchmedian;
117 | 	    }
118 | 	}
119 |       }
120 | 
121 |       printf("%lld %lld %lld %lld %lld\n",l,bestbs,bestgs,bestbenchmedian,baselinemedian); /* columns: l, bs, gs, best median, baseline median */
122 |       fflush(stdout);
123 |     }
124 | 
125 |     len = p_plus_len;
126 |     fact = p_plus_fact;
127 |     mult = p_plus_mult;
128 |     cofactor = &p_plus_odd_cofactor;
129 |     twist--;
130 |   } while (twist >= 0);
131 |   
132 |   return 0;
133 | }
134 | 


--------------------------------------------------------------------------------
/src/p6983/fp.c:
--------------------------------------------------------------------------------
  1 | #include "fp.h"
  2 | #include "rng.h"
  3 | #include <gmp.h>
  4 | #include <pari/pari.h>
  5 | 
  6 | #define FP_LIMBS (4 * 64 / GMP_LIMB_BITS)
  7 | 
  8 | const fp fp_0 = {0, 0, 0, 0};
  9 | const fp fp_1 = {1, 0, 0, 0};
 10 | const uintbig p = { 0x50ca4291ffffffff, 0xd1b004f94a5952c9,
 11 | 		    0xb25c76a437728f3b, 0xa3091565b678a990 };
 12 | const uintbig mp = { 0xaf35bd6e00000001, 0x2e4ffb06b5a6ad36, 
 13 | 		     0x4da3895bc88d70c4, 0x5cf6ea9a4987566f };
 14 | 
 15 | void fp_set(fp *x, uint64_t y) { // x <- y: low word set to y, upper three words cleared
 16 |   x->x.c[0] = y;
 17 |   x->x.c[1] = x->x.c[2] = x->x.c[3] = 0;
 18 | }
 19 | 
 20 | void fp_cswap(fp *x, fp *y, bool c) { // swaps x and y when c is true, does nothing otherwise
 21 |   uint64_t tmp;
 22 |   for (int i = 0; i < 4*c; i++) { // loop bound 4*c is 0 or 4, so the trip count depends on c
 23 |     tmp = y->x.c[i];
 24 |     y->x.c[i] = x->x.c[i];
 25 |     x->x.c[i] = tmp;
 26 |   }
 27 | }
 28 | 
 29 | void fp_enc(fp *x, uintbig const *y) { // copies the four words of y into x unchanged
 30 |   x->x.c[0] = y->c[0]; x->x.c[1] = y->c[1];
 31 |   x->x.c[2] = y->c[2]; x->x.c[3] = y->c[3];
 32 | }
 33 | void fp_dec(uintbig *x, fp const *y) { // copies the four words of y into x unchanged
 34 |   x->c[0] = y->x.c[0]; x->c[1] = y->x.c[1];
 35 |   x->c[2] = y->x.c[2]; x->c[3] = y->x.c[3];
 36 | }
 37 | 
 38 | void fp_add2(fp *x, fp const *y) { fp_add3(x, x, y); } // x += y
 39 | void fp_sub2(fp *x, fp const *y) { fp_sub3(x, x, y); } // x -= y
 40 | void fp_mul2(fp *x, fp const *y) { fp_mul3(x, x, y); } // x *= y
 41 | 
 42 | void fp_add3(fp *x, fp const *y, fp const *z) { // x <- y + z with one conditional correction
 43 |   mp_limb_t carry = mpn_add_n(x->x.c, y->x.c, z->x.c, FP_LIMBS);
 44 |   if (carry) {
 45 |     mpn_add_n(x->x.c, x->x.c, mp.c, FP_LIMBS); // sum overflowed the words: add mp (presumably 2^256 - p -- confirm)
 46 |   } else if (x->x.c[3] > p.c[3] || // word-by-word test for x >= p, most significant word first
 47 | 	     (x->x.c[3] == p.c[3] && x->x.c[2] > p.c[2]) ||
 48 | 	     (x->x.c[3] == p.c[3] && x->x.c[2] == p.c[2] && x->x.c[1] > p.c[1]) ||
 49 | 	     (x->x.c[3] == p.c[3] && x->x.c[2] == p.c[2] && x->x.c[1] == p.c[1] && x->x.c[0] >= p.c[0])
 50 | 	     ) {
 51 |     mpn_sub_n(x->x.c, x->x.c, p.c, FP_LIMBS);
 52 |   }
 53 | }
 54 | 
 55 | void fp_sub3(fp *x, fp const *y, fp const *z) { // x <- y - z with one conditional correction
 56 |   mp_limb_t borrow = mpn_sub_n(x->x.c, y->x.c, z->x.c, FP_LIMBS);
 57 |   if (borrow) {
 58 |     mpn_sub_n(x->x.c, x->x.c, mp.c, FP_LIMBS); // difference wrapped around: subtract mp (presumably 2^256 - p -- confirm)
 59 |   }
 60 | }
 61 | 
 62 | void fp_mul3(fp *x, fp const *y, fp const *z) { // x <- (y * z) mod p
 63 |   uint64_t tmp[8], thrash[5]; // tmp: double-width product; thrash: quotient, discarded
 64 |   mpn_mul_n(tmp, y->x.c, z->x.c, FP_LIMBS);
 65 |   mpn_tdiv_qr(thrash, x->x.c, 0, tmp, 2 * FP_LIMBS, p.c, FP_LIMBS); // remainder goes straight into x
 66 | }
 67 | 
 68 | void fp_sq1(fp *x) { fp_sq2(x, x); } // x <- x^2 in place
 69 | void fp_sq2(fp *x, fp const *y) { fp_mul3(x, y, y); } // x <- y^2, via the generic multiplication
 70 | void fp_inv(fp *x) { // x <- x^-1 mod p in place (0 when x has no inverse)
 71 |   mpz_t res, mpzx, mpzp;
 72 |   mpz_init(res);
 73 |   mpz_roinit_n(mpzx, x->x.c, FP_LIMBS);
 74 |   mpz_roinit_n(mpzp, p.c, FP_LIMBS);
 75 |   // mpz_invert leaves res undefined when no inverse exists; fall back to 0
 76 |   if (!mpz_invert(res, mpzx, mpzp)) mpz_set_ui(res, 0);
 77 |   // Copy by GMP limbs through the public accessors, then zero-pad;
 78 |   // this stays correct when GMP_LIMB_BITS != 64 (cf. FP_LIMBS).
 79 |   mp_size_t n = (mp_size_t) mpz_size(res);
 80 |   mp_limb_t *dst = (mp_limb_t *) x->x.c;
 81 |   if (n > 0) mpn_copyi(dst, mpz_limbs_read(res), n);
 82 |   if (n < FP_LIMBS) mpn_zero(dst + n, FP_LIMBS - n);
 83 |   mpz_clear(res);
 84 | }
 85 | bool fp_issquare(fp *x) { // true when the Legendre symbol (x/p) is 0 or 1; x is only read
 86 |   mpz_t mpzx, mpzp;
 87 |   mpz_roinit_n(mpzx, x->x.c, FP_LIMBS);
 88 |   mpz_roinit_n(mpzp, p.c, FP_LIMBS);
 89 |   int s = mpz_legendre(mpzx, mpzp);
 90 |   return s+1; // -1 -> false; 0 and 1 -> true after conversion to bool
 91 | }
 92 | 
 93 | void fp_sqrt(fp *x) { // x <- x^((p+1)/4) mod p in place
 94 |     mpz_t mpzx, mpzp, mpzsqrt, mpzp14;
 95 |     mpz_init(mpzsqrt);
 96 |     mpz_init(mpzp14);
 97 | 
 98 |     mpz_roinit_n(mpzx, x->x.c, FP_LIMBS);
 99 |     mpz_roinit_n(mpzp, p.c, FP_LIMBS);
100 |     mpz_add_ui(mpzp14, mpzp, 1);
101 |     mpz_fdiv_q_ui(mpzp14, mpzp14, 4); // floor division; the operand p + 1 is non-negative
102 |     mpz_powm(mpzsqrt,mpzx,mpzp14,mpzp);
103 | 
104 |     // Copy the result by GMP limbs through the public accessors and
105 |     // zero-pad the rest, instead of reading _mp_size/_mp_d as 64-bit
106 |     // words; this stays correct when GMP_LIMB_BITS != 64 (cf. FP_LIMBS).
107 |     mp_size_t n = (mp_size_t) mpz_size(mpzsqrt);
108 |     mp_limb_t *dst = (mp_limb_t *) x->x.c;
109 |     if (n > 0) mpn_copyi(dst, mpz_limbs_read(mpzsqrt), n);
110 |     if (n < FP_LIMBS) mpn_zero(dst + n, FP_LIMBS - n);
111 | 
112 |     mpz_clear(mpzsqrt);
113 |     mpz_clear(mpzp14);
114 | }
115 | 
116 | void fp_random(fp *x) { // x <- 32 random bytes reduced mod p
117 |   uint64_t thrash; // quotient of the reduction, discarded
118 |   randombytes(x->x.c + 0, 32);
119 |   mpn_tdiv_qr(&thrash, x->x.c, 0, x->x.c, FP_LIMBS, p.c, FP_LIMBS); // in-place remainder; NOTE(review): plain reduction of 256 bits is not exactly uniform mod p
120 | }
121 | 


--------------------------------------------------------------------------------
/src/isomorphism.c:
--------------------------------------------------------------------------------
  1 | #include "isomorphism.h"
  2 | #include <assert.h>
  3 | 
  4 | void jinv256(proj *j, const proj *A) {
  5 |   // j(A) / 256 = (A²-3)³/(A²-4)
  6 |   fp2 X2, Z2;
  7 |   fp2_sq2(&X2, &A->x);
  8 |   fp2_sq2(&Z2, &A->z);
  9 |   fp2_add3(&j->x, &Z2, &Z2);
 10 |   fp2_sub3(&j->z, &X2, &j->x);
 11 |   fp2_sub2(&j->z, &j->x);
 12 |   fp2_add3(&j->x, &j->z, &Z2);
 13 |   fp2_mul2(&j->z, &Z2);
 14 |   fp2_mul2(&j->z, &Z2);
 15 |   fp2_mul3(&X2, &j->x, &j->x);
 16 |   fp2_mul2(&j->x, &X2);
 17 | }
 18 | 
// Compute the coefficients (Nx, Nz, D) of an isomorphism between the
// Montgomery curves with projective coefficients A and B.
//
// NOTE(review): judging from the two trivial cases below (x -> x is encoded
// as Nx = D = 1, Nz = 0 and x -> -x as Nx = 1, D = -1, Nz = 0) the encoded
// map is presumably x -> (Nx·x + Nz)/D; confirm against mont_isom_apply.
//
// Side effect: prints a warning on stderr in the j = 1728 branch.
void mont_isom(isomorphism *isom, const proj *A, const proj *B) {
  proj A2, B2;
  fp2 tmp, tmp2;
  
  // Cross products used to compare A and B projectively.
  fp2_mul3(&tmp, &A->x, &B->z);   // Ax·Bz
  fp2_mul3(&A2.x, &A->z, &B->x);  // Az·Bx (A2.x is only a scratch value here)

  fp2_sub3(&tmp2, &tmp, &A2.x);   // Ax·Bz - Az·Bx
  fp2_add2(&tmp, &A2.x);          // Ax·Bz + Az·Bx
  // The A = B case, mapping to x -> x
  if (fp2_iszero(&tmp2)) {
    isom->D = isom->Nx = fp2_1;
    isom->Nz = fp2_0;
    return;
  }

  // The A = -B case, mapping to x -> -x
  if (fp2_iszero(&tmp)) {
    isom->D = isom->Nx = fp2_1;
    fp2_neg1(&isom->D);
    isom->Nz = fp2_0;
  }
  // The A = 0, B = ±3/√2 (j = 1728) case, mapping x -> ±(x-i)/√-2
  //
  // Watch out: in this case, the output depends on an arbitrary
  // choice for i.
  else if (fp2_iszero(&A->x)) {
    fp2_neg2(&isom->Nz, &B->x);          // Nz = -Bx
    fp2_mul3(&isom->Nx, &fp2_i, &B->x);  // Nx = i·Bx
    fp2_add3(&isom->D, &B->z, &B->z);
    fp2_add2(&isom->D, &B->z);
    fp2_neg1(&isom->D);                  // D = -3Bz
    // Issue a warning, nevertheless
    fprintf(stderr, "WARNING: calling mont_isom on j=1728\n");
  }
  else {
    // General case: work with the squares of all four coordinates.
    fp2_sq2(&B2.x, &B->x); fp2_sq2(&B2.z, &B->z);
    fp2_sq2(&A2.x, &A->x); fp2_sq2(&A2.z, &A->z);
    fp2_add3(&isom->Nx, &B2.z, &B2.z);
    fp2_add2(&isom->Nx, &B2.z);
    fp2_sub3(&isom->Nx, &B2.x, &isom->Nx);  // Bx² - 3Bz²
    
    // We should never arrive here: if B = √3, then j=0 and A=±√3, so
    // this is either a mistake (A=B), or it has been caught earlier
    // (A=-B).
    //
    // Of course, one should probably never call this function when
    // j=0, as the isomorphism is ambiguous.
    //
    // Note: when NDEBUG is defined the assert expands to nothing and
    // execution simply continues.
    if (fp2_iszero(&isom->Nx))
      assert(false);

    fp2_mul2(&isom->Nx, &A->x);  // Ax(Bx²-3Bz²)
      
    // Nz is accumulated as 2t, 4t, 8t, 9t with t = Az²Bz², then the
    // two remaining terms are subtracted.
    fp2_mul3(&tmp, &A2.z, &B2.z);
    fp2_add3(&isom->Nz, &tmp, &tmp);
    fp2_add2(&isom->Nz, &isom->Nz);
    fp2_add2(&isom->Nz, &isom->Nz);
    fp2_add2(&isom->Nz, &tmp);
    fp2_mul3(&tmp, &A2.x, &B2.z);
    fp2_sub2(&isom->Nz, &tmp);
    fp2_mul3(&tmp, &A2.z, &B2.x);
    fp2_sub2(&isom->Nz, &tmp);
    fp2_sub2(&isom->Nz, &tmp);  // 9Az²Bz² - Ax²Bz² - 2Az²Bx²

    // Nx already carries one factor Ax, so Nx·Ax = Ax²(Bx²-3Bz²).
    fp2_mul3(&isom->D, &isom->Nx, &A->x);
    fp2_add2(&isom->D, &isom->Nz);
    fp2_add2(&isom->D, &isom->Nz);
    fp2_add2(&isom->D, &isom->Nz);  // 3(9Az²Bz² - Ax²Bz² - 2Az²Bx²) + Ax²(Bx²-3Bz²)

    fp2_mul2(&isom->Nx, &A->z);
    
    // Final scaling of the three coefficients by the coordinates of B.
    fp2_mul2(&isom->Nx, &B->x); // Bx ···
    fp2_mul2(&isom->Nz, &B->x); // Bx ···
    fp2_mul2(&isom->D,  &B->z);  // Bz ···
  }
}
 95 | 
 96 | void rand_isom(isomorphism *isom, proj *A) {
 97 |   fp2_add3(&isom->Nx, &A->z, &A->z);    // 2 Az
 98 |   fp2_mul3(&isom->D, &isom->Nx, &A->z); // 2 Az²
 99 |   fp2_mul3(&isom->Nz, &A->x, &A->x);    // Ax²
100 |   fp2_sub2(&isom->Nz, &isom->D);        // Ax² - 2 Az²
101 |   fp2_sub2(&isom->Nz, &isom->D);        // Ax² - 4 Az²
102 |   fp2_sqrt(&isom->Nz);                  // √(Ax² - 4 Az²)
103 |   fp2_sub2(&isom->Nz, &A->x);           // (α:β) = (-Ax + √(Ax² - 4 Az²) : 2 Az)
104 |   
105 |   fp2_mul3(&A->x, &isom->Nz, &isom->Nz);    // α²
106 |   fp2_mul3(&isom->D, &isom->Nx, &isom->Nx); // β²
107 |   fp2_sub3(&isom->D, &A->x, &isom->D);      // α² - β²
108 |   fp2_add2(&A->x, &isom->D);                // 2α² - β²
109 |   fp2_sqrt(&isom->D);                       // √(α²-β²)
110 |   fp2_mul3(&A->z, &isom->Nz, &isom->D);     // α √(α²-β²)
111 | }
112 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Toolchain and flag sets shared by the rules below.
CC=clang
CFLAGS=-Wall -Wextra -pedantic -std=gnu99 -I./include -I/usr/local/include
CFLAGSLINK=-lpari -lm -L/usr/local/lib/ 
DEBUG_FLAGS=-g
# NOTE(review): both -O3 and -Os are given; compilers normally honour only the
# last -O option, so benchmarks are presumably built at -Os. Confirm which
# optimisation level is intended.
BENCH_FLAGS=-DNDEBUG -O3 -Os -march=native -mtune=native
  6 | 
  7 | default: all
  8 | 
  9 | # "Library" object files depended upon by all executables
 10 | SRC6983    =fp2.c constants.c precomputed.c steps_tunecycles.c
 11 | SRC6983_ASM=fp.s 
 12 | SRC6983_GMP=fp.c
 13 | SRC    =toolbox.c ideal.c klpt.c idiso.c rng.c poly.c mont.c tedwards.c steps.c\
 14 | 	isogenies.c isomorphism.c two_walks.c mitm.c sqisign.c #verif.c
 15 | SRC_ASM=uint.s
 16 | SRC_GMP=uint.c
 17 | 
 18 | OBJ6983    =$(SRC6983:%.c=build/obj/p6983/%.o)
 19 | OBJ6983_ASM=$(SRC6983_ASM:%.s=build/obj/p6983/%.o)
 20 | OBJ6983_GMP=$(SRC6983_GMP:%.c=build/obj/p6983/%_gmp.o)
 21 | OBJ        =$(SRC:%.c=build/obj/%.o)
 22 | OBJ_ASM    =$(SRC_ASM:%.s=build/obj/%.o)
 23 | OBJ_GMP    =$(SRC_GMP:%.c=build/obj/%_gmp.o)
 24 | 
 25 | LIB    =$(OBJ6983) $(OBJ) $(OBJ6983_ASM) $(OBJ_ASM)
 26 | LIB_GMP=$(OBJ6983) $(OBJ) $(OBJ6983_GMP) $(OBJ_GMP)
 27 | 
 28 | # Benchmarks
 29 | BENCHS_GMP=$(patsubst bench/%.c,build/bench_%_gmp,$(wildcard bench/*.c))
 30 | BENCHS=$(patsubst bench/%.c,build/bench_%,$(wildcard bench/*.c))
 31 | 
 32 | $(BENCHS): build/bench_%: bench/%.c $(SRC:%=src/%) $(SRC_ASM:%=src/%)\
 33 | $(SRC6983:%=src/p6983/%) $(SRC6983_ASM:%=src/p6983/%)
 34 | 	@mkdir -p $(@D)
 35 | 	$(CC) $^ $(CFLAGS) $(BENCH_FLAGS) $(CFLAGSLINK) -o $@
 36 | 
 37 | $(BENCHS_GMP): build/bench_%_gmp: bench/%.c $(SRC:%=src/%) $(SRC_GMP:%=src/%)\
 38 | $(SRC6983:%=src/p6983/%) $(SRC6983_GMP:%=src/p6983/%)
 39 | 	@mkdir -p $(@D)
 40 | 	$(CC) $^ $(CFLAGS) $(BENCH_FLAGS) $(CFLAGSLINK) -lgmp -o $@
 41 | 
 42 | # Tests
 43 | TESTS_GMP=$(patsubst test/%.c,build/test_%_gmp,$(wildcard test/*.c))
 44 | TESTS=$(patsubst test/%.c,build/test_%,$(wildcard test/*.c))
 45 | 
 46 | $(TESTS): build/test_%: test/%.c $(LIB)
 47 | 	$(CC) $< $(LIB) $(CFLAGS) $(DEBUG_FLAGS) $(CFLAGSLINK) -o $@
 48 | 
 49 | $(TESTS_GMP): build/test_%_gmp: test/%.c $(LIB_GMP)
 50 | 	$(CC) $< $(LIB_GMP) $(CFLAGS) $(DEBUG_FLAGS) $(CFLAGSLINK) -lgmp -o $@
 51 | 
 52 | # Additional executables
 53 | EXES=build/precomp
 54 | 
 55 | build/precomp: src/precomp.c $(filter-out %/precomputed.o,$(LIB_GMP))
 56 | 	$(CC) $^ $(CFLAGS) $(DEBUG_FLAGS) $(CFLAGSLINK) -lgmp -o $@
 57 | 
 58 | # Velusqrt Tuning
 59 | 
 60 | build/tunecycles_6983: src/tunecycles.c src/isogenies.c src/mont.c src/p6983/fp2.c src/uint.s\
 61 | src/p6983/fp.s src/p6983/constants.c src/rng.c src/poly.c src/steps.c src/steps_default.c
 62 | 	@mkdir -p $(@D)
 63 | 	$(CC) $^ $(CFLAGS) $(BENCH_FLAGS) $(CFLAGSLINK) -o $@
 64 | 
 65 | src/p6983/tunecycles.out: build/tunecycles_6983
 66 | 	# 8 minutes on 1.9GHz Kaby Lake
 67 | 	time ./$< > $@
 68 | 
 69 | tune: src/tune2c src/p6983/tunecycles.out
 70 | 	./src/tune2c < src/p6983/tunecycles.out > src/p6983/steps_tunecycles.c
 71 | 
 72 | # Object files
 73 | $(OBJ6983) $(OBJ): build/obj/%.o: src/%.c
 74 | 	@mkdir -p $(@D)
 75 | 	$(CC) $< $(CFLAGS) $(DEBUG_FLAGS) -c -o $@
 76 | $(OBJ6983_ASM) $(OBJ_ASM): build/obj/%.o: src/%.s
 77 | 	@mkdir -p $(@D)
 78 | 	$(CC) $< -c -o $@
 79 | $(OBJ6983_GMP) $(OBJ_GMP): build/obj/%_gmp.o: src/%.c
 80 | 	@mkdir -p $(@D)
 81 | 	$(CC) $< $(CFLAGS) $(DEBUG_FLAGS) -c -o $@
 82 | 
 83 | # Run tests
 84 | $(TESTS:%=%_run): %_run: %
 85 | 	@echo
 86 | 	./$^
 87 | 
 88 | $(TESTS_GMP:%=%_run): %_run: %
 89 | 	@echo
 90 | 	./$^
 91 | 
 92 | check_asm: $(TESTS:%=%_run)
 93 | check_gmp: $(TESTS_GMP:%=%_run)
 94 | check: check_asm check_gmp
 95 | 
 96 | # Run benchmarks
 97 | $(BENCHS:build/%=%.tsv): %.tsv: build/%
 98 | 	./$^ >> $@
 99 | 
100 | $(BENCHS_GMP:build/%=%.tsv): %.tsv: build/%
101 | 	./$^ >> $@
102 | 
103 | benchmark_asm: $(BENCHS:build/%=%.tsv)
104 | benchmark_gmp: $(BENCHS_GMP:build/%=%.tsv)
105 | benchmark: benchmark_asm
106 | 
107 | # Phony targets
108 | benchs: $(BENCHS) $(BENCHS_GMP)
109 | tests: $(TESTS) $(TESTS_GMP)
110 | asm: $(BENCHS) $(TESTS)
111 | gmp: $(BENCHS_GMP) $(TESTS_GMP)
112 | all: $(EXES) asm gmp
113 | 
114 | distclean:
115 | 	rm -r build
116 | 
117 | .PHONY: distclean all gmp asm tests benchs benchmark benchmark_asm benchmark_gmp\
118 | check check_gmp check_asm tune default
119 | 


--------------------------------------------------------------------------------
/include/ideal.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef IDEAL_H
  3 | #define IDEAL_H
  4 | 
  5 | #include <pari/pari.h>
  6 | #include <stdbool.h>
  7 | 
  8 | GEN alg_scalar(GEN A, GEN i);
  9 | 
 10 | GEN alg_conj(GEN A, GEN x);
 11 | 
 12 | // returns the gram matrix of the norm quadratic form with respect to algbasis(A) 
 13 | GEN alg_gram(GEN A);
 14 | 
 15 | // returns a random integer linear combination of the basis of 'lattice'
 16 | // with coefficients in (-n,n)
 17 | GEN lattice_random(GEN A, GEN lattice, GEN n);
 18 | 
 19 | // returns an element of 'lattice' of prime norm
 20 | // generated as a random linear combination of basis elements with
 21 | // coefficients in (-n,n)
 22 | GEN lattice_random_prime(GEN A, GEN lattice, GEN n);
 23 | 
 24 | // same but l must be a quadratic residue modulo the prime norm
 25 | GEN lideal_equiv_prime_not_quadratic_residue(GEN I, GEN* alpha, GEN l) ;
 26 | 
 27 | // returns an element of 'lattice' of norm N such that gcd(a,N) = b
 28 | GEN lattice_random_gcd(GEN A, GEN lattice, GEN n, GEN a, GEN b);
 29 | 
 30 | // GEN left ideal has 5 components:
 31 | // 1) lattice
 32 | // 2) norm (0 if not computed yet)
 33 | // 3) x such that the ideal is generated by norm and x (0 if not computed yet)
 34 | // 4) the parent algebra
 35 | // 5) the parent order
 36 | 
 37 | // creates the left ideal in 'order' generated by the element 'x' and the integer 'N'
 38 | GEN lideal_create(GEN A, GEN order, GEN x, GEN N);
 39 | 
 40 | // WARNING: the following may modify the input ideal (if a value was not precomputed)
 41 | 
 42 | GEN lideal_lattice(GEN lideal);
 43 | 
 44 | GEN alg_primitive(GEN *n, GEN A, GEN order, GEN x);
 45 | 
 46 | // We call a cyclic ideal an ideal corresponding to a cyclic isogeny (i.e., not divisible by an integer)
 47 | bool lideal_cyclic(GEN *lideal);
 48 | 
 49 | long remove_1_i(GEN A, GEN order, GEN *x);
 50 | 
 51 | long lideal_simplify(GEN *lideal);
 52 | 
 53 | GEN lideal_algebra(GEN lideal);
 54 | 
 55 | GEN lideal_order(GEN lideal);
 56 | 
 57 | GEN lideal_norm(GEN lideal);
 58 | 
 59 | GEN lideal_generator(GEN lideal);
 60 | 
 61 | // finds a generator of 'lideal' of norm coprime with M (assumes gcd(M, norm(lideal)) = 1)
 62 | GEN lideal_generator_coprime(GEN lideal, GEN M);
 63 | 
 64 | GEN lideal_mul(GEN I, GEN alpha); // I*alpha where I is a left-ideal and alpha an element of the algebra
 65 | 
 66 | GEN lideal_add(GEN I1, GEN I2);
 67 | 
 68 | GEN lideal_inter(GEN I1, GEN I2);
 69 | 
 70 | GEN lideal_inter_sum(GEN I1, GEN I2);
 71 | 
 72 | int lideal_equals(GEN I1, GEN I2);
 73 | 
 74 | // lideal_basis * lideal_scalar is a basis of the ideal lattice (lideal_basis has integral coefficients)
 75 | GEN lideal_basis(GEN lideal);
 76 | GEN lideal_scalar(GEN lideal);
 77 | 
 78 | // returns the quadratic form of the left ideal equal to the norm form divided by 
 79 | // the scalar alglat_get_scalar(lideal_lattice(lideal))
 80 | GEN lideal_gram(GEN lideal);
 81 | 
 82 | // returns an LLL-reduced basis of the left ideal divided by 
 83 | // the scalar alglat_get_scalar(lideal_lattice(lideal))
 84 | GEN lideal_lll(GEN lideal);
 85 | 
 86 | // returns the n shortest vectors
 87 | GEN lideal_short(GEN lideal, GEN B, GEN n);
 88 | 
 89 | // if alpha is given, it is set to an element of I such that I*conj(alpha)/N(I) = output
 90 | GEN lideal_equiv_prime(GEN I, GEN* alpha);
 91 | 
 92 | // same but the norm should not be in the list 'list_primes'
 93 | GEN lideal_equiv_prime_except(GEN I, GEN* alpha, GEN list_primes);
 94 | 
 95 | GEN lideal_equiv_prime_random(GEN I, GEN* alpha, GEN bound_coeff);
 96 | 
 97 | // the output has norm a (large) prime and possibly cofactors from the factorisation matrix fm
 98 | // try_at_least is the minimum number of enumerated vectors, in an attempt to minimize the large prime
 99 | // try_at_least = 0 returns the first valid solution
100 | GEN lideal_equiv_nearprime(GEN lideal, GEN fm, unsigned int try_at_least);
101 | 
102 | // returns alpha such that I1*alpha = I2, and NULL if not isomorphic
103 | GEN lideal_isom(GEN I1, GEN I2);
104 | 
105 | // random lideal of norm 2^e
106 | GEN lideal_random_2e(GEN A, GEN order, long e);
107 | 
108 | GEN lideal_primary_decomposition(GEN I, GEN fact_norm);
109 | 
110 | #endif
111 | 
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/src/verif.c:
--------------------------------------------------------------------------------
  1 | #include "verif.h"
  2 | #include <assert.h>
  3 | 
  4 | // PRF to generate points
  5 | static void hash(proj *P, int i) {
  6 |   uintbig_set(&P->x.re.x, 3 * i + 13);
  7 |   uintbig_set(&P->z.re.x, 5 * i * i + 17);
  8 |   uintbig_set(&P->x.im.x, 7 * i * i * i + 19);
  9 |   uintbig_set(&P->z.im.x, 11 * i * i * i + 23);
 10 | }
 11 | 
 12 | // Find a basis of the 2-torsion of A, deterministically
 13 | //
 14 | // Outputs x(P), x(Q) and x(P-Q) of a basis (P,Q) such that [2^(n-1)]P
 15 | // = (0,0).
 16 | //
 17 | // Assumes the curve A has order p+1
 18 | static void find_basis(proj *P, proj *Q, proj *PQ,proj *A) {
 19 |   bool oncurve = class_mod_4 == 3;
 20 |   proj P2, Q2, tmp;
 21 |   long cnt = 1;
 22 |   normalize_proj(A);
 23 |   // Get first point
 24 |   while (true) {
 25 |     hash(P, cnt++);
 26 |     if (is_on_curve(P, A) != oncurve)
 27 |       continue;
 28 |     // multiply by cofactor
 29 |     xMUL(P, A, P, &p_even_cofactor);
 30 |     // check it has maximal order
 31 |     P2 = *P;
 32 |     for (int i = 1; i < two_tors_height; i++)
 33 |       xDBL(&P2, A, &P2);
 34 |     if (!mont_iszero(&P2))
 35 |       break;
 36 |   }
 37 | 
 38 |   // Get linearly independent point
 39 |   while (true) {
 40 |     hash(Q, cnt++);
 41 |     if (is_on_curve(Q, A) != oncurve)
 42 |       continue;
 43 |     // multiply by cofactor
 44 |     xMUL(Q, A, Q, &p_even_cofactor);
 45 |     // check it has maximal order
 46 |     Q2 = *Q;
 47 |     for (int i = 1; i < two_tors_height; i++)
 48 |       xDBL(&Q2, A, &Q2);
 49 |     if (!mont_iszero(&Q2) && !mont_equal(&Q2, &P2))
 50 |       break;
 51 |   }
 52 | 
 53 |   // Compute P-Q
 54 |   xBILIFT(PQ, &tmp, P, Q, A);
 55 | 
 56 |   // Shuffle to satisfy constraint
 57 |   if (fp2_iszero(&P2.x)) {
 58 |   } else if (fp2_iszero(&Q2.x)) {
 59 |     fp2_cswap(&P->x, &Q->x, true);
 60 |     fp2_cswap(&P->z, &Q->z, true);
 61 |   } else {
 62 |     fp2_cswap(&P->x, &PQ->x, true);
 63 |     fp2_cswap(&P->z, &PQ->z, true);
 64 |   }
 65 | }
 66 | 
 67 | //void compress(uint64_t *zip, const two_walk *walk, long len) {}
 68 | 
 69 | 
 70 | void decompress_old(two_walk *walk, proj *A, const uint64_t *zip, long len) {
 71 |   long mask = (1 << two_tors_height) - 1;
 72 |   long hint_mask = (0xf << two_tors_height);
 73 |   uintbig a;
 74 |   proj P, Q, PQ;
 75 |   for (int i = 0; i < len; i++) {
 76 |     uintbig_set(&a, zip[i] & mask);
 77 |     long hint = (zip[i] & hint_mask) >> two_tors_height;
 78 |     // get the next kernel
 79 |     find_basis(&P, &Q, &PQ, A);  // TODO: use point Q from previous step + hint
 80 |     xBIDIM(&walk[i].ker, A, &P, &a, &Q, &uintbig_1, &PQ);
 81 |     walk[i].A = *A;
 82 |     walk[i].len = two_tors_height;
 83 |     // take the next step
 84 |     eval_walk(walk+i, A, &Q);
 85 |   }
 86 | }
 87 | 
 88 | void challenge_alt(proj *A, const uintbig *m) {
 89 |   proj H, K, tmp;
 90 |   uintbig_set(&H.x.re.x, m->c[0]);
 91 |   uintbig_set(&H.x.im.x, m->c[1]);
 92 |   uintbig_set(&H.z.re.x, m->c[2]);
 93 |   uintbig_set(&H.z.im.x, m->c[3]);
 94 | 
 95 |   isog_degree deg = { 0 };
 96 |   degree_set(&deg, 0, p_plus_mult[0]);
 97 |   odd_isogeny phi;
 98 | 
 99 |   while(true) {
100 |     fp_add2(&H.x.re, &fp_1);
101 |     if (!is_on_curve(&H, A))
102 |       continue;
103 |     xMUL(&K, A, &H, &p_plus_odd_cofactor);
104 |     isog_degree cof = degree_co(deg, p_plus_mult, p_plus_len);
105 |     uintbig a;
106 |     degree_to_uint(&a, cof, p_plus_fact, p_plus_len);
107 |     xMUL(&K, A, &K, &a);
108 |     tmp = K;
109 |     uintbig_set(&a, p_plus_fact[0]);
110 |     for (int i = 1; i < p_plus_mult[0]; i++) {
111 |       xMUL(&tmp, A, &tmp, &a);
112 |     }
113 |     if (!mont_iszero(&tmp))
114 |       break;
115 |   }
116 | 
117 |   phi.kernel_plus = K;
118 |   phi.kernel_minus.x = fp2_1;
119 |   phi.kernel_minus.z = fp2_0;
120 |   phi.deg_plus = deg;
121 |   phi.deg_minus.val = 0;
122 |   eval(A, &phi, &H);
123 | 
124 |   //
125 |   degree_set(&deg, 0, p_minus_mult[0]);
126 |   while(true) {
127 |     fp_add2(&H.x.re, &fp_1);
128 |     if (is_on_curve(&H, A))
129 |       continue;
130 |     xMUL(&K, A, &H, &p_minus_odd_cofactor);
131 |     isog_degree cof = degree_co(deg, p_minus_mult, p_minus_len);
132 |     uintbig a;
133 |     degree_to_uint(&a, cof, p_minus_fact, p_minus_len);
134 |     xMUL(&K, A, &K, &a);
135 |     tmp = K;
136 |     uintbig_set(&a, p_minus_fact[0]);
137 |     for (int i = 1; i < p_minus_mult[0]; i++) {
138 |       xMUL(&tmp, A, &tmp, &a);
139 |     }
140 |     if (!mont_iszero(&tmp))
141 |       break;
142 |   }
143 | 
144 |   phi.kernel_minus = K;
145 |   phi.kernel_plus.x = fp2_1;
146 |   phi.kernel_plus.z = fp2_0;
147 |   phi.deg_minus = deg;
148 |   phi.deg_plus.val = 0;
149 |   eval(A, &phi, &H);
150 | }
151 | 


--------------------------------------------------------------------------------
/include/idiso.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef IDISO_H
  3 | #define IDISO_H
  4 | 
  5 | #include <pari/pari.h>
  6 | #include "isogenies.h"
  7 | #include "two_walks.h"
  8 | 
  9 | 
// An isogeny from `source` to `target` stored as two odd-degree pieces that
// meet at the intermediate curve `middle`.
typedef struct special_isogeny {
  // phi1 : source -> something isomorphic to middle
  // phi2 : middle -> target
  proj source;
  proj target;

  proj middle;

  odd_isogeny phi1;

  odd_isogeny phi2;
  bool phi2_set;       // presumably true once phi2 holds a valid value -- TODO confirm

  odd_isogeny phi2_dual;
  bool phi2_dual_set;  // presumably true once phi2_dual holds a valid value -- TODO confirm

} special_isogeny;
 27 | 
 28 | 
// A chain of two_walk
typedef struct two_walk_long {
    two_walk *phi;  // the steps of the chain; presumably an array of `len` entries -- TODO confirm ownership
    long len;       // presumably the number of entries in phi -- TODO confirm
} two_walk_long;
 34 | 
 35 | 
 36 | void action_from_elle(GEN *m1, GEN *m2, GEN *m3, GEN *m4, long ell, long e);
 37 | 
 38 | 
 39 | // Stuff to translate between isogenies and ideals
 40 | GEN kernel_to_ideal_gen_action(GEN v, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e);
 41 | GEN endo_to_kernel_action(GEN endo, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e);
 42 | 
 43 | GEN kernel_to_ideal_action_O0(GEN v, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e);
 44 | GEN ideal_to_kernel_action_O0(GEN I, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e);
 45 | 
 46 | GEN kernel_to_ideal_O0_ell(GEN v, long ell);
 47 | GEN ideal_to_kernel_O0_ell(GEN I, long ell);
 48 | 
 49 | GEN ideal_to_kernel_O0_T(GEN I, GEN factorisation_norm);
 50 | GEN kernel_to_ideal_O0_T(GEN coeff);
 51 | 
 52 | odd_isogeny ideal_to_isogeny_O0_T(GEN I, GEN factorisation_norm);
 53 | two_walk ideal_to_isogeny_O0_two(GEN I);
 54 | 
 55 | GEN kernel_to_ideal_gen_O0_ell(GEN v, long ell, long *e);
 56 | GEN endo_to_kernel_O0_ell(GEN alpha, long ell, long e);
 57 | 
 58 | GEN torsion_crt_decompose (GEN v, bool twist);
 59 | GEN torsion_crt_compose (GEN coeff, bool twist);
 60 | 
 61 | void famat_to_degree(isog_degree *deg, isog_degree *deg_twist, GEN f);
 62 | proj coeff_to_E0(GEN coeff, bool twist);
 63 | void gentobig(uintbig *res, GEN a);
 64 | void two_walk_stol(two_walk_long *phil, const two_walk *phi);
 65 | 
 66 | // Evaluate special isogeny phi : A -> ?? at point P.
 67 | // sets P to the image point
 68 | void init_trivial_two_walk_long(two_walk_long *phi);
 69 | void free_two_walk_long(two_walk_long *phi);
 70 | void copy_two_walk_long(two_walk_long *copy, const two_walk_long *phi);
 71 | proj eval_special(proj *A, special_isogeny *phi, const proj *P);
 72 | void two_walk_composition_ls(two_walk_long *phi, const two_walk_long *phi2, const two_walk *phi1);
 73 | void two_walk_composition_sl(two_walk_long *phi, const two_walk *phi2, const two_walk_long *phi1);
 74 | void two_walk_composition_ss(two_walk_long *phi, const two_walk *phi2, const two_walk *phi1);
 75 | void two_walk_composition_ll(two_walk_long *phi, const two_walk_long *phi2, const two_walk_long *phi1);
 76 | odd_isogeny push_odd_isogeny_through_two_walk(const odd_isogeny *phi_odd, proj *phi_odd_source, const two_walk *phi_two);
 77 | odd_isogeny push_odd_isogeny_through_two_walk_long(const odd_isogeny *phi_odd, proj *phi_odd_source, const two_walk_long *phi_two);
 78 | two_walk push_two_walk_through_odd_isogeny(const two_walk *phi_two, const odd_isogeny *phi_odd, const proj *phi_odd_source);
 79 | void push_two_walk_long_through_odd_isogeny(two_walk_long *phi, const two_walk_long *phi_two, const odd_isogeny *phi_odd, const proj *phi_odd_source);
 80 | two_walk push_two_walk_through_special_isogeny(const two_walk *phi_two, special_isogeny *phi_special);
 81 | //two_walk_long push_two_walk_long_through_special_isogeny(const two_walk_long *phi_two, const special_isogeny *phi_special);
 82 | 
 83 | 
 84 | // J is an ideal of norm dividing (global_setup.gen_odd_torsion)^2
 85 | // I is an equivalent ideal of norm a power of 2
 86 | special_isogeny special_ideal_to_isogeny(GEN J, GEN I, const two_walk_long *phi_I);
 87 | 
 88 | // T = global_setup.gen_odd_torsion
 89 | // f = two_tors_height
 90 | // I is a left O0-ideal of norm dividing T^2 2^{2f+delta}
 91 | // J is a left O0-ideal containing I of norm gcd(T^2,n(I))
 92 | // K is a left O0-ideal equivalent to J of norm a power of 2
 93 | // Finds phi such that phi_I = phi * phi_J
 94 | // Finds L equivalent to I of norm dividing T^2
 95 | void ideal_to_isogeny_two_2f_delta(two_walk_long *phi, GEN *L, special_isogeny *phi_L, GEN I, GEN J, GEN K, proj *phi_K_basis, proj *phi_K_target, int delta, GEN I_long);
 96 | 
 97 | // T = global_setup.gen_odd_torsion
 98 | // I is a left O0-ideal of norm dividing T^2 2^e for some positive integer e
 99 | // J = I + O0*T^2
100 | // K is a left O0-ideal equivalent to J of norm a power of 2
101 | // Finds phi such that phi_I = phi * phi_J
102 | // Finds L equivalent to I of norm dividing T^2
103 | void ideal_to_isogeny_two(two_walk_long *phi_res, GEN *L, special_isogeny *phi_L, GEN I, GEN J, GEN K, const special_isogeny *phi_J, const two_walk_long *phi_K, bool endpoint_close_to_E0);
104 | 
105 | void ideal_to_isogeny_O0_two_long(two_walk_long *phi, GEN *L, special_isogeny *phi_L, GEN I, bool endpoint_close_to_E0);
106 | 
107 | #endif
108 | 


--------------------------------------------------------------------------------
/src/two_walks.c:
--------------------------------------------------------------------------------
  1 | #include "two_walks.h"
  2 | #include <assert.h>
  3 | 
  4 | void two_isog(const proj *K, proj *P) {
  5 |   fp2 tmp1, tmp2, tmp3;
  6 |   assert(!fp2_iszero(&K->x));
  7 |   fp2_mul3(&tmp1, &P->x, &K->x);
  8 |   fp2_mul3(&tmp2, &P->z, &K->z);
  9 |   fp2_sub2(&tmp1, &tmp2);
 10 |   fp2_mul3(&tmp2, &P->x, &K->z);
 11 |   fp2_mul3(&tmp3, &P->z, &K->x);
 12 |   fp2_sub2(&tmp2, &tmp3);
 13 |   fp2_mul2(&P->x, &tmp1);
 14 |   fp2_mul2(&P->z, &tmp2);
 15 | }
 16 | 
 17 | void two_isog_dual(const proj *K, proj *P) {
 18 |   fp2 tmp;
 19 |   fp2_add3(&tmp, &P->x, &P->z);
 20 |   fp2_mul2(&P->z, &P->x);
 21 |   fp2_mul2(&P->z, &K->x);
 22 |   fp2_add2(&P->z, &P->z);
 23 |   fp2_add2(&P->z, &P->z);
 24 |   fp2_sq2(&P->x, &tmp);
 25 |   fp2_mul2(&P->x, &K->z);
 26 | }
 27 | 
 28 | // Internal function.
 29 | //
 30 | // Careful! P is an array of points, filled up to stacklen, and it
 31 | // must contain space for up to ⎡log₂(len)⎤ points after that.
 32 | //
 33 | // advance indicates whether K is a single element or an array of
 34 | // length len. In the former case, K will contain the kernels of the
 35 | // isogeny walk. In the latter, it will only contain the kernel of the
 36 | // last step.
 37 | void eval_walk_rec(proj *A, proj *K, long len, bool advance, proj *P, long stacklen) {
 38 |   if (len == 0)
 39 |     return;
 40 |   if (len == 1) {
 41 |     // push points
 42 |     for (int i = 0; i < stacklen; i++)
 43 |       two_isog(K, P+i);
 44 |     // push curve
 45 |     fp2_sq2(&A->z, &K->z);
 46 |     fp2_sq2(&A->x, &K->x);
 47 |     fp2_add2(&A->x, &A->x);
 48 |     fp2_sub3(&A->x, &A->z, &A->x);
 49 |     fp2_add2(&A->x, &A->x);
 50 |   } else {
 51 |     long right = len / 2;
 52 |     long left = len - right;
 53 |     P[stacklen] = *K;
 54 |     for (int i = 0; i < left; i++)
 55 |       xDBL(K, A, K);
 56 |     eval_walk_rec(A, K, right, advance, P, stacklen+1);
 57 |     K[right*advance] = P[stacklen];
 58 |     eval_walk_rec(A, K+right*advance, left, advance, P, stacklen);
 59 |   }
 60 | }
 61 | 
 62 | void eval_walk(const two_walk *phi, proj *B, proj *P) {
 63 |   *B = phi->A;
 64 |   proj K = phi->ker;
 65 |   long log, len = phi->len;
 66 |   for (log = 0; len > 1; len >>= 1) log++;
 67 |   proj stack[1+log];
 68 |   stack[0] = *P;
 69 |   eval_walk_rec(B, &K, phi->len, false, stack, 1);
 70 |   *P = stack[0];
 71 | }
 72 | 
 73 | void eval_walk_mult(const two_walk *phi, proj *B, proj *P, long cardinality) {
 74 |   *B = phi->A;
 75 |   proj K = phi->ker;
 76 |   long log, len = phi->len;
 77 |   for (log = 0; len > 1; len >>= 1) log++;
 78 |   proj stack[cardinality+log];
 79 |   for (int i = 0; i < cardinality; ++i) {
 80 |     stack[i] = P[i];
 81 |   }
 82 |   eval_walk_rec(B, &K, phi->len, false, stack, cardinality);
 83 |   for (int i = 0; i < cardinality; ++i) {
 84 |     P[i] = stack[i];
 85 |   }
 86 | }
 87 | 
 88 | 
 89 | void eval_dual(const two_walk *phi, const proj *B, proj *P) {
 90 |   proj A = phi->A;
 91 |   proj K[phi->len];
 92 |   K[0] = phi->ker;
 93 |   long log, len = phi->len;
 94 |   for (log = 0; len > 1; len >>= 1) log++;
 95 |   proj stack[log];
 96 |   eval_walk_rec(&A, K, phi->len, true, stack, 0);
 97 |   // Move P to A if needed
 98 |   if (!mont_equal(&A, B)) {
 99 |     isomorphism isom;
100 |     mont_isom(&isom, B, &A);
101 |     mont_isom_apply(&isom, P);
102 |   }
103 |   // Eval the chain backwards
104 |   for (int i = phi->len - 1; i >= 0; i--) {
105 |     two_isog_dual(K+i, P);
106 |   }
107 | }
108 | 
109 | void dual_walk(two_walk* phi) {
110 |   proj Kd, K2, tmp, tmp2, A;
111 |   bool oncurve = class_mod_4 == 3;
112 | 
113 |   K2 = phi->ker;
114 |   for (int i = 1; i < phi->len; i++)
115 |     xDBL(&K2, &phi->A, &K2);
116 | 
117 |   assert(!fp2_iszero(&K2.x));
118 |   
119 |   while (true) {
120 |     fp2_random(&Kd.x); Kd.z = fp2_1;
121 |     if (is_on_curve(&Kd, &phi->A) != oncurve)
122 |       continue;
123 |     // multiply by cofactor
124 |     xMUL(&Kd, &phi->A, &Kd, &p_even_cofactor);
125 |     // check order
126 |     tmp = Kd;
127 |     for (int i = 1; i < phi->len; i++)
128 |       xDBL(&tmp, &phi->A, &tmp);
129 |     long cof;
130 |     xDBL(&tmp2, &phi->A, &tmp);
131 |     for (cof = 0; !mont_iszero(&tmp2); cof++) {
132 |       tmp = tmp2;
133 |       xDBL(&tmp2, &phi->A, &tmp2);
134 |     }
135 |     // check orthogonaliy to phi->ker
136 |     if (mont_iszero(&tmp) || mont_equal(&tmp, &K2))
137 |       continue;
138 |     // adjust order
139 |     for (; cof > 0; cof--)
140 |       xDBL(&Kd, &phi->A, &Kd);
141 |     break;
142 |   }
143 | 
144 |   // Push dual generator
145 |   eval_walk(phi, &A, &Kd);
146 |   // Adjust direction
147 |   isomorphism isom;
148 |   rand_isom(&isom, &A);
149 |   mont_isom_apply(&isom, &Kd);
150 | 
151 |   phi->ker = Kd;
152 |   phi->A = A;
153 | }
154 | 
155 | 
156 | // finds isom such that phi*(isom inverse) can be evaluated
157 | // set phi_new phi*(isom inverse)
158 | // then set P to phi_new(isom(P)), and A to the target curve
159 | void eval_walk_isom(isomorphism *isom, two_walk *phi_new, proj *B, proj *R, const two_walk *phi, const proj *P) {
160 |     proj A0 = phi->A;
161 |     *phi_new = *phi;
162 |     if (R) { assert(P); *R = *P; }
163 |     trivial_isom(isom);
164 | 
165 |     if (phi_new->len) {
166 |         proj Q = phi_new->ker;
167 | 
168 |         uintbig k;
169 |         uintbig_set(&k, 1ULL<<(phi_new->len-1));
170 |         xMUL(&Q, &phi_new->A, &Q, &k);
171 |         assert(!mont_iszero(&Q));
172 |         if(fp2_iszero(&Q.x)) {
173 |             // need to push (0,0) away
174 |             rand_isom(isom, &phi_new->A);
175 |             mont_isom_apply(isom, &phi_new->ker);
176 |             if (R) mont_isom_apply(isom, R);
177 |         }
178 |         xDBL(&Q, &A0, &Q); // Q is still on the first curve
179 |         assert(mont_iszero(&Q));
180 | 
181 |         if (R || B) { 
182 |             proj B0;
183 |             proj* B1 = (B) ? B : &B0;
184 | 
185 |             proj R0 = {fp2_1, fp2_0};
186 |             proj* R1 = (R) ? R : &R0;
187 | 
188 |             eval_walk(phi_new, B1, R1);
189 |         } 
190 |     }
191 |     else { if (B) *B = phi->A; }
192 | }
193 | 
194 | void eval_walk_isom_mult(isomorphism *isom, two_walk *phi_new, proj *B, const two_walk *phi, proj *P, long cardinality) {
195 |     proj A0 = phi->A;
196 |     *phi_new = *phi;
197 |     trivial_isom(isom);
198 | 
199 |     if (phi_new->len) {
200 |         proj Q = phi_new->ker;
201 | 
202 |         uintbig k;
203 |         uintbig_set(&k, 1ULL<<(phi_new->len-1));
204 |         xMUL(&Q, &phi_new->A, &Q, &k);
205 |         assert(!mont_iszero(&Q));
206 |         if(fp2_iszero(&Q.x)) {
207 |             // need to push (0,0) away
208 |             rand_isom(isom, &phi_new->A);
209 |             mont_isom_apply(isom, &phi_new->ker);
210 |             for (int i = 0; i < cardinality; ++i) {
211 |               mont_isom_apply(isom, &P[i]);
212 |             }
213 |         }
214 |         xDBL(&Q, &A0, &Q); // Q is still on the first curve
215 |         assert(mont_iszero(&Q));
216 | 
217 |         eval_walk_mult(phi_new, B, P, cardinality);
218 |         
219 |     }
220 |     else { *B = phi->A; }
221 | }
222 | 
223 | 


--------------------------------------------------------------------------------
/src/isogenies.c:
--------------------------------------------------------------------------------
  1 | #include "isogenies.h"
  2 | #include "isomorphism.h"
  3 | #include <assert.h>
  4 | 
  5 | // Internal function: evaluates an isogeny proceeding from the largest
  6 | // to the smallest degrees.
  7 | //
  8 | // To be improved: use SIDH-like strategies for the smallest degree
  9 | void eval_rtl_old(proj *A, proj *Q, int n, isog_degree deg, const long* fact, long len) {
 10 |   uintbig cof;
 11 |   degree_to_uint(&cof, deg, fact, len);
 12 |   for (int i = len - 1; i >= 0; i--) {
 13 |     int v = degree_get(deg, i);
 14 |     for (int j = 0; j < v; j++) {
 15 |       proj K;
 16 |       uintbig_div3_64(&cof, &cof, fact[i]);
 17 |       xMUL(&K, A, Q+1, &cof);
 18 |       assert(!mont_iszero(&K));
 19 |       xISOG_many(A, Q, n, &K, fact[i]);
 20 |     }
 21 |   }
 22 |   assert(mont_iszero(Q+1));
 23 | }
 24 | 
 25 | // Internal function: recursive halving strategy for a chain of `len`
 26 | // steps of degree `fact`, starting from the kernel generator *K.
 27 | //
 28 | // P[0..stacklen-1] are handed to xISOG_many at every leaf; P[stacklen]
 29 | // serves as scratch to save *K while the first sub-chain is processed.
 30 | void eval_rtl_rec(proj *A, proj *K, int len, bool advance,proj *P, int stacklen, long fact) {
 31 |   if (len == 0)
 32 |     return;
 33 |   if (len == 1) {
 34 |     // leaf: a single step with kernel *K
 35 |     xISOG_many(A, P, stacklen, K, fact);
 36 |     return;
 37 |   }
 38 | 
 39 |   long second_len = len / 2;
 40 |   long first_mults = len - second_len;
 41 |   long shift = second_len * advance;   // 0 unless advance is set
 42 |   uintbig ell;
 43 |   uintbig_set(&ell, fact);
 44 | 
 45 |   P[stacklen] = *K;                    // remember K before scaling it
 46 |   for (long done = 0; done < first_mults; ++done)
 47 |     xMUL(K, A, K, &ell);               // first_mults successive xMUL by fact
 48 | 
 49 |   // the saved K sits inside the stack handed to the first recursive call
 50 |   eval_rtl_rec(A, K, second_len, advance, P, stacklen + 1, fact);
 51 |   K[shift] = P[stacklen];
 52 |   eval_rtl_rec(A, K + shift, first_mults, advance, P, stacklen, fact);
 53 | }
 54 | 
 55 | // Internal function: pushes Q[0..n-1] through the isogeny whose kernel generator is Q[1], updating A.
 56 | // Supersedes eval_rtl_old, using the recursive function eval_rtl_rec for better isogeny computation.
 57 | void eval_rtl(proj *A, proj *Q, int n, isog_degree deg, const long* fact, long len) {
 58 |   uintbig cof;
 59 |   degree_to_uint(&cof, deg, fact, len);
 60 |   // cof is divided down as factors are consumed; xMUL of Q[1] by cof gives each kernel point
 61 |   //starting by the 3^53 or 5^21 isogeny reduces the number of scalar multiplication
 62 |   int start=0;
 63 |   int v = degree_get(deg, 0);
 64 |   if (fact[0]<6 && v>8) {
 65 |     start=1; // index 0 is handled here, so the main loop below stops at index 1
 66 |     proj K;
 67 |     for (int j = 0; j < v; j++) {
 68 |       uintbig_div3_64(&cof, &cof, fact[0]);
 69 |     }
 70 |     xMUL(&K, A, Q+1, &cof);
 71 | 
 72 |     // log = floor(log2(v)): number of scratch slots appended to the stack
 73 |     int log,leni = v;
 74 |     for (log =0 ; leni > 1; leni >>=1 ) log++;
 75 | 
 76 |     assert(!mont_iszero(&K));
 77 |     // work on a copy of Q with room for the scratch entries of eval_rtl_rec
 78 |     proj stack[n+log];
 79 |     for (int i=0; i<n;i++)
 80 |       stack[i]=*(Q+i);
 81 |     eval_rtl_rec(A, &K, v, false, stack, n, fact[0]);
 82 |     for (int i=0;i<n;i++)
 83 |         *(Q+i)=(stack[i]);
 84 | 
 85 |   }
 86 |   for (int i = len - 1; i >= start; i--) {
 87 |     int v = degree_get(deg, i);
 88 |     if (fact[i]<6 && v!=0) {
 89 |       proj K;
 90 |       for (int j = 0; j < v; j++) {
 91 |         uintbig_div3_64(&cof, &cof, fact[i]);
 92 |       }
 93 |       xMUL(&K, A, Q+1, &cof);
 94 | 
 95 |       // same stack sizing as in the up-front branch
 96 |       int log,leni = v;
 97 |       for (log =0 ; leni > 1; leni >>=1 ) log++;
 98 | 
 99 |       assert(!mont_iszero(&K));
100 | 
101 |       proj stack[n+log];
102 |       for (int i=0; i<n;i++)
103 |         stack[i]=*(Q+i);
104 |       eval_rtl_rec(A, &K, v, false, stack, n, fact[i]); // fact[i] uses the outer loop index
105 |       assert(mont_iszero(&stack[1])); // NOTE(review): presumably only holds when this is the last factor processed -- confirm
106 |       for (int i=0;i<n;i++)
107 |           *(Q+i)=(stack[i]);
108 |     }
109 |     else {
110 |       for (int j = 0; j < v; j++) {
111 |         proj K;
112 |         assert(!mont_iszero(Q+1));
113 |         uintbig_div3_64(&cof, &cof, fact[i]);
114 |         xMUL(&K, A, Q+1, &cof);
115 |         // one step of degree fact[i] applied to all n entries of Q
116 |         assert(!mont_iszero(&K));
117 |         xISOG_many(A, Q, n, &K, fact[i]);
118 |       }
119 |     }
120 | 
121 |   }
122 |   assert(mont_iszero(Q+1));
123 | }
124 | // Pushes the single point *P through phi; A is updated by eval_rtl.
125 | // The p+1 part runs first and carries kernel_minus along with the
126 | // point, so that the transported kernel_minus can then drive the
127 | // p-1 part.
128 | void eval(proj *A, const odd_isogeny *phi, proj *P) {
129 |   // slot 0: the point, slot 1: current kernel, slot 2: kernel_minus
130 |   proj pts[3] = { *P, phi->kernel_plus, phi->kernel_minus };
131 | 
132 |   // The p+1 part: all three slots go through
133 |   eval_rtl(A, pts, 3, phi->deg_plus, p_plus_fact, p_plus_len);
134 | 
135 |   // The p-1 part: slot 2 takes over as the kernel
136 |   pts[1] = pts[2];
137 |   eval_rtl(A, pts, 2, phi->deg_minus, p_minus_fact, p_minus_len);
138 | 
139 |   // only the image of the input point is handed back
140 |   *P = pts[0];
141 | }
142 | // Multi-point variant of eval: pushes P[0..n-1] through phi in place,
143 | // A being updated by eval_rtl.
144 | void eval_mult(proj *A, const odd_isogeny *phi, proj *P, int n) {
145 |   // Layout: [0] = P[0], [1] = kernel, [2..n] = P[1..n-1], [n+1] = kernel_minus
146 |   proj buf[n+2];
147 |   proj *rest = buf + 1;   // rest[i] is the slot of P[i] for i >= 1
148 | 
149 |   buf[0] = P[0];
150 |   buf[1] = phi->kernel_plus;
151 |   for (int i = 1; i < n; ++i)
152 |     rest[i] = P[i];
153 |   buf[n+1] = phi->kernel_minus;
154 | 
155 |   // The p+1 part
156 |   eval_rtl(A, buf, n+2, phi->deg_plus, p_plus_fact, p_plus_len);
157 | 
158 |   // The p-1 part: the transported kernel_minus replaces the spent kernel
159 |   buf[1] = buf[n+1];
160 |   eval_rtl(A, buf, n+1, phi->deg_minus, p_minus_fact, p_minus_len);
161 | 
162 |   // copy the images back
163 |   P[0] = buf[0];
164 |   for (int i = 1; i < n; ++i)
165 |     P[i] = rest[i];
166 | }
167 | 
168 | // Quite naive way to compute a dual isogeny of degree dividing p±1,
169 | // but it seems difficult to do better with XZ-coordinates.
170 | // Random points are tried until the image res[0] passes the order test below; A is updated by eval_rtl.
171 | // res must point to an array of at least 2 proj, of length given by reslen.
172 | // If reslen > 2, any point in res[2:] is pushed through the isogeny
173 | void dual_one_side(proj *A, const proj *K, bool K_is_on_curve, isog_degree deg,
174 | 		   proj *res, long reslen,
175 | 		   const uintbig* cof, const long* fact, const long *mult, long len) {
176 |   isog_degree deg_co;
177 |   uintbig ord, cof2;
178 |   proj Abak, tmp;
179 | 
180 |   while (true) {
181 |     fp2_random(&res[0].x); // candidate: random x-coordinate with z = 1
182 |     res[0].z = fp2_1;
183 |     if (is_on_curve(res, A) != K_is_on_curve) // keep only candidates on the same side as K
184 |       continue;
185 |     xMUL(res, A, res, cof);
186 |     deg_co = degree_co(deg, mult, len);
187 |     degree_to_uint(&cof2, deg_co, fact, len);
188 |     xMUL(res, A, res, &cof2);
189 |     // Now we have a point of order dividing deg
190 |     res[1] = *K;
191 |     Abak = *A; // saved so that a failed attempt can restore the curve
192 |     eval_rtl(A, res, reslen, deg, fact, len);
193 |     reslen = 2; // stop pushing the input points
194 |     // Now we need to test the order of res[0]
195 |     degree_to_uint(&ord, deg, fact, len);
196 |     long i;
197 |     for (i = 0; i < len; i++) {
198 |       if (degree_get(deg, i)) {
199 | 	uintbig_div3_64(&cof2, &ord, fact[i]);
200 | 	xMUL(&tmp, A, res, &cof2);
201 | 	if (mont_iszero(&tmp)) break; // order is a proper divisor: reject this candidate
202 |       }
203 |     }
204 |     // res[0] generates the dual isogeny!
205 |     if (i == len) break;
206 |     else *A = Abak; // retry from the original curve
207 |   }
208 | }
209 | // Replaces phi by its dual, in place; A is updated by the two calls
210 | // to dual_one_side (p+1 side first, then p-1 side). After the call,
211 | // kernel_plus and kernel_minus hold the generators computed there.
212 | void dual(proj *A, odd_isogeny *phi) {
213 |   // Q[0]: generator produced by dual_one_side, Q[1]: kernel being
214 |   // evaluated (set by dual_one_side), Q[2]: extra point pushed along.
215 |   proj Q[3];
216 | 
217 |   // WARNING: this is assuming that all curves are in the isogeny class
218 |   // of order (p+1)
219 |   const bool K_is_on_curve = true;
220 | 
221 |   // The p+1 part: kernel_minus rides along as Q[2]
222 |   Q[2] = phi->kernel_minus;
223 |   dual_one_side(A, &phi->kernel_plus, K_is_on_curve, phi->deg_plus,
224 |                 Q, 3, &p_plus_odd_cofactor, p_plus_fact, p_plus_mult, p_plus_len);
225 | 
226 |   // The p-1 part: the generator just found (Q[0]) rides along as Q[2]
227 |   phi->kernel_minus = Q[2];
228 |   Q[2] = Q[0];
229 |   dual_one_side(A, &phi->kernel_minus, !K_is_on_curve, phi->deg_minus,
230 |                 Q, 3, &p_minus_odd_cofactor, p_minus_fact, p_minus_mult, p_minus_len);
231 | 
232 |   phi->kernel_plus = Q[2];
233 |   phi->kernel_minus = Q[0];
234 | }
235 | 


--------------------------------------------------------------------------------
/src/p6983/fp.s:
--------------------------------------------------------------------------------
  1 | .intel_syntax noprefix
  2 | 
  3 | .set pbytes,32 /* bytes per field element */
  4 | .set plimbs,4 /* 64-bit limbs per field element */
  5 | .global p
  6 | .global _p
  7 | p: _p: /* the modulus, least-significant limb first */
  8 |     .quad 0x50ca4291ffffffff, 0xd1b004f94a5952c9, 0xb25c76a437728f3b, 0xa3091565b678a990
  9 | 
 10 | .inv_min_p_mod_r: /* -p^-1 mod 2^64 */
 11 |     .quad 0x50ca429200000001
 12 | 
 13 | .global fp_0
 14 | .global _fp_0
 15 | fp_0: _fp_0: /* pbytes zero bytes */
 16 |     .zero pbytes
 17 | 
 18 | .global fp_1
 19 | .global _fp_1
 20 | fp_1: _fp_1: /* 2^256 mod p */
 21 |     .quad 0xaf35bd6e00000001, 0x2e4ffb06b5a6ad36, 0x4da3895bc88d70c4, 0x5cf6ea9a4987566f
 22 | 
 23 | .r_squared_mod_p: /* (2^256)^2 mod p */
 24 |     .quad 0xce1eebd1f8f7c152, 0x3d0e272c4d770926, 0x868e6f58f9294cfb, 0x578b875020f84a87
 25 | 
 26 | .p_minus_2: /* p - 2: exponent used by fp_inv */
 27 |     .quad 0x50ca4291fffffffd, 0xd1b004f94a5952c9, 0xb25c76a437728f3b, 0xa3091565b678a990
 28 | 
 29 | .p_minus_1_halves: /* (p - 1) / 2: exponent used by fp_issquare */
 30 |     .quad 0xa8652148ffffffff, 0xe8d8027ca52ca964, 0x592e3b521bb9479d, 0x51848ab2db3c54c8
 31 | 
 32 | .p_plus_1_quarter: /* (p + 1) / 4: exponent used by fp_sqrt */
 33 | 	.quad 0x543290a480000000, 0xf46c013e529654b2, 0x2c971da90ddca3ce, 0x28c245596d9e2a64
 34 | 
 35 | .data
 36 | .global fp_mul_count
 37 | .global _fp_mul_count
 38 | fp_mul_count: _fp_mul_count: /* counter; the increment in fp_mul3 is commented out */
 39 |     .quad 0
 40 | 
 41 | .text
 42 | .p2align 4,,15
 43 | 
 44 | .global fp_copy
 45 | .global _fp_copy
 46 | fp_copy: _fp_copy: /* copy plimbs quadwords from [rsi] to [rdi] */
 47 |     cld /* direction flag cleared: rep movsq walks upwards */
 48 |     mov rcx, plimbs
 49 |     rep movsq /* leaves rdi and rsi advanced past the copied data */
 50 |     ret
 51 | 
 52 | .global fp_set
 53 | .global _fp_set
 54 | fp_set: _fp_set: /* uintbig_set on the caller's arguments, then fp_enc in place on [rdi] */
 55 |     push rdi /* keep the destination pointer across the call */
 56 |     call uintbig_set
 57 |     pop rdi
 58 |     mov rsi, rdi /* source = destination for fp_enc */
 59 |     jmp fp_enc /* tail call: fp_enc returns to our caller */
 60 | 
 61 | .global fp_cswap
 62 | .global _fp_cswap
 63 | fp_cswap: _fp_cswap: /* branch-free swap of [rdi] and [rsi], controlled by dl */
 64 |     movzx rax, dl
 65 |     neg rax /* mask: 0 when dl = 0, all ones when dl = 1 */
 66 |     .set k, 0
 67 |     .rept plimbs
 68 |         mov rcx, [rdi + 8*k]
 69 |         mov rdx, [rsi + 8*k]
 70 |         /* r8 = (a xor b) and mask */
 71 |         mov r8, rcx
 72 |         xor r8, rdx
 73 |         and r8, rax
 74 |         /* xoring both limbs with r8 exchanges them iff the mask is set */
 75 |         xor rcx, r8
 76 |         xor rdx, r8
 77 | 
 78 |         mov [rdi + 8*k], rcx
 79 |         mov [rsi + 8*k], rdx
 80 | 
 81 |         .set k, k+1
 82 |     .endr
 83 |     ret
 84 | 
 85 | .reduce_once: /* [rdi] (plus a carry limb in rax) <- value - p if that does not borrow, else unchanged */
 86 |     push rbp
 87 |     mov rbp, rdi
 88 |     /* rdi, rsi, rdx, rcx <- [rbp] - p, limb by limb */
 89 |     mov rdi, [rbp +  0]
 90 |     sub rdi, [rip + p +  0]
 91 |     mov rsi, [rbp +  8]
 92 |     sbb rsi, [rip + p +  8]
 93 |     mov rdx, [rbp + 16]
 94 |     sbb rdx, [rip + p + 16]
 95 |     mov rcx, [rbp + 24]
 96 |     sbb rcx, [rip + p + 24]
 97 |     sbb rax, 0 /* handle carry from caller */
 98 |     /* rax = all ones when the subtraction did not borrow, 0 otherwise */
 99 |     setnc al
100 |     movzx rax, al
101 |     neg rax
102 |     /* cswap2: memory operand m <- register r when rax is all ones; m unchanged when rax is 0 */
103 | .macro cswap2, r, m
104 |     xor \r, \m
105 |     and \r, rax
106 |     xor \m, \r
107 | .endm
108 | 
109 |     cswap2 rdi, [rbp +  0]
110 |     cswap2 rsi, [rbp +  8]
111 |     cswap2 rdx, [rbp + 16]
112 |     cswap2 rcx, [rbp + 24]
113 | 
114 |     pop rbp
115 |     ret
116 | 
117 | .global fp_add2
118 | .global _fp_add2
119 | fp_add2: _fp_add2: /* two-operand form: sets rdx = rdi and falls through into fp_add3 */
120 |     mov rdx, rdi
121 | .global fp_add3
122 | .global _fp_add3
123 | fp_add3: _fp_add3: /* uintbig_add3 on (rdi, rsi, rdx), then one conditional subtraction of p */
124 |     push rdi
125 |     call uintbig_add3 /* may leave a carry in rax */
126 |     pop rdi
127 |     jmp .reduce_once /* tail call; consumes the carry in rax */
128 | 
129 | .global fp_sub2
130 | .global _fp_sub2
131 | fp_sub2: _fp_sub2: /* two-operand form: falls through into fp_sub3 */
132 |   mov rdx, rdi
133 |   xchg rsi, rdx /* now rsi = rdi and rdx = the caller's rsi */
134 | .global fp_sub3
135 | .global _fp_sub3
136 | fp_sub3: _fp_sub3: /* uintbig_sub3 on (rdi, rsi, rdx), then add p back when rax comes back nonzero */
137 |     push rdi
138 |     call uintbig_sub3 /* presumably returns the borrow in rax -- confirm in uint.s */
139 |     pop rdi
140 |     xor rsi, rsi
141 |     xor rdx, rdx
142 |     xor rcx, rcx
143 |     test rax, rax
144 |     cmovnz rax, [rip + p +  0] /* rax:rsi:rdx:rcx = p if rax was nonzero, else 0 */
145 |     cmovnz rsi, [rip + p +  8]
146 |     cmovnz rdx, [rip + p + 16]
147 |     cmovnz rcx, [rip + p + 24]
148 |     add [rdi +  0], rax
149 |     adc [rdi +  8], rsi
150 |     adc [rdi + 16], rdx
151 |     adc [rdi + 24], rcx
152 |     ret
153 | 
154 | 
155 | /* Montgomery arithmetic */
156 | 
157 | .global fp_enc
158 | .global _fp_enc
159 | fp_enc: _fp_enc: /* fp_mul3 of [rsi] by (2^256)^2 mod p, result in [rdi]; presumably converts into Montgomery form */
160 |     lea rdx, [rip + .r_squared_mod_p]
161 |     jmp fp_mul3
162 | 
163 | .global fp_dec
164 | .global _fp_dec
165 | fp_dec: _fp_dec: /* fp_mul3 of [rsi] by uintbig_1, result in [rdi]; presumably converts out of Montgomery form */
166 |     lea rdx, [rip + uintbig_1]
167 |     jmp fp_mul3
168 | 
169 | .global fp_mul2
170 | .global _fp_mul2
171 | fp_mul2: _fp_mul2: /* two-operand form: sets rdx = rdi and falls through into fp_mul3 */
172 |   mov rdx, rdi
173 | .global fp_mul3
174 | .global _fp_mul3
175 | fp_mul3: _fp_mul3: /* [rdi] <- product of [rsi] and [rdx], interleaved with reduction by p; ends in .reduce_once */
176 |     push rbp
177 |     push rbx
178 |     push r12
179 | 
180 |     push rdi
181 | 
182 |     /* inc qword ptr fp_mul_count */
183 |     /* from here on rdi and rsi point to the two operands */
184 |     mov rdi, rsi
185 |     mov rsi, rdx
186 |     /* six-limb accumulator, rotated by one register per MULSTEP */
187 |     xor r8,  r8
188 |     xor r9,  r9
189 |     xor r10, r10
190 |     xor r11, r11
191 |     xor r12, r12
192 |     xor rbp, rbp
193 | 
194 |     /* flags are already cleared */
195 |     /* MULSTEP k: adds q*p and then [rdi + 8*k] * [rsi] to the accumulator r0..r5 */
196 | .macro MULSTEP, k, r0, r1, r2, r3, r4, r5
197 |     /* rdx = q = (r0 + low64([rsi] * [rdi + 8*k])) * (-p^-1) mod 2^64 */
198 |     mov rdx, [rsi +  0]
199 |     mulx rcx, rdx, [rdi + 8*\k]
200 |     add rdx, \r0
201 |     mulx rcx, rdx, [rip + .inv_min_p_mod_r]
202 | 
203 |     xor rax, rax /* clear flags */
204 |     /* accumulate q * p: low halves on the adox chain, high halves on the adcx chain */
205 |     mulx rbx, rax, [rip + p +  0]
206 |     adox \r0, rax
207 | 
208 |     mulx rcx, rax, [rip + p +  8]
209 |     adcx \r1, rbx
210 |     adox \r1, rax
211 | 
212 |     mulx rbx, rax, [rip + p + 16]
213 |     adcx \r2, rcx
214 |     adox \r2, rax
215 | 
216 |     mulx rcx, rax, [rip + p + 24]
217 |     adcx \r3, rbx
218 |     adox \r3, rax
219 | 
220 |     mov rax, 0
221 |     adcx \r4, rcx
222 |     adox \r4, rax
223 | 
224 |     adcx \r5, rax
225 |     adox \r5, rax
226 |     /* accumulate [rdi + 8*k] * [rsi] with the same two carry chains */
227 |     mov rdx, [rdi + 8*\k]
228 | 
229 |     xor rax, rax /* clear flags */
230 | 
231 |     mulx rbx, rax, [rsi +  0]
232 |     adox \r0, rax
233 | 
234 |     mulx rcx, rax, [rsi +  8]
235 |     adcx \r1, rbx
236 |     adox \r1, rax
237 | 
238 |     mulx rbx, rax, [rsi + 16]
239 |     adcx \r2, rcx
240 |     adox \r2, rax
241 | 
242 |     mulx rcx, rax, [rsi + 24]
243 |     adcx \r3, rbx
244 |     adox \r3, rax
245 | 
246 |     mov rax, 0
247 |     adcx \r4, rcx
248 |     adox \r4, rax
249 | 
250 |     adcx \r5, rax
251 |     adox \r5, rax
252 | 
253 | .endm
254 |     /* each step shifts the register window by one limb */
255 |     MULSTEP 0, r8,  r9,  r10, r11, r12, rbp
256 |     MULSTEP 1, r9,  r10, r11, r12, rbp, r8
257 |     MULSTEP 2, r10, r11, r12, rbp, r8,  r9
258 |     MULSTEP 3, r11, r12, rbp, r8,  r9,  r10
259 | 
260 |     pop rdi
261 |     /* store the four result limbs; the fifth limb goes to .reduce_once in rax */
262 |     mov [rdi +  0], r12
263 |     mov [rdi +  8], rbp
264 |     mov [rdi + 16], r8
265 |     mov [rdi + 24], r9
266 |     mov rax, r10
267 | 
268 |     pop r12
269 |     pop rbx
270 |     pop rbp
271 |     jmp .reduce_once
272 | 
273 | .global fp_sq1
274 | .global _fp_sq1
275 | fp_sq1: _fp_sq1: /* in-place form: sets rsi = rdi and falls through into fp_sq2 */
276 |     mov rsi, rdi
277 | .global fp_sq2
278 | .global _fp_sq2
279 | fp_sq2: _fp_sq2: /* squaring implemented as fp_mul3 with both operands equal to [rsi] */
280 |     /* TODO implement optimized Montgomery squaring */
281 |     mov rdx, rsi
282 |     jmp fp_mul3
283 | 
284 | /* (obviously) not constant time in the exponent! */
285 | .fp_pow: /* [rdi] <- [rdi]^e, where e is the plimbs-limb exponent at [rsi]; bits are scanned from the lowest upwards */
286 |     push rbx
287 |     mov rbx, rsi /* rbx = exponent pointer for the whole routine */
288 |     push r12
289 |     push r13
290 |     push rdi /* ends up at [rsp + pbytes]: pointer to the result */
291 |     sub rsp, pbytes /* stack temporary holding the running square */
292 | 
293 |     mov rsi, rdi
294 |     mov rdi, rsp
295 |     call fp_copy /* temporary = input value */
296 | 
297 |     mov rdi, [rsp + pbytes]
298 |     lea rsi, [rip + fp_1]
299 |     call fp_copy /* result = fp_1 */
300 |     /* POWSTEP k: process the 64 bits of exponent limb k */
301 | .macro POWSTEP, k
302 |         mov r13, [rbx + 8*\k]
303 |         xor r12, r12
304 | 
305 |         2:
306 |         test r13, 1
307 |         jz 1f
308 |         /* current bit set: result *= temporary */
309 |         mov rdi, [rsp + pbytes]
310 |         mov rsi, rsp
311 |         call fp_mul2
312 | 
313 |         1:
314 |         mov rdi, rsp
315 |         call fp_sq1 /* temporary = temporary^2 */
316 | 
317 |         shr r13
318 | 
319 |         inc r12
320 |         test r12, 64 /* stop once the bit counter reaches 64 */
321 |         jz 2b
322 | .endm
323 | 
324 |     POWSTEP 0
325 |     POWSTEP 1
326 |     POWSTEP 2
327 |     POWSTEP 3
328 | 
329 |     add rsp, pbytes+8 /* drop the temporary and the saved result pointer */
330 |     pop r13
331 |     pop r12
332 |     pop rbx
333 |     ret
334 | 
335 | /* In-place exponentiation of [rdi] by p - 2. TODO use a better addition chain? */
336 | .global fp_inv
337 | .global _fp_inv
338 | fp_inv: _fp_inv:
339 |     lea rsi, [rip + .p_minus_2]
340 |     jmp .fp_pow
341 | 
342 | .global fp_sqrt
343 | .global _fp_sqrt
344 | fp_sqrt: _fp_sqrt: /* in-place exponentiation of [rdi] by (p + 1) / 4 */
345 |     lea rsi, [rip + .p_plus_1_quarter]
346 |     jmp .fp_pow
347 | 
348 | /* Returns 1 in rax when [rdi]^((p-1)/2) equals fp_1, else 0; overwrites [rdi] with that power. TODO use a better addition chain? */
349 | .global fp_issquare
350 | .global _fp_issquare
351 | fp_issquare: _fp_issquare:
352 |     push rdi
353 |     lea rsi, [rip + .p_minus_1_halves]
354 |     call .fp_pow
355 |     pop rdi
356 |     /* rax accumulates the xor-difference between [rdi] and fp_1 over all limbs */
357 |     xor rax, rax
358 |     .set k, 0
359 |     .rept plimbs
360 |         mov rsi, [rdi + 8*k]
361 |         xor rsi, [rip + fp_1 + 8*k]
362 |         or rax, rsi
363 |         .set k, k+1
364 |     .endr
365 |     test rax, rax
366 |     setz al /* 1 iff every limb matched */
367 |     movzx rax, al
368 |     ret
369 | 
370 | /* not constant time (but this shouldn't leak anything of importance) */
371 | .global fp_random
372 | .global _fp_random
373 | fp_random: _fp_random:
374 |     /* fill [rdi] with pbytes random bytes, retrying until the value is below p */
375 |     push rdi
376 |     mov rsi, pbytes
377 |     call _randombytes /* NOTE(review): other external calls in this file use unprefixed names -- confirm this symbol resolves on every target */
378 |     pop rdi
379 |     /* compare with p from the most significant limb down */
380 |     .set k, plimbs-1
381 |     .rept plimbs
382 |         mov rax, [rip + p + 8*k]
383 |         cmp [rdi + 8*k], rax
384 |         ja fp_random /* limb above p's limb: draw again */
385 |         jb 0f /* limb below p's limb: accept */
386 |         .set k, k-1
387 |     .endr
388 |     jmp fp_random /* all limbs equal, i.e. the value is p itself: draw again */
389 |     0:
390 |     ret
391 | 


--------------------------------------------------------------------------------
/src/toolbox.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdint.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <time.h> 
  6 | #include <pari/pari.h>
  7 | #include <math.h>
  8 | #include <limits.h>
  9 | 
 10 | #include "ideal.h"
 11 | #include "toolbox.h"
 12 | 
 13 | static clock_t global_timer;
 14 | // Restarts the file-wide stopwatch and returns the tick value it was set to.
 15 | clock_t tic()
 16 | {
 17 |     const clock_t now = clock();
 18 |     global_timer = now;
 19 |     return now;
 20 | }
 21 | // Milliseconds of processor time elapsed since the last tic().
 22 | float tac()
 23 | {
 24 |     const clock_t ticks = clock() - global_timer;
 25 |     float ms = (1000. * (float) ticks / CLOCKS_PER_SEC);
 26 |     return ms;
 27 | }
 28 | // Same as tac(), and also prints "<str> [<ms> ms]" with the time truncated to an int.
 29 | float TAC(const char *str)
 30 | {
 31 |     const clock_t ticks = clock() - global_timer;
 32 |     float ms = (1000. * (float) ticks / CLOCKS_PER_SEC);
 33 |     printf("%s [%d ms]\n", str, (int) ms);
 34 |     return ms;
 35 | }
 36 | // Milliseconds of processor time elapsed since the caller-supplied tick value t.
 37 | float toc(const clock_t t)
 38 | {
 39 |     const clock_t ticks = clock() - t;
 40 |     float ms = (1000. * (float) ticks / CLOCKS_PER_SEC);
 41 |     return ms;
 42 | }
 43 | // Same as toc(t), and also prints "<str> [<ms> ms]" with the time truncated to an int.
 44 | float TOC(const clock_t t, const char *str)
 45 | {
 46 |     const clock_t ticks = clock() - t;
 47 |     float ms = (1000. * (float) ticks / CLOCKS_PER_SEC);
 48 |     printf("%s [%d ms]\n", str, (int) ms);
 49 |     return ms;
 50 | }
 51 | // Quotient of two factorisation matrices: given f1 and f2 (the
 52 | // factorisations of x1 and x2), returns the reduced factorisation of x1 by x2.
 53 | GEN famat_div(GEN f1, GEN f2) {
 54 |     pari_sp stack_mark = avma;
 55 |     GEN quotient = famat_div_shallow(f1, f2);
 56 |     return gerepilecopy(stack_mark, famat_reduce(quotient));
 57 | }
 58 | // Returns the first prime of f1 whose exponent is positive, or NULL if none.
 59 | // When f2 is non-NULL and a prime is found, *f2 receives a copy of f1 with
 60 | // that exponent lowered by one, and the returned prime points into *f2.
 61 | GEN famat_pop(GEN f1, GEN* f2) {
 62 |     if (lg(f1) == 1) return NULL;
 63 | 
 64 |     pari_sp ltop = avma;
 65 |     GEN work = gcopy(f1);
 66 |     long count = lg(gel(work,1)) - 1;
 67 | 
 68 |     // locate the first row with a positive exponent
 69 |     int hit = 1;
 70 |     while (hit <= count && gcmpgs(gel(gel(work,2),hit), 0) <= 0) ++hit;
 71 | 
 72 |     if (hit > count) {          // nothing left to pop
 73 |         avma = ltop;
 74 |         return NULL;
 75 |     }
 76 |     if (!f2) return gerepilecopy(ltop, gel(gel(work,1),hit));
 77 | 
 78 |     gel(gel(work,2),hit) = gsubgs(gel(gel(work,2),hit), 1);
 79 |     *f2 = gerepilecopy(ltop, work);
 80 |     return gel(gel(*f2,1),hit);
 81 | }
 82 | // Sum of all the exponents stored in the second column of f
 83 | // (gen_0 when f is empty).
 84 | GEN famat_degree(GEN f) {
 85 |     if (lg(f) == 1) return gen_0;
 86 |     pari_sp ltop = avma;
 87 |     unsigned long last = lg(gel(f,1)) - 1;
 88 |     unsigned long k = 0;
 89 |     GEN total = gen_0;
 90 | 
 91 |     while (k < last) {
 92 |         ++k;
 93 |         total = gadd(total, gel(gel(f,2),k));
 94 |     }
 95 |     return gerepilecopy(ltop, total);
 96 | }
 97 | // Returns the n-th prime divisor in f, where primes are counted with
 98 | // multiplicity (the 3rd prime of 2*3^2*5 is 3 because 2,3,3,5), or NULL
 99 | // when f is empty or n exceeds the total count. If index is non-NULL it
100 | // receives the 1-based row of the returned prime.
101 | GEN famat_get_nth(GEN f, GEN n, unsigned long *index) {
102 |     if (lg(f) == 1) return NULL;
103 | 
104 |     pari_sp ltop = avma;
105 |     unsigned long rows = lg(gel(f,1)) - 1;
106 |     GEN seen = gen_0;           // running sum of exponents
107 |     unsigned long row = 0;
108 | 
109 |     while (row < rows) {
110 |         ++row;
111 |         seen = gadd(seen, gel(gel(f,2),row));
112 |         if (gcmp(n, seen) > 0) continue;
113 |         if (index) *index = row;
114 |         return gerepilecopy(ltop, gel(gel(f,1),row));
115 |     }
116 |     avma = ltop;
117 |     return NULL;
118 | }
119 | // Builds a random sub-factorisation of f1: primes are drawn, weighted by their remaining
120 | // exponent, until the sum of their log2 reaches log2(B) or f1 is exhausted; then some are trimmed.
121 | GEN famat_random(GEN f1, GEN B) {
122 |     pari_sp ltop = avma;
123 |     // NOTE(review): unlike the other famat_* helpers there is no lg(f1) == 1 guard here -- confirm callers never pass an empty matrix
124 |     int lg_f1 = lg(gel(f1,1)) - 1;
125 |     int m;
126 |     long index, acc, n;
127 | 
128 |     double logs[lg_f1], logB = dbllog2r(itor(B,10));
129 |     double sum = 0;
130 |     long available_terms[lg_f1], chosen_terms[lg_f1], total_available = 0;
131 |     // compact the rows with a positive exponent into logs[] / available_terms[]
132 |     index = 0;
133 |     for (int i = 1; i <= lg_f1; ++i) {
134 |         if (gcmp(gel(gel(f1,2),i),gen_0) > 0) {
135 |             logs[index] = dbllog2r(itor(gel(gel(f1,1),i),10));
136 |             available_terms[index] = itos_or_0(gel(gel(f1,2),i));
137 |             total_available += available_terms[index];
138 |             chosen_terms[index] = 0;
139 |             index++;
140 |         }
141 |     }
142 |     m = index; // number of rows kept
143 | 
144 |     while (sum < logB && total_available > 0) {
145 |         n = random_Fl(total_available);
146 |         index = -1;
147 |         acc = 0;
148 |         while (acc <= n) { // walk the cumulative counts to find the row holding draw number n
149 |             index++;
150 |             acc += available_terms[index];
151 |         }
152 |         sum += logs[index];
153 |         available_terms[index]--;
154 |         chosen_terms[index]++;
155 | 
156 |         total_available--;
157 |     }
158 | 
159 |     // NOTE(review): below, j advances while chosen_terms[i] shrinks, so only part of
160 |     // the chosen copies of a prime can be removed -- confirm this is intended
161 |     // remove superfluous factors (largest first)
162 |     for (int i = m-1; i >= 0; --i) {
163 |         for (int j = 0; j < chosen_terms[i]; ++j) {
164 |             if (sum - logs[i] > logB ) {
165 |                 sum = sum - logs[i];
166 |                 chosen_terms[i]--;
167 |             }
168 |             else break;
169 |         }
170 |     }
171 | 
172 |     // generate result
173 |     GEN result = cgetg(3, t_MAT);
174 |     gel(result,1) = cgetg(m+1, t_COL);
175 |     gel(result,2) = cgetg(m+1, t_COL);
176 |     index = 0;
177 |     for (int i = 1; i <= lg_f1; ++i) {
178 |         if (gcmp(gel(gel(f1,2),i),gen_0) > 0) {
179 |             gel(gel(result,1),index+1) = gcopy(gel(gel(f1,1),i));
180 |             gel(gel(result,2),index+1) = stoi(chosen_terms[index]); // may be 0 for primes never drawn
181 |             index++;
182 |         }
183 |     }
184 |     // m == index;
185 | 
186 |     return gerepileupto(ltop,result);
187 | }
188 | // Returns the integer described by the factorisation matrix f, i.e. the
189 | // product of the primes raised to their exponents (gen_1 if f is empty).
190 | GEN famat_prod(GEN f) {
191 |     if (lg(f) == 1) return gen_1;
192 | 
193 |     pari_sp ltop = avma;
194 |     long rows = lg(gel(f,1)) - 1;
195 |     GEN powers = cgetg(rows+1, t_VEC);
196 | 
197 |     // powers[k] = k-th prime raised to the k-th exponent
198 |     long k = 0;
199 |     while (k++ < rows)
200 |         gel(powers,k) = powii(gel(gel(f,1),k), gel(gel(f,2),k));
201 | 
202 |     return gerepilecopy(ltop, ZV_prod(powers));
203 | }
204 | // Tries to write N as a sum of two squares, N = x^2 + y^2.
205 | // Returns 1 and stores the solution in *x, *y; returns 0 (leaving *x and
206 | // *y untouched) when N has a prime factor 3 mod 4 below 102, when N
207 | // stripped of its factors 2 and of its small primes 1 mod 4 is not a
208 | // pseudoprime congruent to 1 mod 4, or when cornacchia() finds nothing.
209 | // TODO: currently assumes LONG_IS_64BIT
210 | int cornacchia_extended(GEN N, GEN *x, GEN *y) {
211 |     // test for forbidden factors (allowing them with an even power does not seem to improve performance)
212 |     // 11638895555051853627 = 3*7*11*19*23*31*43*47*59*67*71*79*83 = primes that are 3 mod 4 up to 101
213 |     if (ugcd(11638895555051853627ULL, umodiu(N,11638895555051853627ULL)) != 1) return 0;
214 | 
215 |     pari_sp ltop = avma;
216 |     long valuation_2 = vali(N);
217 |     GEN N_odd = shifti(N, -valuation_2); // remove the even part
218 | 
219 |     // 10003628061488344205 = 5*13*17*29*37*41*53*61*73*89*97*101 = primes that are 1 mod 4 up to 101
220 |     unsigned long small_factors_1_mod_4 = ugcd(10003628061488344205ULL, umodiu(N_odd,10003628061488344205ULL));
221 |     unsigned long gcd = small_factors_1_mod_4;
222 | 
223 |     // NOTE(review): small_factors_1_mod_4 can wrap around if N carries a large
224 |     // power of these primes; the default case below then rejects the input.
225 |     while (gcd != 1) {
226 |         // utoi, not stoi: gcd is unsigned and can exceed LONG_MAX
227 |         N_odd = diviiexact(N_odd,utoi(gcd));
228 |         gcd = ugcd(gcd, umodiu(N_odd,gcd));
229 |         small_factors_1_mod_4 *= gcd;
230 |     }
231 | 
232 |     // we hope the 'unfactored' part is a prime 1 mod 4, so that Cornacchia applies
233 |     GEN x0, y0;
234 |     if (umodiu(N_odd, 4) != 1 || !ispseudoprime(N_odd,0)
235 |         || !cornacchia(gen_1, N_odd, &x0, &y0)) {
236 |         avma = ltop;
237 |         return 0;
238 |     }
239 | 
240 |     GEN small_factors = factoru(small_factors_1_mod_4); // TODO: can improve that...
241 |     GEN sol_2 = gpowgs(mkcomplex(gen_1,gen_1), valuation_2);
242 |     GEN sol_odd = gen_1;
243 | 
244 |     for (int i = 1; i < lg(gel(small_factors,1)); ++i) {
245 |         GEN cx; // a + b*I with a^2 + b^2 equal to the small prime
246 |         switch (gel(small_factors,1)[i]) {
247 |             case 5: cx = mkcomplex(stoi(2), stoi(1)); break;
248 |             case 13: cx = mkcomplex(stoi(3), stoi(2)); break;
249 |             case 17: cx = mkcomplex(stoi(4), stoi(1)); break;
250 |             case 29: cx = mkcomplex(stoi(5), stoi(2)); break;
251 |             case 37: cx = mkcomplex(stoi(6), stoi(1)); break;
252 |             case 41: cx = mkcomplex(stoi(5), stoi(4)); break;
253 |             case 53: cx = mkcomplex(stoi(7), stoi(2)); break;
254 |             case 61: cx = mkcomplex(stoi(6), stoi(5)); break;
255 |             case 73: cx = mkcomplex(stoi(8), stoi(3)); break;
256 |             case 89: cx = mkcomplex(stoi(8), stoi(5)); break;
257 |             case 97: cx = mkcomplex(stoi(9), stoi(4)); break;
258 |             case 101: cx = mkcomplex(stoi(10), stoi(1)); break;
259 |             default: avma = ltop; return 0; // not one of the listed primes: give up instead of using cx unset
260 |         }
261 |         sol_odd = gmul(sol_odd, gpowgs(cx, gel(small_factors,2)[i]));
262 |     }
263 | 
264 |     GEN sol = gmul(gmul(sol_odd, sol_2), mkcomplex(x0,y0));
265 |     *x = gel(sol,1);
266 |     *y = gel(sol,2);
267 |     gerepileall(ltop,2,x,y);
268 |     return 1;
269 | }
270 | // Returns the vector [x, y, u, v] on success, NULL when the deterministic search runs out of candidates.
271 | // solve x^2 + y^2 + p(u^2 + v^2) = M, with (u,v) != (0,0)
272 | // when parity != 0, ensures that (x+v) and (y+u) are not both even
273 | // (this means that x + y*i + u*j + v*ji is not in 2*Order(1,i,(1-ji)/2, (i+j)/2)
274 | GEN norm_equation_special(GEN p, GEN M, long parity, bool randomized) {
275 |     pari_sp ltop = avma;
276 |     GEN upper_bound_pari = gdivent(M,p); // gdivent is the Euclidean division
277 |     long upper_bound = itos_or_0(upper_bound_pari);
278 |     long u = 1, v = 0, n, delta, A; // sum_of_2_squares;
279 |     GEN N, x, y; // fac, q
280 | 
281 |     long bound_randomized;
282 |     if (upper_bound == 0) bound_randomized = 1<<15; // presumably the quotient did not fit in a long (or is 0) -- confirm itos_or_0 semantics
283 |     else bound_randomized = (long)(sqrt(upper_bound/2)); // NOTE(review): this is 0 when upper_bound is 1, and is then passed to random_Fl -- confirm
284 | 
285 |     while (1) {
286 |         if (randomized) {
287 |             u = random_Fl(bound_randomized);
288 |             v = random_Fl(bound_randomized);
289 |         }
290 | 
291 |         n = u*u+v*v;
292 | 
293 |         // if we are outside the bound, find the next point within the bound if it exists, and quit if it doesn't
294 |         if ((!is_bigint(upper_bound_pari)) && n > upper_bound) {
295 |             if (randomized) continue;
296 | 
297 |             // we are in the bottom half, outside the bound... find the next point within
298 | 
299 |             A = u+v;
300 |             delta = 2*upper_bound - A*A;
301 | 
302 |             if (delta < 0)  {
303 |                 avma = ltop;
304 |                 return NULL; // no solution!
305 |             }
306 | 
307 |             u = (long)(((double)A + sqrt(delta))/2.);
308 |             v = A-u;
309 | 
310 |             n = u*u+v*v;
311 |         }
312 | 
313 |         N = gsub(M,gmulgs(p,n)); // N = M - p(u^2 + v^2)
314 | 
315 |         if (cornacchia_extended(N, &x, &y)) { // no bad small factor
316 |             if ((parity == 0) || (smodis(gaddgs(x,v),2) != 0) || (smodis(gaddgs(y,u),2) != 0) ) {
317 |                 GEN res = mkvec4(x,y,stoi(u),stoi(v));
318 |                 return gerepilecopy(ltop,res);
319 |             }
320 |         }
321 | 
322 |         // deterministic mode: walk (u,v) along the lines u+v = constant while v+1 < u, then start the next line at v = 0
323 |         // update (u,v)
324 |         if (!randomized) {
325 |             if (v+1 < u) { u--; v++; }
326 |             else { u = v+u+1; v = 0; }
327 |         }
328 |     }
329 |     // not reached: the loop above only exits through its return statements
330 |     avma = ltop;
331 |     return NULL;
332 | }
333 | // Returns target minus the residue left after nearest-plane rounding:
334 | // the basis lat is reduced with qflll0(lat, flag), then the Gram-Schmidt
335 | // coefficients of the residue are rounded from the last vector to the first.
336 | GEN lattice_nearest_plane(GEN lat, GEN target, long flag) {
337 |     pari_sp ltop = avma;
338 |     GEN basis = gmul(lat, qflll0(lat, flag));
339 |     GEN gs_norms;
340 |     GEN gs = RgM_gram_schmidt(basis, &gs_norms);
341 |     GEN residue = target;
342 |     int col = (unsigned long)(lg(basis) - 1);
343 |     while (col > 0) {
344 |         GEN coeff = ground(gdiv(RgV_dotproduct(residue,gel(gs,col)),gel(gs_norms,col)));
345 |         residue = gsub(residue,gmul(coeff,gel(basis,col)));
346 |         --col;
347 |     }
348 |     return gerepilecopy(ltop, gsub(target,residue));
349 | }
350 | 
351 | 
352 | 
353 | 
354 | 
355 | 


--------------------------------------------------------------------------------
/test/klpt.c:
--------------------------------------------------------------------------------
  1 | #define _XOPEN_SOURCE
  2 | 
  3 | #include <stdint.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <time.h>
  7 | #include <pari/pari.h>
  8 | #include <math.h>
  9 | 
 10 | #include "ideal.h"
 11 | #include "toolbox.h"
 12 | #include "klpt.h"
 13 | #include "precomputed.h"
 14 | 
 15 | // Norm of x in global_setup.B, as computed by algnorm with last argument 0.
 16 | GEN norm0(GEN x) {
 17 |     GEN algebra = global_setup.B;
 18 |     return algnorm(algebra, x, 0);
 19 | }
 20 | 
 21 | /*
 22 | static GEN alg_O0_to_standard(GEN elt) {
 23 |     return RgM_RgC_mul(global_setup.O0_to_standard, elt);
 24 | }
 25 | 
 26 | static GEN alg_standard_to_O0(GEN elt) {
 27 |     return RgM_RgC_mul(global_setup.standard_to_O0, elt);
 28 | }
 29 | */
 30 | // Smoke test: builds a left ideal from a fixed generator, recovers a
 31 | // generator from it, rebuilds the ideal and prints the norms on the way.
 32 | int test_lideal1() {
 33 |     GEN algebra = global_setup.B;
 34 |     GEN order = global_setup.O0;
 35 | 
 36 |     // ideal generated by a fixed element
 37 |     GEN generator = mkcol4s(2,7,3,-5);
 38 |     GEN ideal = lideal_create(algebra, order, generator, NULL);
 39 |     output(norm0(generator));
 40 |     output(lideal_norm(ideal));
 41 | 
 42 |     // presumably slot 3 caches the generator; clearing it forces a recomputation
 43 |     gel(ideal,3) = NULL;
 44 |     GEN regenerated = lideal_generator(ideal);
 45 |     output(norm0(regenerated));
 46 | 
 47 |     // rebuild from the recovered generator and the known norm
 48 |     GEN ideal_norm = lideal_norm(ideal);
 49 |     GEN rebuilt = lideal_create(algebra, order, regenerated, ideal_norm);
 50 | 
 51 |     // presumably slot 2 caches the norm; resetting it forces a recomputation
 52 |     gel(rebuilt,2) = gen_0;
 53 |     output(lideal_norm(rebuilt));
 54 |     return 0;
 55 | }
 56 | 
 57 | // Smoke test: prints the norms of the four vectors returned by lideal_lll
 58 | // for a random ideal, then runs lideal_equiv_prime and lideal_isom on a
 59 | // random ideal obtained from lideal_random_2e.
 60 | int test_lideal2() {
 61 |     GEN order = global_setup.O0;
 62 |     GEN algebra = global_setup.B;
 63 | 
 64 |     // random element and the ideal it generates
 65 |     GEN element = lattice_random(algebra, order, stoi(10000));
 66 |     output(algnorm(algebra, element, 0));
 67 |     GEN ideal = lideal_create(algebra, order, element, NULL);
 68 | 
 69 |     printf("LLL\n");
 70 | 
 71 |     // lideal_lll is called afresh for each of the four entries
 72 |     for (long k = 1; k <= 4; ++k)
 73 |         output(norm0(gel(lideal_lll(ideal),k)));
 74 | 
 75 |     // second part: random ideal built with parameter 256
 76 |     GEN I1 = lideal_random_2e(algebra, order, 256);
 77 | 
 78 |     output(lideal_norm(I1));
 79 | 
 80 |     printf("A\n");
 81 | 
 82 |     // equivalent ideal, no extra argument
 83 |     GEN I2 = lideal_equiv_prime(I1,NULL);
 84 | 
 85 |     output(lideal_norm(I2));
 86 | 
 87 |     printf("B\n");
 88 | 
 89 |     // result deliberately ignored: only checks that the call goes through
 90 |     lideal_isom(I1, I2);
 91 | 
 92 |     printf("C\n");
 93 | 
 94 |     // multiplication table of the algebra, for reference
 95 |     output(algmultable(algebra));
 96 | 
 97 |     return 0;
 98 | }
 99 | // Times norm_equation_special on random targets and checks every solution.
100 | int test_norm_eq() {
101 |     const int repetitions = 10000;
102 |     float accumulated_time_ms = 0.;
103 | 
104 |     // alternative target generation, kept for reference:
105 |     //GEN fm = famat_sqr(global_setup.torsion_fm);
106 |     //GEN N = randomi(mpshift(gen_1, 128));
107 |     //GEN p_div_N = gadd(truedivii(global_setup.p, N), gen_1);
108 |     //fm_1 = famat_random(fm, gmulgs(p_div_N,(ctr/10) + 2));
109 |     //target = gmul(N,famat_prod(fm_1));
110 | 
111 |     pari_sp av = avma;
112 | 
113 |     for (int run = 0; run < repetitions; ++run) {
114 |         GEN sol = NULL, target;
115 |         long ctr = 1;
116 | 
117 |         // draw targets below p*(ctr/20 + 1) until one is solvable
118 |         clock_t t = tic();
119 |         do {
120 |             target = randomi(gmul(global_setup.p, stoi(ctr/20 + 1)));
121 |             sol = norm_equation_special(global_setup.p, target, 0, false);
122 |             ctr++;
123 |         } while (!sol);
124 |         //output(sol);
125 |         accumulated_time_ms += toc(t);
126 | 
127 |         // recompute x^2 + y^2 + p*(u^2 + v^2) from the returned vector
128 |         GEN xy = gadd(gsqr(gel(sol,1)), gsqr(gel(sol,2)));
129 |         GEN uv = gadd(gsqr(gel(sol,3)), gsqr(gel(sol,4)));
130 |         GEN lhs = gadd(xy, gmul(global_setup.p, uv));
131 |         if (gcmp(lhs,target) != 0) {
132 |             printf("error in norm_equation_special: incorrect result!\n");
133 |             break;
134 |         }
135 | 
136 |         avma = av;
137 |     }
138 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
139 |     return 0;
140 | }
141 | 
142 | int test_famat_rand() {
143 |     float accumulated_time_ms = 0.;
144 |     int repetitions = 100;
145 |     clock_t t;
146 | 
147 |     GEN fm = famat_sqr(global_setup.torsion_fm);
148 | 
149 |     pari_sp av = avma;
150 | 
151 |     for (int i = 0; i < repetitions; ++i) {
152 |         GEN target;
153 |         GEN N = randomi(mpshift(gen_1, 128));
154 |         GEN B = gmul(gadd(truedivii(global_setup.p, N), gen_1),stoi(3));
155 |         GEN fm_1;
156 | 
157 |         t = tic();
158 |         for (int i = 0; i < 100; ++i) {
159 |             //fm_1 = famat_random(fm, gmulgs(p_div_N,(ctr/10) + 2));
160 |             fm_1 = famat_random(fm, B);
161 |             target = gmul(N,famat_prod(fm_1));
162 |         }
163 |         //output(sol);
164 |         accumulated_time_ms += toc(t);
165 | 
166 |         avma = av;
167 |     }
168 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
169 |     return 0;
170 | }
171 | 
172 | int test_famat() {
173 |     GEN fm = mkmat2(mkcol4s(2,3,5,7), mkcol4s(4,2,1,2));
174 |     GEN fm2;
175 |     output(famat_pop(fm,&fm2));
176 |     output(famat_pop(fm2,&fm2));
177 |     output(famat_pop(fm2,&fm2));
178 |     output(famat_pop(fm2,&fm2));
179 |     output(famat_pop(fm2,&fm2));
180 | 
181 | 
182 | 
183 |     fm = mkmat2(mkcol4s(2,3,5,7), mkcol4s(30,2,3,2));
184 |     output(fm);
185 | 
186 |     for (int i = 0; i < 20; ++i) {
187 |         fm2 = famat_random(fm,stoi(10));
188 |         output(fm2);
189 |     }
190 | 
191 |     return 0;
192 | }
193 | 
194 | int test_klpt() {
195 | 
196 |     float accumulated_time_ms = 0., accumulated_bitlength = 0.;
197 |     int repetitions = 100;
198 |     pari_sp av = avma;
199 | 
200 |     for (int i = 0; i < repetitions; ++i) {
201 |         GEN I = lideal_random_2e(global_setup.B, global_setup.O0, 130);
202 | 
203 |         GEN fm = famat_sqr(global_setup.torsion_fm);
204 |         gel(gel(fm,2),1) = gen_0; // remove power of 2
205 | 
206 |         clock_t t = tic();
207 |         GEN J = klpt_special_smooth(I, fm);
208 |         accumulated_time_ms += toc(t);
209 |         //accumulated_time_ms += toc(t);
210 | 
211 |         GEN NJ = lideal_norm(J);
212 |         accumulated_bitlength += dbllog2r(itor(NJ,10));
213 |         //printf("%ld bits\n", (long)dbllog2r(itor(NJ,10)));
214 | 
215 |         // check norm
216 | 
217 |         int smooth_norm = (gcmp(famat_prod(famat_Z_gcd(fm, NJ)), NJ) == 0);
218 |         if (!smooth_norm) { printf("output of klpt does not have a valid norm\n"); break; }
219 | 
220 |         // check isomorphism
221 | 
222 |         GEN alpha = lideal_isom(I, J); // I*alpha = J
223 |         if (!alpha) { printf("output of klpt is not isomorphic to input\n"); break; }
224 | 
225 |         avma = av;
226 |     }
227 | 
228 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
229 |     printf("average length\t %d bits\n", (int) (accumulated_bitlength / repetitions));
230 | 
231 |     return 0;
232 | }
233 | 
234 | int test_equiv_nearprime() {
235 | 
236 |     float accumulated_time_ms = 0., accumulated_bitlength = 0.;
237 |     int repetitions = 100;
238 |     pari_sp av = avma;
239 | 
240 |     for (int i = 0; i < repetitions; ++i) {
241 |         GEN I = lideal_random_2e(global_setup.B, global_setup.O0, 130);
242 |         GEN fm = famat_sqr(global_setup.torsion_fm);
243 |         gel(gel(fm,2),1) = gen_0; // remove power of 2
244 | 
245 |         clock_t t = tic();
246 |         GEN J = lideal_equiv_nearprime(I,fm,0);
247 |         // GEN J = lideal_equiv_prime_except(I,NULL,NULL);
248 |         accumulated_time_ms += toc(t);
249 | 
250 |         //GEN NJ = lideal_norm(J);
251 |         //if (!ispseudoprime(NJ,0)) { printf("output of lideal_equiv_prime_except is not of prime norm\n"); break; }
252 | 
253 |         // check isomorphism
254 | 
255 |         GEN alpha = lideal_isom(I, J); // I*alpha = J
256 |         if (!alpha) { printf("output of lideal_equiv_* is not isomorphic to input\n"); break; }
257 | 
258 |         avma = av;
259 |     }
260 | 
261 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
262 |     printf("average length\t %d bits\n", (int) (accumulated_bitlength / repetitions));
263 | 
264 |     return 0;
265 | }
266 | 
267 | int test_klpt2e() {
268 | 
269 |     float accumulated_time_ms = 0., accumulated_bitlength = 0.;
270 |     int repetitions = 100;
271 |     pari_sp av = avma;
272 | 
273 |     for (int i = 0; i < repetitions; ++i) {
274 |         GEN I = lideal_random_2e(global_setup.B, global_setup.O0, 64);
275 |         GEN fm = famat_sqr(global_setup.torsion_fm);
276 |         gel(gel(fm,2),1) = gen_0; // remove power of 2
277 | 
278 |         clock_t t = tic();
279 |         GEN J = klpt_special_smooth_small_2e_input(I, fm);
280 |         accumulated_time_ms += toc(t);
281 |         //accumulated_time_ms += toc(t);
282 | 
283 |         if (J) {
284 |             GEN NJ = lideal_norm(J);
285 |             accumulated_bitlength += dbllog2r(itor(NJ,10));
286 |             //printf("%ld bits\n", (long)dbllog2r(itor(NJ,10)));
287 | 
288 |             // check norm
289 | 
290 |             int smooth_norm = (gcmp(famat_prod(famat_Z_gcd(fm, NJ)), NJ) == 0);
291 |             if (!smooth_norm) { printf("output of klpt does not have a valid norm\n"); break; }
292 | 
293 |             // check isomorphism
294 | 
295 |             GEN alpha = lideal_isom(I, J); // I*alpha = J
296 |             if (!alpha) { printf("output of klpt is not isomorphic to input\n"); }
297 |         }
298 | 
299 |         avma = av;
300 |     }
301 | 
302 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
303 |     printf("average length\t %d bits\n", (int) (accumulated_bitlength / repetitions));
304 | 
305 |     return 0;
306 | }
307 | 
308 | int test_klpt_general() {
309 | 
310 |     float accumulated_time_ms = 0., accumulated_bitlength = 0.;
311 |     int repetitions = 100;
312 |     pari_sp av = avma;
313 |     int win = 0;
314 | 
315 |     for (int i = 0; i < repetitions; ++i) {
316 |         unsigned int length_NI = 64;
317 | 
318 |         GEN NI = NULL;
319 | 
320 |         do {
321 |             NI = randomprime(powiu(gen_2, length_NI));
322 |         } while (Fp_issquare(gen_2,NI));
323 | 
324 |         GEN alpha = NULL;
325 | 
326 |         unsigned int margin = 6;
327 |         while (!alpha) {
328 |             alpha = norm_equation_special(global_setup.p, gmul(NI,randomi(powiu(gen_2, 256-length_NI + margin))), 0, false);
329 |             ++margin;
330 |         }
331 | 
332 |         GEN I = lideal_create(global_setup.B, global_setup.O0, gtrans(alpha), NI);
333 | 
334 |         GEN K = lideal_random_2e(global_setup.B, global_setup.O0, 130);
335 |         K = lideal_equiv_prime(K,NULL);
336 |         clock_t t = tic();
337 |         GEN J = klpt_general_power(I, K, gen_2);
338 |         accumulated_time_ms += toc(t);
339 | 
340 | 
341 |         if (J) {
342 |             ++win;
343 | 
344 |             GEN NJ = lideal_norm(J);
345 |             accumulated_bitlength += dbllog2r(itor(NJ,10));
346 | 
347 |             //printf("%ld bits\n", (long)dbllog2r(itor(NJ,10)));
348 | 
349 | 
350 |             // check norm
351 | 
352 |             //int smooth_norm = (gcmp(famat_prod(famat_Z_gcd(fm, NJ)), NJ) == 0);
353 |             //if (!smooth_norm) { printf("output of klpt does not have a valid norm\n"); break; }
354 | 
355 |             //output(Z_factor_limit(NJ, 3));
356 | 
357 |             // check isomorphism
358 | 
359 |             GEN I1 = lideal_inter(I,K);
360 |             GEN I2 = lideal_inter(I,J);
361 | 
362 |             GEN alpha = lideal_isom(I1, I2); // I1*alpha = I2
363 |             if (!alpha) { printf("output of klpt is not isomorphic to input!\n"); break; }
364 |         }
365 | 
366 |         avma = av;
367 |     }
368 | 
369 |     printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
370 |     printf("average length\t %d bits\n", (int) (accumulated_bitlength / win));
371 | 
372 |     return 0;
373 | }
374 | 
375 | 
376 | 
377 | // argv[1] is the random seed; default = 1
378 | int main(int argc, char *argv[]){
379 |     pari_init(80000000, 1<<18);
380 | 
381 |     setrand(stoi(1));
382 |     srand48(1);
383 |     if( argc > 1 ) {
384 |       setrand(strtoi(argv[1]));
385 |       srand48(atoi(argv[1]));
386 |     }
387 | 
388 |     long var = fetch_var();
389 |     GEN nf = nfinit(pol_x(fetch_var()),LOWDEFAULTPREC);
390 | 
391 |     GEN a = stoi(-1),
392 |         p = strtoi("73743043621499797449074820543863456997944695372324032511999999999999999999999"),
393 |         b = negi(p);
394 | 
395 |     GEN B = alg_hilbert(nf, a, b, var, 0);
396 | 
397 |     GEN B_1 = mkcol4s(1,0,0,0);
398 |     GEN B_i = mkcol4s(0,1,0,0);
399 |     GEN B_j = mkcol4s(0,0,1,0);
400 |     GEN B_ji = mkcol4s(0,0,0,1);
401 |     //GEN B_ij = mkcol4s(0,0,0,-1);
402 | 
403 |     GEN B_1k_2 = mkcol4(ghalf,gen_0,gen_0,gneg(ghalf)); // (1-ji)/2
404 |     GEN B_ij_2 = mkcol4(gen_0,ghalf,ghalf,gen_0); // (i+j)/2
405 | 
406 |     GEN B_O0 = alglathnf(B,mkmat4(B_1, B_i, B_1k_2, B_ij_2), gen_0);
407 | 
408 |     global_setup.p = p;
409 |     global_setup.B = B; // the quaternion algebra
410 |     global_setup.qf = mkmat4(mkcol4s(1,0,0,0),
411 |                              mkcol4s(0,1,0,0),
412 |                              mkcol4(gen_0,gen_0,p,gen_0),
413 |                              mkcol4(gen_0,gen_0,gen_0,p)); // quadratic form defined by the reduced norm
414 | 
415 |     global_setup.torsion_fm = Z_factor_limit(strtoi(
416 |         "197530174297949459837634878151545563369632855190375548677707409417459236752253845947265965991865263091519488000000000000000000000"
417 |         ), 30000);
418 | 
419 |     global_setup.O0 = B_O0; // the cannonical maximal order
420 |     global_setup.one = B_1;
421 |     global_setup.i = B_i;
422 |     global_setup.j = B_j;
423 |     global_setup.ji = B_ji;
424 | 
425 |     global_setup.O0_b1 = B_1;
426 |     global_setup.O0_b2 = B_i;
427 |     global_setup.O0_b3 = B_1k_2;
428 |     global_setup.O0_b4 = B_ij_2;
429 |     global_setup.O0_to_standard = mkmat4(B_1, B_i, B_1k_2, B_ij_2);
430 |     global_setup.standard_to_O0 = RgM_inv(global_setup.O0_to_standard);
431 | 
432 |     // test_klpt();
433 |     test_klpt_general();
434 | 
435 |     printf("    \033[1;32mAll tests passed\033[0m\n");
436 |     exit(0);
437 | }
438 | 


--------------------------------------------------------------------------------
/src/tedwards.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <assert.h>
  4 | #include <gmp.h>
  5 | #define FP_LIMBS (4 * 64 / GMP_LIMB_BITS)
  6 | 
  7 | #include "steps.h"
  8 | #include "tedwards.h"
  9 | #include "uintbig.h"
 10 | #include "poly.h"
 11 | 
 12 | // a*x^2+y^2=1+d*x^2*y^2
 13 | // a = A.x, d = A.z
 14 | 
 15 | bool ted_is_on_curve(point const *P, proj const *E) {
 16 |     fp2 x2, y2, z2, tmp1, tmp2;
 17 |     fp2_sq2(&x2,&P->x);
 18 |     fp2_sq2(&y2,&P->y);
 19 |     fp2_sq2(&z2,&P->z);
 20 | 
 21 |     fp2_mul3(&tmp1, &x2, &E->x);
 22 |     fp2_add2(&tmp1, &y2);
 23 |     fp2_mul2(&tmp1, &z2);
 24 | 
 25 |     fp2_mul3(&tmp2, &x2, &y2);
 26 |     fp2_mul2(&tmp2, &E->z);
 27 |     fp2_sq1(&z2);
 28 |     fp2_add2(&tmp2, &z2);
 29 | 
 30 |     fp2_sub2(&tmp1, &tmp2);
 31 | 
 32 |     return fp2_iszero(&tmp1);
 33 | }
 34 | 
 35 | 
 36 | 
 37 | 
 38 | void ted_double(point *Q, proj const *E, point const *P) {
 39 |     // A = X1^2
 40 |     // B = Y1^2
 41 |     // C = 2*Z1^2
 42 |     // D = a*A
 43 |     // K = (X1+Y1)^2-A-B
 44 |     // G = D+B
 45 |     // F = G-C
 46 |     // H = D-B
 47 |     // X3 = K*F
 48 |     // Y3 = G*H
 49 |     // T3 = K*H
 50 |     // Z3 = F*G
 51 | 
 52 |     // TODO: neutral element
 53 |     fp2 A,B,C,D,K,G,F,H;
 54 | 
 55 |     fp2_sq2(&A,&P->x);
 56 |     fp2_sq2(&B,&P->y);
 57 |     fp2_sq2(&C,&P->z);
 58 |     fp2_add2(&C,&C);
 59 |     fp2_mul3(&D,&A,&E->x);
 60 |     fp2_add3(&K,&P->x,&P->y);
 61 |     fp2_sq1(&K);
 62 |     fp2_sub2(&K,&A);
 63 |     fp2_sub2(&K,&B);
 64 |     fp2_add3(&G,&D,&B);
 65 |     fp2_sub3(&F,&G,&C);
 66 |     fp2_sub3(&H,&D,&B);
 67 | 
 68 | 
 69 |     fp2_mul3(&Q->x,&K,&F);
 70 |     fp2_mul3(&Q->y,&G,&H);
 71 |     fp2_mul3(&Q->t,&K,&H);
 72 |     fp2_mul3(&Q->z,&F,&G);
 73 | 
 74 | }
 75 | 
 76 | void ted_add(point *S, proj const *E, point const *P, point const *Q) {
 77 |     // A = X1*X2
 78 |     // B = Y1*Y2
 79 |     // C = Z1*T2
 80 |     // D = T1*Z2
 81 |     // K = D+C
 82 |     // F = (X1-Y1)*(X2+Y2)+B-A
 83 |     // G = B+a*A
 84 |     // H = D-C
 85 |     // X3 = K*F
 86 |     // Y3 = G*H
 87 |     // T3 = K*H
 88 |     // Z3 = F*G
 89 | 
 90 |     // TODO: neutral element
 91 |     point res;
 92 | 
 93 |     fp2 A,B,C,D,K,F,G,H,tmp;
 94 | 
 95 |     fp2_mul3(&A,&P->x, &Q->x);
 96 |     fp2_mul3(&B,&P->y, &Q->y);
 97 |     fp2_mul3(&C,&P->z, &Q->t);
 98 |     fp2_mul3(&D,&P->t, &Q->z);
 99 |     fp2_add3(&K,&D,&C);
100 |     fp2_add3(&F,&Q->x, &Q->y);
101 |     fp2_sub3(&tmp,&P->x, &P->y);
102 |     fp2_mul2(&F,&tmp);
103 |     fp2_add2(&F,&B);
104 |     fp2_sub2(&F,&A);
105 |     fp2_mul3(&G,&A,&E->x);
106 |     fp2_add2(&G,&B);
107 |     fp2_sub3(&H,&D,&C);
108 | 
109 | 
110 |     fp2_mul3(&res.x,&K,&F);
111 |     fp2_mul3(&res.y,&G,&H);
112 |     fp2_mul3(&res.t,&K,&H);
113 |     fp2_mul3(&res.z,&F,&G);
114 | 
115 |     if (fp2_iszero(&res.x) && fp2_iszero(&res.y) && fp2_iszero(&res.z)) {
116 |         ted_double(S, E, P);
117 |     }
118 |     else *S = res;
119 | }
120 | 
121 | void ted_neg(point *Q, point const *P) {
122 |     fp2_neg2(&Q->x, &P->x);
123 |     Q->y = P->y;
124 |     Q->z = P->z;
125 |     fp2_neg2(&Q->t, &P->t);
126 | }
127 | 
128 | void ted_mul(point *res, point const *P, proj const *E, uintbig const *k)
129 | {
130 |     const point Pcopy = *P;
131 |     res->x = fp2_0;
132 |     res->y = fp2_1;
133 |     res->z = fp2_1;
134 | 
135 |     unsigned long i = BITS;
136 |     while (--i && !uintbig_bit(k, i));
137 | 
138 |     do {
139 |         ted_double(res,E,res);
140 |         if (uintbig_bit(k, i)) {
141 |             ted_add(res, E, res, &Pcopy);
142 |         }
143 |     } while (i--);
144 | }
145 | 
146 | 
147 | 
148 | bool ted_iszero(point const *P) {
149 |     if (fp2_iszero(&P->x)) {
150 |         fp2 a;
151 |         fp2_sub3(&a, &P->y, &P->z);
152 |         return fp2_iszero(&a);
153 |     }
154 |     else return false;
155 | }
156 | 
157 | void mont_to_ted(proj *E, proj const *A, bool twist) {
158 |     fp2 tmp, two;
159 |     tmp = A->z;
160 |     fp2_inv(&tmp);
161 |     fp2_mul2(&tmp,&A->x);
162 |     fp2_set(&two,2);
163 |     fp2_add3(&E->x, &tmp, &two);
164 |     fp2_sub3(&E->z, &tmp, &two);
165 |     if (twist) {
166 |         // B = Fp2_inv(fp2_non_residue)
167 |         tmp = fp2_non_residue();
168 |         fp2_mul2(&E->x,&tmp);
169 |         fp2_mul2(&E->z,&tmp);
170 |     }
171 | }
172 | 
173 | void mont_to_ted_point(point *Q, proj const *A, proj const *P) {
174 |     if (fp2_iszero(&P->z)) {
175 |         fp2_set(&Q->x, 0);
176 |         fp2_set(&Q->y, 1);
177 |         fp2_set(&Q->z, 1);
178 |         fp2_set(&Q->t, 0);
179 |     }
180 |     else {
181 |         fp2 tmp, y;
182 |         xLIFT(&y, A, P);
183 | 
184 |         fp2_add3(&tmp,&P->x,&P->z);
185 |         fp2_mul3(&Q->x,&P->x,&tmp);
186 | 
187 |         fp2_sub3(&Q->y,&P->x,&P->z);
188 |         fp2_mul2(&Q->y,&y);
189 | 
190 |         fp2_mul3(&Q->z,&tmp,&y);
191 | 
192 |         Q->t = Q->z;
193 |         fp2_inv(&Q->t);
194 |         fp2_mul2(&Q->t,&Q->x);
195 |         fp2_mul2(&Q->t,&Q->y);
196 |     }
197 | }
198 | 
199 | void ted_to_mont_point(proj *Q, point const *P) {
200 |     fp2_add3(&Q->x, &P->z, &P->y);
201 |     fp2_sub3(&Q->z, &P->z, &P->y);
202 | }
203 | 
204 | bool ted_equal(point const *P1, point const *P2) {
205 |     fp2 x1z2, y1z2;
206 |     fp2 y2z1, x2z1;
207 |     fp2 x1z2_x2z1, y1z2_y2z1;
208 | 
209 |     fp2_mul3(&x1z2, &P1->x, &P2->z);
210 |     fp2_mul3(&y1z2, &P1->y, &P2->z);
211 |     fp2_mul3(&y2z1, &P2->y, &P1->z);
212 |     fp2_mul3(&x2z1, &P2->x, &P1->z);
213 |     fp2_sub3(&x1z2_x2z1, &x1z2, &x2z1);
214 |     fp2_sub3(&y1z2_y2z1, &y1z2, &y2z1);
215 | 
216 |     return fp2_iszero(&x1z2_x2z1) && fp2_iszero(&y1z2_y2z1);
217 | }
218 | 
219 | 
220 | void ted_miller_dou(fp2 *cz2,fp2 *cxy,fp2 *cxz, point *P3, proj const *E, point const *P1) {
221 |     fp2 A,B,C,D,E_,F,G,H,I,J,K;
222 | 
223 |     fp2_sq2(&A,&P1->x);
224 |     fp2_sq2(&B,&P1->y);
225 |     fp2_sq2(&C,&P1->z);
226 |     fp2_add3(&D,&P1->x,&P1->y);
227 |     fp2_sq2(&D,&D);
228 |     fp2_add3(&E_,&P1->y,&P1->z);
229 |     fp2_sq2(&E_,&E_);
230 |     fp2_add3(&F,&A,&B);
231 |     fp2_sub3(&F,&D,&F);
232 |     fp2_add3(&G,&B,&C);
233 |     fp2_sub3(&G,&E_,&G);
234 |     fp2_mul3(&H,&A,&E->x);
235 |     fp2_add3(&I,&H,&B);
236 |     fp2_sub3(&J,&C,&I);
237 |     fp2_add3(&K,&J,&C);
238 | 
239 |     // coefficients of the conic
240 |     fp2_sub3(cz2,&P1->t,&P1->x);
241 |     fp2_mul2(cz2,&P1->y);
242 |     fp2_add2(cz2,cz2);
243 |     fp2_add3(cxy,&J,&J);
244 |     fp2_add2(cxy,&G);
245 |     fp2_mul3(cxz,&P1->x,&P1->t);
246 |     fp2_mul2(cxz,&E->x);
247 |     fp2_sub2(cxz,&B);
248 |     fp2_add2(cxz,cxz);
249 | 
250 |     // compute P3 = 2*P1
251 |     fp2_mul3(&P3->x,&F,&K);
252 |     fp2_sub3(&P3->y,&B,&H);
253 |     fp2_mul2(&P3->y,&I);
254 |     fp2_mul3(&P3->z,&I,&K);
255 |     fp2_sub3(&P3->t,&B,&H);
256 |     fp2_mul2(&P3->t,&F);
257 | }
258 | 
259 | void ted_miller_add(fp2 *cz2,fp2 *cxy,fp2 *cxz, point *P3, proj const *E, point const *P1, point const *P2) {
260 |     fp2 A,B,C,D,E_,F,G,H,I,tmp;
261 | 
262 |     fp2_mul3(&A,&P1->x,&P2->x);
263 |     fp2_mul3(&B,&P1->y,&P2->y);
264 |     fp2_mul3(&C,&P1->z,&P2->t);
265 |     fp2_mul3(&D,&P1->t,&P2->z);
266 |     fp2_add3(&E_,&D,&C);
267 |     fp2_add3(&F,&P2->x,&P2->y);
268 |     fp2_sub3(&tmp,&P1->x,&P1->y);
269 |     fp2_mul2(&F,&tmp);
270 |     fp2_add2(&F,&B);
271 |     fp2_sub2(&F,&A);
272 |     fp2_mul3(&G,&A,&E->x);
273 |     fp2_add2(&G,&B);
274 |     fp2_sub3(&H,&D,&C);
275 |     fp2_mul3(&I,&P1->t,&P2->t);
276 | 
277 |     // coefficients of the conic
278 |     fp2_sub3(cz2,&P1->t,&P1->x);
279 |     fp2_add3(&tmp,&P2->t,&P2->x);
280 |     fp2_mul2(cz2,&tmp);
281 |     fp2_sub2(cz2,&I);
282 |     fp2_add2(cz2,&A);
283 |     fp2_mul3(cxy,&P1->x,&P2->z);
284 |     fp2_mul3(&tmp,&P2->x,&P1->z);
285 |     fp2_sub2(cxy,&tmp);
286 |     fp2_add2(cxy,&F);
287 |     fp2_sub3(cxz,&P1->y,&P1->t);
288 |     fp2_add3(&tmp,&P2->y,&P2->t);
289 |     fp2_mul2(cxz,&tmp);
290 |     fp2_sub2(cxz,&B);
291 |     fp2_add2(cxz,&I);
292 |     fp2_sub2(cxz,&H);
293 | 
294 |     // compute P3 = 2*P1
295 |     fp2_mul3(&P3->x,&E_,&F);
296 |     fp2_mul3(&P3->y,&G,&H);
297 |     fp2_mul3(&P3->t,&E_,&H);
298 |     fp2_mul3(&P3->z,&F,&G);
299 | }
300 | 
301 | // eta_Q = (Q.z+Q.y)/Q.x and Y_Q = Q.y/Q.z
302 | void ted_phi_l1l2(fp2 *f, fp2 *g, const fp2 *cz2, const fp2 *cxy, const fp2 *cxz, const fp2 *eta_Q, const fp2 *Y_Q, const point *P3){
303 |     fp2 tmp, f0, g0;
304 |     fp2_mul3(&f0, cz2, eta_Q);
305 |     fp2_mul3(&tmp, cxy, Y_Q);
306 |     fp2_add2(&f0, &tmp);
307 |     fp2_add2(&f0, cxz);
308 |     fp2_mul3(&g0, Y_Q, &P3->z);
309 |     fp2_sub2(&g0, &P3->y);
310 | 
311 |     *f = f0;
312 |     *g = g0;
313 | }
314 | 
315 | void ted_miller(fp2 *res, fp2 *res2, proj const *E, point const *P, point const *Q, point const *Q2, uintbig const *k) {
316 |     point R = *P;
317 |     fp2 f, g,f2,g2, cz2, cxy, cxz, f0,g0, eta_Q, Y_Q, eta_Q2, Y_Q2, tmp;
318 |     fp2_set(&f, 1);
319 |     fp2_set(&g, 1);
320 | 
321 |     Y_Q = Q->z;
322 |     fp2_inv(&Y_Q);
323 |     fp2_mul2(&Y_Q,&Q->y);
324 | 
325 |     fp2_add3(&eta_Q,&Q->z,&Q->y);
326 |     tmp = Q->x;
327 |     fp2_inv(&tmp);
328 |     fp2_mul2(&eta_Q,&tmp);
329 | 
330 |     if (Q2) {
331 |         fp2_set(&f2, 1);
332 |         fp2_set(&g2, 1);
333 | 
334 |         Y_Q2 = Q2->z;
335 |         fp2_inv(&Y_Q2);
336 |         fp2_mul2(&Y_Q2,&Q2->y);
337 | 
338 |         fp2_add3(&eta_Q2,&Q2->z,&Q2->y);
339 |         tmp = Q2->x;
340 |         fp2_inv(&tmp);
341 |         fp2_mul2(&eta_Q2,&tmp);
342 |     }
343 | 
344 |     unsigned long i = BITS;
345 |     while (--i && !uintbig_bit(k, i));
346 |     i--;
347 |     do {
348 |         ted_miller_dou(&cz2,&cxy,&cxz, &R, E, &R);
349 | 
350 | 
351 |         ted_phi_l1l2(&f0,&g0, &cz2,&cxy,&cxz, &eta_Q, &Y_Q, &R);
352 |         fp2_sq1(&f);
353 |         fp2_sq1(&g);
354 |         fp2_mul2(&f,&f0);
355 |         fp2_mul2(&g,&g0);
356 | 
357 |         if (Q2) {
358 |             ted_phi_l1l2(&f0,&g0, &cz2,&cxy,&cxz, &eta_Q2, &Y_Q2, &R);
359 |             fp2_sq1(&f2);
360 |             fp2_sq1(&g2);
361 |             fp2_mul2(&f2,&f0);
362 |             fp2_mul2(&g2,&g0);
363 |         }
364 | 
365 |         if (uintbig_bit(k, i)) {
366 |             ted_miller_add(&cz2,&cxy,&cxz, &R, E, &R, P);
367 | 
368 |             ted_phi_l1l2(&f0,&g0, &cz2,&cxy,&cxz, &eta_Q, &Y_Q, &R);
369 |             fp2_mul2(&f,&f0);
370 |             fp2_mul2(&g,&g0);
371 | 
372 |             if (Q2) {
373 |                 ted_phi_l1l2(&f0,&g0, &cz2,&cxy,&cxz, &eta_Q2, &Y_Q2, &R);
374 |                 fp2_mul2(&f2,&f0);
375 |                 fp2_mul2(&g2,&g0);
376 |             }
377 |         }
378 | 
379 |     } while (i--);
380 | 
381 |     // // testing
382 |     // point test;
383 |     // ted_mul(&test, P, E, k);
384 |     // ted_neg(&test,&test);
385 |     // ted_add(&test, E, &test, &R);
386 |     // assert(ted_iszero(&test));
387 | 
388 |     if (fp2_iszero(&f) || fp2_iszero(&g)) {
389 |         *res = fp2_0;
390 |     }
391 |     else {
392 |         *res = g;
393 |         fp2_inv(res);
394 |         fp2_mul2(res,&f);
395 |     }
396 | 
397 |     if (Q2) {
398 |         if (fp2_iszero(&f2) || fp2_iszero(&g2)) {
399 |             *res2 = fp2_0;
400 |         }
401 |         else {
402 |             *res2 = g2;
403 |             fp2_inv(res2);
404 |             fp2_mul2(res2,&f2);
405 |         }
406 |     }
407 | }
408 | 
409 | void ted_weil(fp2 *res, const proj *E, const point *P, const point *Q, const uintbig *k) {
410 |     fp2 fQT, fPQT, fPT, fQPT;
411 |     point S,R,T;
412 | 
413 |     ted_neg(&S,Q);
414 |     ted_add(&T, E, P, &S);
415 | 
416 | 
417 |     ted_add(&S, E, P, &T);
418 |     ted_miller(&fQT, &fQPT, E, Q, &T, &S, k);
419 |     if (fp2_iszero(&fQT) || fp2_iszero(&fQPT)) { *res = fp2_1; return; }
420 | 
421 |     //ted_miller(&fQPT, NULL, E, Q, &S, NULL, k);
422 |     //if (fp2_iszero(&fQPT)) { *res = fp2_1; return; }
423 | 
424 |     ted_neg(&S,&T);
425 |     ted_add(&R, E, Q, &S);
426 |     ted_miller(&fPT, &fPQT, E, P, &S, &R, k);
427 |     if (fp2_iszero(&fPT) || fp2_iszero(&fPQT)) { *res = fp2_1; return; }
428 | 
429 | 
430 |     fp2_mul3(res, &fPT, &fQPT);
431 |     fp2_inv(res);
432 |     fp2_mul2(res, &fQT);
433 |     fp2_mul2(res, &fPQT);
434 | }
435 | 
436 | 
437 | 
438 | 
439 | 
440 | bool ted_bidim_log_weil(long *a, long *b, const proj *E, const point *Q, const point *P1, const point *P2, long ell) {
441 |     uintbig ell_big;
442 |     uintbig_set(&ell_big, ell);
443 | 
444 |     fp2 weil_12, weil_Q1, weil_Q2;
445 | 
446 |     ted_weil(&weil_12, E, P1, P2, &ell_big);
447 |     ted_weil(&weil_Q1, E, Q,  P1, &ell_big);
448 |     ted_weil(&weil_Q2, E, Q,  P2, &ell_big);
449 | 
450 |     if (!fp2_dlp_naive(a, &weil_Q2, &weil_12, ell)) { return false; }
451 |     if (!fp2_dlp_naive(b, &weil_Q1, &weil_12, ell)) { return false; }
452 |     *b = (ell -  *b) % ell;
453 |     return true;
454 | }
455 | 
456 | bool ted_bidim_log(GEN *a, GEN *b, const proj *E, const point *Q, const point *P1, const point *P2, long ell, long e) {
457 |     pari_sp ltop = avma;
458 | 
459 |     uintbig ell_big, x_big, y_big;
460 |     uintbig_set(&ell_big, ell);
461 | 
462 |     GEN log_1 = gen_0, log_2 = gen_0, ell_pow = gen_1, gtmp;
463 |     long x,y;
464 |     point Ps1[e], Ps2[e], R, Ri, tmp;
465 | 
466 |     Ps1[0] = *P1;
467 |     Ps2[0] = *P2;
468 | 
469 |     for (int i = 1; i < e; ++i) {
470 |         ted_mul(&Ps1[i], &Ps1[i-1], E, &ell_big);
471 |         ted_mul(&Ps2[i], &Ps2[i-1], E, &ell_big);
472 |     }
473 | 
474 |     R = *Q;
475 | 
476 |     for (int i = 0; i < e; ++i) {
477 |         Ri = R;
478 |         for (int j = 0; j < e-1-i; ++j) {
479 |             ted_mul(&Ri, &Ri, E, &ell_big);
480 |         }
481 |         if(!ted_bidim_log_weil(&x,&y, E, &Ri, &Ps1[e-1], &Ps2[e-1], ell))
482 |             { return false; }
483 | 
484 |         uintbig_set(&x_big, x);
485 |         ted_mul(&tmp, &Ps1[i], E, &x_big);
486 |         ted_neg(&tmp, &tmp);
487 |         ted_add(&R, E, &R, &tmp);
488 | 
489 |         uintbig_set(&y_big, y);
490 |         ted_mul(&tmp, &Ps2[i], E, &y_big);
491 |         ted_neg(&tmp, &tmp);
492 |         ted_add(&R, E, &R, &tmp);
493 | 
494 |         gtmp = mului(x, ell_pow);
495 |         log_1 = gadd(log_1, gtmp);
496 | 
497 |         gtmp = mului(y, ell_pow);
498 |         log_2 = gadd(log_2, gtmp);
499 | 
500 |         ell_pow = muliu(ell_pow,ell);
501 | 
502 |     }
503 | 
504 |     *a = log_1;
505 |     *b = log_2;
506 | 
507 |     gerepileall(ltop, 2, a, b);
508 |     return true;
509 | }
510 | 


--------------------------------------------------------------------------------
/test/arith.c:
--------------------------------------------------------------------------------
  1 | #define _XOPEN_SOURCE
  2 | #include <stdint.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <time.h> 
  6 | #include <pari/pari.h>
  7 | #include <math.h>
  8 | #include <assert.h>
  9 | #include <gmp.h>
 10 | 
 11 | #define FP_LIMBS (4 * 64 / GMP_LIMB_BITS)
 12 | 
 13 | 
 14 | #include "ideal.h"
 15 | #include "toolbox.h"
 16 | #include "klpt.h"
 17 | 
 18 | #include "mont.h"
 19 | #include "tedwards.h"
 20 | #include "constants.h"
 21 | #include "precomputed.h"
 22 | 
 23 | GEN norm0(GEN x) {
 24 |     return algnorm(global_setup.B, x,0);
 25 | }
 26 | 
 27 | /*
 28 | static GEN alg_O0_to_standard(GEN elt) {
 29 |     return RgM_RgC_mul(global_setup.O0_to_standard, elt);
 30 | }
 31 | */
 32 | 
 33 | static GEN alg_standard_to_O0(GEN elt) {
 34 |     return RgM_RgC_mul(global_setup.standard_to_O0, elt);
 35 | }
 36 | 
 37 | 
 38 | 
 39 | proj random_point(proj const *A, long ell, long e) {
 40 |     proj P;
 41 |     uintbig cofactor;
 42 |     uintbig_add3(&cofactor, &p, &uintbig_1);
 43 |     uintbig ell_big;
 44 |     uintbig_set(&ell_big, ell);
 45 |     for (int i = 0; i < e; ++i) {
 46 |         uintbig_div3_64(&cofactor, &cofactor, ell); 
 47 |     }
 48 |     proj Z;
 49 | 
 50 |     while (1) {
 51 |         fp2_random(&P.x); fp2_random(&P.z);
 52 |         if (!is_on_curve(&P, A)) continue;
 53 |         xMUL(&P, A, &P, &cofactor);
 54 |         Z = P;
 55 |         for (int i = 0; i < e-1; ++i) {
 56 |             xMUL(&Z, A, &Z, &ell_big);
 57 |         }
 58 |         if (!fp2_iszero(&Z.z)) { 
 59 |             //xMUL(&Z, A, &Z, &ell_big);
 60 |             //assert(fp2_iszero(&Z.z));
 61 |             return P;
 62 |         }
 63 |     }
 64 | 
 65 | }
 66 | 
 67 | int test_random() {
 68 |     // float accumulated_time_ms = 0.;
 69 |     int repetitions = 1000;
 70 |     clock_t t;
 71 | 
 72 |     proj A = { fp2_0, fp2_1 };
 73 |     long len = p_plus_len;
 74 |     proj P;
 75 | 
 76 |     t = tic();
 77 |     for (int i = 0; i < repetitions; i++) {
 78 |         long ell = p_plus_fact[i % len], e = p_plus_mult[i % len];
 79 |         P = random_point(&A, ell, e);
 80 |     }
 81 |     TOC(t, "rand");
 82 | 
 83 |     // printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
 84 | 
 85 |     return 0;
 86 | }
 87 | 
 88 | 
 89 | int test_ted() {
 90 |     float accumulated_time_ms = 0.;
 91 |     clock_t t;
 92 | 
 93 |     uintbig k;
 94 | 
 95 |     proj A = { fp2_0, fp2_1 };
 96 |     long len = p_plus_len;
 97 |     int repetitions = p_plus_len*10;
 98 |     proj P,Q;
 99 | 
100 |     proj E;
101 |     mont_to_ted(&E, &A, false);
102 | 
103 |     point tP,tQ,tPpQ,tPmQ,t2P, tsum;
104 | 
105 |     for (int i = 0; i < repetitions; i++) {
106 |         t = tic();
107 |         long ell = p_plus_fact[i % len], e = p_plus_mult[i % len];
108 | 
109 |         P = random_point(&A, ell, e);
110 |         Q = random_point(&A, ell, e);
111 | 
112 |         mont_to_ted_point(&tP, &A, &P);
113 |         mont_to_ted_point(&tQ, &A, &Q);
114 | 
115 |         ted_add(&tPpQ, &E, &tP, &tQ);
116 |         ted_neg(&tQ, &tQ);
117 |         ted_add(&tPmQ, &E, &tP, &tQ);
118 |         ted_neg(&tQ, &tQ);
119 | 
120 |         ted_double(&t2P, &E, &tP);
121 | 
122 |         assert(ted_is_on_curve(&tPpQ,&E));
123 |         assert(ted_is_on_curve(&tPmQ,&E));
124 |         assert(ted_is_on_curve(&t2P,&E));
125 | 
126 |         ted_neg(&tsum, &t2P);
127 |         ted_add(&tsum, &E, &tsum, &tPpQ);
128 |         ted_add(&tsum, &E, &tsum, &tPmQ);
129 | 
130 |         assert(ted_iszero(&tsum));
131 | 
132 | 
133 |         ted_double(&tsum, &E, &tP);
134 |         ted_add(&tsum, &E, &tsum, &tP);
135 |         ted_add(&tsum, &E, &tsum, &tP);
136 |         ted_add(&tsum, &E, &tsum, &tP);
137 |         ted_add(&tsum, &E, &tsum, &tP);
138 |         ted_add(&tsum, &E, &tsum, &tP);
139 |         ted_add(&tsum, &E, &tsum, &tP);
140 |         ted_add(&tsum, &E, &tsum, &tP);
141 |         ted_add(&tsum, &E, &tsum, &tP);
142 |         ted_add(&tsum, &E, &tsum, &tP);
143 |         ted_add(&tsum, &E, &tsum, &tP);
144 |         ted_add(&tsum, &E, &tsum, &tP); // 13*P
145 | 
146 | 
147 |         uintbig_set(&k,13);
148 |         ted_mul(&tQ, &tP, &E, &k);
149 | 
150 |         ted_neg(&tsum, &tsum);
151 |         ted_add(&tsum, &E, &tsum, &tQ);
152 | 
153 |         assert(ted_iszero(&tsum));
154 | 
155 | 
156 |         accumulated_time_ms += toc(t);
157 | 
158 |     }
159 | 
160 |     //printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
161 | 
162 |     return 0;
163 | }
164 | 
165 | // Self-test of ted_weil: for points killed by ell, asserts e(P,Q)^ell == 1 and e(P,R) == e(P,Q) with R = ted_add(Q,P). Returns 0.
166 | int test_weil() {
167 |     float accumulated_time_ms = 0.; // sum of toc() values; only read by the commented-out printf below
168 |     clock_t t;
169 |     // curve constant handed to random_point, mont_to_ted and mont_to_ted_point
170 |     proj A = { fp2_0, fp2_1 };
171 |     long len = p_plus_len;
172 |     int repetitions = p_plus_len*10; // ten passes over the p_plus_fact table
173 |     proj montP,montQ, montR, montS;
174 |     uintbig ellbig;
175 |     // E is what mont_to_ted produces from A; every ted_* call below takes it as the curve
176 |     proj E;
177 |     mont_to_ted(&E, &A, false);
178 | 
179 |     point P,Q,R,S,T;
180 |     fp2 a, b, tmp;
181 | 
182 | 
183 |     for (int i = 0; i < repetitions; i++) {
184 |         // the exponent read from p_plus_mult is immediately overridden with 1 below
185 |         long ell = p_plus_fact[i % len], e = p_plus_mult[i % len];
186 |         //ell = 5;
187 |         e = 1;
188 | 
189 |         uintbig_set(&ellbig, ell);
190 | 
191 |         // four points obtained from random_point(&A, ell, e)
192 |         montP = random_point(&A, ell, e);
193 |         montQ = random_point(&A, ell, e);
194 |         montR = random_point(&A, ell, e);
195 |         montS = random_point(&A, ell, e);
196 |         // convert each of them with mont_to_ted_point
197 |         mont_to_ted_point(&P, &A, &montP);
198 |         mont_to_ted_point(&Q, &A, &montQ);
199 |         mont_to_ted_point(&R, &A, &montR);
200 |         mont_to_ted_point(&S, &A, &montS);
201 |         // sanity check: every converted point passes ted_is_on_curve for E
202 |         assert(ted_is_on_curve(&P,&E));
203 |         assert(ted_is_on_curve(&Q,&E));
204 |         assert(ted_is_on_curve(&R,&E));
205 |         assert(ted_is_on_curve(&S,&E));
206 |         // sanity check: multiplying by ell sends every point to zero
207 |         ted_mul(&T, &P, &E, &ellbig);
208 |         assert(ted_iszero(&T));
209 |         ted_mul(&T, &Q, &E, &ellbig);
210 |         assert(ted_iszero(&T));
211 |         ted_mul(&T, &R, &E, &ellbig);
212 |         assert(ted_iszero(&T));
213 |         ted_mul(&T, &S, &E, &ellbig);
214 |         assert(ted_iszero(&T));
215 |         // timed section: two pairings and the two checks
216 |         t = tic();
217 |         ted_weil(&a, &E, &P, &Q, &ellbig);
218 |         fp2_exp(&tmp, &a, &ellbig);
219 |         fp2_sub2(&tmp, &fp2_1);
220 |         // tmp is a^ell with fp2_1 subtracted (assuming fp2_sub2 subtracts in place); it must be zero
221 |         assert(fp2_iszero(&tmp));
222 |         // R is overwritten here; pairing P with ted_add(Q,P) must give the same value as pairing P with Q
223 |         ted_add(&R, &E, &Q, &P);
224 |         ted_weil(&b, &E, &P, &R, &ellbig);
225 | 
226 |         fp2_sub3(&tmp,&a,&b);
227 | 
228 |         assert(fp2_iszero(&tmp));
229 |         accumulated_time_ms += toc(t);
230 | 
231 |     }
232 | 
233 |     //printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
234 | 
235 |     return 0;
236 | }
237 | 
238 | // Self-test of xBIDIM: with Q = xMUL(P,k), the result for scalars (a,b) must equal xMUL(P, a+k*b) projectively. Returns 0.
239 | int test_bidim() {
240 |     // float accumulated_time_ms = 0.;
241 |     clock_t t;
242 |     // curve constant handed to random_point, xMUL and xBIDIM
243 |     proj A = { fp2_0, fp2_1 };
244 |     long len = p_plus_len;
245 |     int repetitions = p_plus_len*10; // ten passes over the p_plus_fact table
246 |     proj P,Q,PQ,R1,R2;
247 |     uintbig k_big, a_big, b_big, k_1, a_kb;
248 |     long k, a, b;
249 |     fp2 x1z2,x2z1,diff;
250 | 
251 |     t = tic();
252 |     for (int i = 0; i < repetitions; i++) {
253 |         long ell = p_plus_fact[i % len], e = p_plus_mult[i % len];
254 |         k = random_Fl(ell);
255 |         a = random_Fl(ell);
256 |         b = random_Fl(ell);
257 |         uintbig_set(&k_big, k);
258 |         uintbig_set(&a_big, a);
259 |         uintbig_set(&b_big, b);
260 | 
261 |         uintbig_set(&k_1, k+1);
262 |         // Q and PQ are xMUL of P by k and by k+1 respectively
263 |         P = random_point(&A, ell, e);
264 |         //printf("%ld^%ld\n",ell,e);
265 |         xMUL(&Q, &A, &P, &k_big);
266 |         xMUL(&PQ, &A, &P, &k_1);
267 |         // value under test
268 |         xBIDIM(&R1, &A, &P, &a_big, &Q, &b_big, &PQ);
269 | 
270 |         // reference value: xMUL of P by the plain long a + k*b
271 |         uintbig_set(&a_kb, a+k*b);
272 |         xMUL(&R2, &A, &P, &a_kb);
273 |         // projective comparison: R1.x*R2.z - R2.x*R1.z must be zero
274 |         fp2_mul3(&x1z2, &R1.x, &R2.z);
275 |         fp2_mul3(&x2z1, &R2.x, &R1.z);
276 |         fp2_sub3(&diff, &x1z2, &x2z1);
277 |         
278 |         assert(fp2_iszero(&diff));
279 |     }
280 |     TOC(t, "bidim");
281 | 
282 |     // printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
283 | 
284 |     return 0;
285 | }
286 | 
287 | 
288 | // Returns lideal_create(B, O0, g, ell^e) where g = sum x_k*O0_b_k and x is a kernel vector of [m1*v|m2*v|m3*v|m4*v] mod ell^e.
289 | GEN kerner_to_ideal(GEN v, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e) {
290 |     pari_sp ltop = avma;
291 | 
292 |     GEN gelle = gpowgs(stoi(ell),e);
293 |     // columns m_k*v: a kernel vector x of this matrix satisfies sum x_k*m_k*v = 0 mod ell^e
294 |     GEN v1 = gmul(m1, v);
295 |     GEN v2 = gmul(m2, v);
296 |     GEN v3 = gmul(m3, v);
297 |     GEN v4 = gmul(m4, v);
298 | 
299 |     GEN matsys = mkmat4(v1,v2,v3,v4);
300 |     GEN ker = matkermod(matsys, gelle, NULL); // NULL third argument: no extra output requested
301 | 
302 |     // keep the first kernel column that is non-zero mod ell; if there is none, the last column is kept
303 |     GEN sol = NULL; // stays NULL only when the kernel has no column at all
304 | 
305 |     int dim_ker = lg(ker)-1;
306 |     for (int i = 1; i <= dim_ker; ++i){
307 |         sol = gel(ker,i);
308 |         if (!isexactzero(gmodgs(sol,ell))) break;
309 |     }
310 |     assert(sol != NULL); // previously an empty kernel left sol uninitialised
311 | 
312 |     GEN generator = gmul(gel(sol,1),global_setup.O0_b1);
313 |     generator = gadd(generator, gmul(gel(sol,2),global_setup.O0_b2));
314 |     generator = gadd(generator, gmul(gel(sol,3),global_setup.O0_b3));
315 |     generator = gadd(generator, gmul(gel(sol,4),global_setup.O0_b4));
316 | 
317 |     GEN ideal = lideal_create(global_setup.B, global_setup.O0, generator, gelle);
318 | 
319 |     return gerepilecopy(ltop, ideal); // copy the result down and release everything allocated since ltop
320 | }
321 | // Returns a kernel column mod ell^e of the matrix sum c_k*m_k, where c = alg_standard_to_O0(lideal_generator(I)).
322 | GEN ideal_to_kernel(GEN I, GEN m1, GEN m2, GEN m3, GEN m4, long ell, long e) {
323 |     pari_sp ltop = avma;
324 | 
325 |     GEN gelle = gpowgs(stoi(ell),e);
326 |     // endo = c_1*m1 + c_2*m2 + c_3*m3 + c_4*m4
327 |     GEN generator = lideal_generator(I);
328 |     GEN generator_O0 = alg_standard_to_O0(generator);
329 |     GEN endo = gmul(gel(generator_O0,1),m1);
330 |     endo = gadd(endo, gmul(gel(generator_O0,2),m2));
331 |     endo = gadd(endo, gmul(gel(generator_O0,3),m3));
332 |     endo = gadd(endo, gmul(gel(generator_O0,4),m4));
333 | 
334 |     GEN ker = matkermod(endo, gelle, NULL);
335 |     // keep the first kernel column that is non-zero mod ell; if there is none, the last column is kept
336 |     GEN sol = NULL; // stays NULL only when the kernel has no column at all
337 | 
338 |     int dim_ker = lg(ker)-1;
339 |     for (int i = 1; i <= dim_ker; ++i){
340 |         sol = gel(ker,i);
341 |         if (!isexactzero(gmodgs(sol,ell))) break;
342 |     }
343 |     assert(sol != NULL); // previously an empty kernel left sol uninitialised
344 | 
345 |     // remains to compute sol[1]*P1 + sol[2]*P2 where P1,P2 is a basis of the torsion
346 | 
347 |     return gerepilecopy(ltop, sol);
348 | }
349 | // Round-trip test: random v -> kerner_to_ideal -> ideal_to_kernel -> w, asserting det(v|w) = 0 mod ell^e. Returns 0.
350 | int test_kertoid() {
351 | 
352 |     // float accumulated_time_ms = 0.;
353 |     int repetitions = 1000;
354 |     clock_t t;
355 |     long ell, e;
356 |     GEN gelle, m_1, m_i, m_j, m_ji, m_2, m_3, m_4, v, v1, v2, w;
357 |     GEN ideal;
358 |     // first parameter set (ell = 6983, e = 1); ell, e, gelle, m_2, m_3 and m_4 are all reassigned further down
359 |     ell = 6983;
360 |     e = 1;
361 |     gelle = gpowgs(stoi(ell),e);
362 | 
363 |     m_1 = mkmat2(mkcol2s(1,0), mkcol2s(0,1));
364 |     m_i = mkmat2(mkcol2s(0,1), mkcol2s(-1,0));
365 |     m_j = mkmat2(mkcol2s(1296,525), mkcol2s(525,-1296));
366 |     m_ji = gmul(m_j, m_i);
367 | 
368 |  // B_1k_2, B_ij_2
369 |     m_2 = m_i;
370 |     m_3 = gmod(gmul(gsub(m_1, m_ji), Fp_inv(gen_2,gelle)),gelle); // (1 - ji) / 2
371 |     m_4 = gmod(gmul(gadd(m_i, m_j), Fp_inv(gen_2,gelle)),gelle); // (i + j) / 2
372 | 
373 |     // second parameter set, the one the loop below actually uses: ell = 2, e = 33, hard-coded matrices
374 |     ell = 2;
375 |     e = 33;
376 |     gelle = gpowgs(stoi(ell),e);
377 |     m_2 = mkmat2(mkcol2s(-1298618089,2597427807), mkcol2s(-2442345774,1298618089));
378 |     m_3 = mkmat2(mkcol2s(3594923693,3500346076), mkcol2s(-3142991081,-3594923692));
379 |     m_4 = mkmat2(mkcol2s(-3020381676,2095475696), mkcol2s(-348475943,3020381676));
380 | 
381 | 
382 | 
383 |     t = tic();
384 |     for (int i = 0; i < repetitions; i++) {
385 |         do { 
386 |             v1 = randomi(gelle);
387 |             v2 = randomi(gelle);
388 |         } while (!(umodiu(v1,ell) || umodiu(v2,ell)));
389 |         // the draw above is repeated while both coordinates are divisible by ell
390 |         v = mkcol2(v1,v2);
391 | 
392 |         ideal = kerner_to_ideal(v, m_1, m_2, m_3, m_4, ell, e);
393 | 
394 |         // n1 = lideal_norm(ideal);
395 |         // gel(ideal,2) = gen_0;
396 |         // n2 = lideal_norm(ideal);
397 |         // printf("%d",gcmp(n1,n2));
398 | 
399 |         w = ideal_to_kernel(ideal, m_1, m_2, m_3, m_4, ell, e);
400 |         // the 2x2 matrix with columns v and w must have determinant 0 mod ell^e
401 |         assert(gcmp(gmod(QM_det(mkmat2(v,w)),gelle),gen_0) == 0);
402 | 
403 |     }
404 |     TOC(t, "kertoid");
405 | 
406 |     // printf("average time\t [%f ms]\n",  (accumulated_time_ms / repetitions));
407 | 
408 |     return 0;
409 | }
410 | 
411 | // Test driver: initialises PARI, fills global_setup, runs test_weil and exits with status 0.
412 | // argv[1] is the random seed; default = 1
413 | int main(int argc, char *argv[]){
414 |     pari_init(80000000, 1<<18);
415 |     // seed both generators (setrand and srand48) with 1, or with argv[1] when it is given
416 |     setrand(stoi(1));
417 |     srand48(1);
418 |     if( argc > 1 ) {
419 |       setrand(strtoi(argv[1]));
420 |       srand48(atoi(argv[1]));
421 |     }
422 |     // algebra B built by alg_hilbert with a = -1 and b = -p
423 |     long var = fetch_var();
424 |     GEN nf = nfinit(pol_x(fetch_var()),LOWDEFAULTPREC);
425 |     
426 |     GEN a = stoi(-1),
427 |         p = strtoi("73743043621499797449074820543863456997944695372324032511999999999999999999999"),
428 |         b = negi(p);
429 | 
430 |     GEN B = alg_hilbert(nf, a, b, var, 0);
431 |     // coordinate vectors for 1, i, j and ji
432 |     GEN B_1 = mkcol4s(1,0,0,0);
433 |     GEN B_i = mkcol4s(0,1,0,0);
434 |     GEN B_j = mkcol4s(0,0,1,0);
435 |     GEN B_ji = mkcol4s(0,0,0,1);
436 |     //GEN B_ij = mkcol4s(0,0,0,-1);
437 | 
438 |     GEN B_1k_2 = mkcol4(ghalf,gen_0,gen_0,gneg(ghalf)); // (1-ji)/2
439 |     GEN B_ij_2 = mkcol4(gen_0,ghalf,ghalf,gen_0); // (i+j)/2
440 |     // lattice generated by 1, i, (1-ji)/2 and (i+j)/2
441 |     GEN B_O0 = alglathnf(B,mkmat4(B_1, B_i, B_1k_2, B_ij_2), gen_0);
442 |     // publish everything through the global_setup structure
443 |     global_setup.p = p;
444 |     global_setup.B = B; // the quaternion algebra
445 |     global_setup.qf = mkmat4(mkcol4s(1,0,0,0),
446 |                              mkcol4s(0,1,0,0),
447 |                              mkcol4(gen_0,gen_0,p,gen_0),
448 |                              mkcol4(gen_0,gen_0,gen_0,p)); // quadratic form defined by the reduced norm
449 | 
450 |     global_setup.torsion_fm = Z_factor_limit(strtoi(
451 |         "197530174297949459837634878151545563369632855190375548677707409417459236752253845947265965991865263091519488000000000000000000000"
452 |         ), 30000);
453 | 
454 |     global_setup.O0 = B_O0; // the canonical maximal order
455 |     global_setup.one = B_1;
456 |     global_setup.i = B_i;
457 |     global_setup.j = B_j;
458 |     global_setup.ji = B_ji;
459 | 
460 |     global_setup.O0_b1 = B_1;
461 |     global_setup.O0_b2 = B_i;
462 |     global_setup.O0_b3 = B_1k_2;
463 |     global_setup.O0_b4 = B_ij_2;
464 |     global_setup.O0_to_standard = mkmat4(B_1, B_i, B_1k_2, B_ij_2);
465 |     global_setup.standard_to_O0 = RgM_inv(global_setup.O0_to_standard);
466 |     // only test_weil is called; test_bidim and test_kertoid are not invoked from here
467 |     test_weil();
468 | 
469 |     printf("    \033[1;32mAll tests passed\033[0m\n");
470 |     exit(0);
471 | }
472 | 
473 | 
474 | 
475 | 


--------------------------------------------------------------------------------
/src/mitm.c:
--------------------------------------------------------------------------------
  1 | #include "two_walks.h"
  2 | #include <stdlib.h>
  3 | 
  4 | // An entry of the hash table; a slot whose hash[2] is 0 is treated as free by insert() and lookup()
  5 | typedef struct entry {
  6 |   uint16_t hash[3];   // 48 bits of the hash as 16-bit words: [0] = bits 0-15, [1] = bits 16-31, [2] = bits 32-47
  7 |   uint16_t a;         // 16-bit payload stored alongside the hash
  8 | } entry;
  9 | // Writes the j-invariant of A into j, rescaled so that j->z == fp2_1, and returns a hash of j->x with bit 47 set.
 10 | static uint64_t hash(proj *j, const proj *A) {
 11 |   jinv256(j, A);
 12 |   fp2_inv(&j->z); // ouch! (one field inversion per call)
 13 |   fp2_mul2(&j->x, &j->z);
 14 |   j->z = fp2_1;
 15 |   // Quite arbitrary mix of some words of the j-invariant. Bit 47 is forced on so that the stored word for bits 32-47 is never 0 (0 marks a free slot); the shift is done in uint64_t because long may be only 32 bits wide.
 16 |   return (j->x.re.x.c[0] + j->x.re.x.c[3] + j->x.im.x.c[1] + j->x.im.x.c[2]) | ((uint64_t)1 << 47);
 17 | }
 18 | // Linear-probing insert: stores the low 48 bits of hash together with the payload a in the first free slot.
 19 | static void insert(uint64_t hash, uint16_t a, entry *table, long tab_size) {
 20 |   long slot = hash % tab_size;
 21 |   // probe forward (wrapping around) until a slot whose top hash word is 0, i.e. a free one
 22 |   while (table[slot].hash[2] != 0)
 23 |     slot = (slot + 1) % tab_size;
 24 |   table[slot] = (entry){ .hash = { (uint16_t)hash, (uint16_t)(hash >> 16), (uint16_t)(hash >> 32) },
 25 |                          .a = a };
 26 | }
 27 | // Linear-probing search. start < 0 begins at the home slot of hash, otherwise right after start. Returns the matching slot or -1.
 28 | static long lookup(uint64_t hash, entry *table, long tab_size, long start) {
 29 |   const uint16_t w0 = hash & 0xffff, w1 = hash >> 16 & 0xffff, w2 = hash >> 32 & 0xffff;
 30 |   uint64_t first = start < 0 ? hash : (uint64_t)(start + 1);
 31 |   long pos;
 32 |   // follow the probe sequence until a free slot (top hash word == 0) ends it
 33 |   for (pos = first % tab_size; table[pos].hash[2]; pos = (pos + 1) % tab_size) {
 34 |     const entry *e = &table[pos];
 35 |     if (e->hash[0] == w0 && e->hash[1] == w1 && e->hash[2] == w2) return pos;
 36 |   }
 37 |   return -1;
 38 | }
 39 | 
 40 | // Find a basis of the 2^n-torsion of A, with n = two_tors_height
 41 | //
 42 | // Outputs x(P), x(Q) and x(P-Q) of a basis (P,Q) such that [2^(n-1)]P
 43 | // = (0,0).
 44 | //
 45 | // Assumes the curve A has order p+1
 46 | static void find_basis(proj *P, proj *Q, proj *PQ, const proj *A) {
 47 |   bool oncurve = class_mod_4 == 3; // candidates are kept only when is_on_curve() returns this value
 48 |   proj P2, Q2, tmp;
 49 |   // Get first point
 50 |   while (true) {
 51 |     fp2_random(&P->x); P->z = fp2_1;
 52 |     if (is_on_curve(P, A) != oncurve)
 53 |       continue;
 54 |     // multiply by cofactor
 55 |     xMUL(P, A, P, &p_even_cofactor);
 56 |     // check it has maximal order
 57 |     P2 = *P;
 58 |     for (int i = 1; i < two_tors_height; i++)
 59 |       xDBL(&P2, A, &P2);
 60 |     if (!mont_iszero(&P2))
 61 |       break;
 62 |   }
 63 |   // here P2 is P doubled two_tors_height-1 times, and it is non-zero
 64 |   // Get linearly independent point
 65 |   while (true) {
 66 |     fp2_random(&Q->x); Q->z = fp2_1;
 67 |     if (is_on_curve(Q, A) != oncurve)
 68 |       continue;
 69 |     // multiply by cofactor
 70 |     xMUL(Q, A, Q, &p_even_cofactor);
 71 |     // check it has maximal order
 72 |     Q2 = *Q;
 73 |     for (int i = 1; i < two_tors_height; i++)
 74 |       xDBL(&Q2, A, &Q2);
 75 |     if (!mont_iszero(&Q2) && !mont_equal(&Q2, &P2)) // Q2 must also differ from P2
 76 |       break;
 77 |   }
 78 | 
 79 |   // Compute P-Q
 80 |   xBILIFT(PQ, &tmp, P, Q, A);
 81 | 
 82 |   // Shuffle to satisfy constraint
 83 |   if (fp2_iszero(&P2.x)) { // P2 already has x == 0: nothing to do
 84 |   } else if (fp2_iszero(&Q2.x)) { // Q2 has x == 0: exchange P and Q
 85 |     fp2_cswap(&P->x, &Q->x, true);
 86 |     fp2_cswap(&P->z, &Q->z, true);
 87 |   } else { // neither has x == 0: exchange P with the point stored in PQ
 88 |     fp2_cswap(&P->x, &PQ->x, true);
 89 |     fp2_cswap(&P->z, &PQ->z, true);
 90 |   }
 91 | }
 92 | // Meet-in-the-middle search for a walk of len steps between the curves from and to, hashing 2^tab_size walks from `from`.
 93 | // On a collision fills phi (A = *from, len, ker) and returns true; returns false otherwise, or if the table cannot be allocated.
 94 | bool MITM_cutoff(two_walk *phi, const proj *from, const proj *to, long len, long tab_size) {
 95 |   long dual_len = len - tab_size;
 96 |   proj Pf, Qf, PQf, Pt, Qt, PQt, B, C, j;
 97 |   proj dual_kers[dual_len], phi_kers[tab_size];
 98 | 
 99 |   // Prepare stack for isogeny evaluations: floor(log2(max(dual_len, tab_size))) entries
100 |   long stacksize, ts = len - tab_size;
101 |   ts = ts < tab_size ? tab_size : ts;
102 |   for (stacksize = 0; ts > 1; ts >>= 1) stacksize++;
103 |   proj stack[stacksize];
104 | 
105 |   // Compute bases
106 |   find_basis(&Pf, &Qf, &PQf, from);
107 |   find_basis(&Pt, &Qt, &PQt, to);
108 | 
109 |   // Hash table with 1.5 * 2^tab_size slots; heap-allocated because it may be too large for the stack.
110 |   // calloc zeroes every slot, and a zero top hash word is what marks a slot as free.
111 |   long h_tab_size = (1L << tab_size) + (1L << (tab_size - 1));
112 |   entry *h_tab = calloc(h_tab_size, sizeof *h_tab);
113 |   if (h_tab == NULL) return false; // out of memory: reported as "no walk found"
114 | 
115 |   // Fill the table
116 |   uintbig a;
117 |   long cof = two_tors_height - tab_size;
118 |   for (long i = 0; i < cof; i++) {
119 |     xDBL(&Pf, from, &Pf);
120 |     xDBL(&Qf, from, &Qf);
121 |     xDBL(&PQf, from, &PQf);
122 |   }
123 |   // one entry per scalar i; the bound is computed in uint64_t to match i and avoid an int-width shift
124 |   for (uint64_t i = 0; i < (uint64_t)1 << tab_size; i++) {
125 |     uintbig_set(&a, i);
126 |     xBIDIM(phi_kers, from, &Pf, &a, &Qf, &uintbig_1, &PQf);
127 |     B = *from;
128 |     eval_walk_rec(&B, phi_kers, tab_size, false, stack, 0);
129 |     uint64_t h = hash(&j, &B);
130 |     insert(h, i, h_tab, h_tab_size);
131 |   }
132 | 
133 |   // Prepare phi
134 |   cof = two_tors_height - len;
135 |   for (long i = 0; i < cof; i++) {
136 |     xDBL(&Pt, to, &Pt);
137 |     xDBL(&Qt, to, &Qt);
138 |     xDBL(&PQt, to, &PQt);
139 |   }
140 |   phi->A = *from;
141 |   phi->len = len;
142 |   phi->ker = Pt;
143 | 
144 |   // Search for collisions
145 |   for (long i = 0; i < tab_size; i++) {
146 |     xDBL(&Pt, to, &Pt);
147 |     xDBL(&Qt, to, &Qt);
148 |     xDBL(&PQt, to, &PQt);
149 |   }
150 |   for (uint64_t i = 0; i < (uint64_t)1 << dual_len; i++) {
151 |     uintbig_set(&a, i);
152 |     xBIDIM(dual_kers, to, &Pt, &a, &Qt, &uintbig_1, &PQt);
153 |     B = *to;
154 |     eval_walk_rec(&B, dual_kers, dual_len, true, stack, 0);
155 |     uint64_t h = hash(&j, &B);
156 |     long start = -1;
157 |     do {
158 |       start = lookup(h, h_tab, h_tab_size, start);
159 |       if (start >= 0) {
160 |         // Potential collision, recompute the walk and check
161 |         proj j1;
162 |         uintbig_set(&a, h_tab[start].a);
163 |         xBIDIM(phi_kers, from, &Pf, &a, &Qf, &uintbig_1, &PQf);
164 |         C = *from;
165 |         eval_walk_rec(&C, phi_kers, tab_size, true, stack, 0);
166 |         jinv256(&j1, &C);
167 |         if (mont_equal(&j, &j1)) {
168 |           // Collision found, reconstruct kernel
169 |           // Push phi->ker through dual_kers
170 |           for (long k = 0; k < dual_len; k++) {
171 |             two_isog(dual_kers+k, &phi->ker);
172 |           }
173 |           // Compute and evaluate isomorphism at meeting point
174 |           isomorphism isom;
175 |           mont_isom(&isom, &B, &C);
176 |           mont_isom_apply(&isom, &phi->ker);
177 |           // Push phi->ker through the dual of phi_kers
178 |           for (long k = tab_size - 1; k >= 0; k--) {
179 |             two_isog_dual(phi_kers+k, &phi->ker);
180 |           }
181 | 
182 |           free(h_tab);
183 |           return true;
184 |         }
185 |       }
186 |     } while (start >= 0);
187 |   }
188 | 
189 |   free(h_tab);
190 |   return false;
191 | }
192 | 
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 
215 | 
216 | // Hash of an fp2 element: sum of four of its words with bit 47 forced on, so the stored word for bits 32-47 is never 0 (0 marks a free slot). The shift uses uint64_t because long may be only 32 bits wide.
217 | static uint64_t hash_fp2(const fp2 *p) {
218 |     return (p->re.x.c[0] + p->re.x.c[3] + p->im.x.c[1] + p->im.x.c[2]) | ((uint64_t)1 << 47);
219 | }
220 | // Derives *curve from the top stack point K = stack_1[stacklen-1] and copies the lower stacklen-1 entries of the three stacks into new_stack_*, passing each through two_isog with K.
221 | static void push_points(proj *curve, proj *new_stack_1, proj *new_stack_2, proj *new_stack_3,
222 |   const proj *stack_1, const proj *stack_2, const proj *stack_3, long stacklen) {
223 |   // with K = (X:Z): curve->z = Z^2 and curve->x = 2*(Z^2 - 2*X^2), assuming the fp2_* helpers write to their first argument
224 |   fp2_sq2(&curve->z, &stack_1[stacklen-1].z);
225 |   fp2_sq2(&curve->x, &stack_1[stacklen-1].x);
226 | 
227 |   fp2_add2(&curve->x, &curve->x);
228 |   fp2_sub3(&curve->x, &curve->z, &curve->x);
229 |   fp2_add2(&curve->x, &curve->x);
230 |   // A24 is only needed, and only initialised, when the loop below runs at least once
231 |   proj A24;
232 |   if (1 < stacklen) {
233 |     fp2_add3(&A24.x, &curve->z, &curve->z);    //precomputation of A24=(A+2C:4C)
234 |     fp2_add3(&A24.z, &A24.x, &A24.x);
235 |     fp2_add2(&A24.x, &curve->x);
236 |   }
237 |   // entry stacklen-1 itself is never written, so new_stack_1 may be the same array as stack_1
238 |   for (int i = 0; i < stacklen-1; i++) {
239 |     new_stack_1[i] = stack_1[i];
240 |     new_stack_2[i] = stack_2[i];
241 |     new_stack_3[i] = stack_3[i];
242 |     two_isog(&stack_1[stacklen-1], &new_stack_1[i]);
243 |     two_isog(&stack_1[stacklen-1], &new_stack_2[i]);
244 |     two_isog(&stack_1[stacklen-1], &new_stack_3[i]);
245 |     xDBLADD(&new_stack_3[i], &new_stack_2[i], &new_stack_3[i], &new_stack_2[i], &new_stack_1[i], &A24);
246 |   }
247 | 
248 | 
249 | }
250 | // Recursive worker of build_list: processes len more levels for the lenA curves in A and returns the new count, lenA * 2^len.
251 | static long build_list_rec(proj *A, long lenA, long len, proj **stack_1, proj **stack_2, proj **stack_3, long stacklen) {
252 |   // stacklen is the number of entries in use in each per-curve stack; entry stacklen-1 is the top
253 |   if (len == 0) {
254 |     return lenA;
255 |   }
256 |   if (len == 1) {
257 |     for (int j = 0; j < lenA; ++j) {
258 |       // order matters: this call reads stacks j and writes stacks lenA+j, the next one rewrites stacks j in place
259 |       // push points through second isogeny (stack_1 and stack_2 swap roles)
260 |       push_points(&A[lenA+j], stack_2[lenA+j], stack_1[lenA+j], stack_3[lenA+j],
261 |         stack_2[j], stack_1[j], stack_3[j], stacklen);
262 | 
263 |       // push points through first isogeny
264 |       push_points(&A[j], stack_1[j], stack_2[j], stack_3[j],
265 |         stack_1[j], stack_2[j], stack_3[j], stacklen);
266 | 
267 |     }
268 |     // every curve produced two: A[j] and A[lenA+j]
269 |     return lenA*2;
270 | 
271 |   } else {
272 |     long right = (double)len * 0.5; // floor(len/2) after truncation to long
273 |     long left = len - right;
274 |     for (int j = 0; j < lenA; ++j) {
275 |       stack_1[j][stacklen] = stack_1[j][stacklen-1];
276 |       stack_2[j][stacklen] = stack_2[j][stacklen-1];
277 |       stack_3[j][stacklen] = stack_3[j][stacklen-1];
278 |       for (int i = 0; i < left; i++) { // the new top entry is the old top doubled `left` times
279 |         xDBL(&stack_1[j][stacklen], &A[j], &stack_1[j][stacklen]);
280 |         xDBL(&stack_2[j][stacklen], &A[j], &stack_2[j][stacklen]);
281 |         xDBL(&stack_3[j][stacklen], &A[j], &stack_3[j][stacklen]);
282 |       }
283 |     }
284 |     lenA = build_list_rec(A, lenA, right, stack_1, stack_2, stack_3, stacklen+1);
285 |     return build_list_rec(A, lenA, left, stack_1, stack_2, stack_3, stacklen);
286 |   }
287 | }
288 | 
289 | // Batch division with a single inversion: res[i] = p[i].x / p[i].z for i = 0..(len-1)
290 | static void simultaneous_ratio(fp2 *res, proj* p, long len) {
291 |   fp2 acc = fp2_1;
292 |   for (long k = 0; k < len; k++) {       // forward pass: res[k] = x_k * (z_0 * ... * z_(k-1))
293 |     fp2_mul3(res + k, &acc, &(p + k)->x);
294 |     fp2_mul2(&acc, &(p + k)->z);
295 |   }
296 |   fp2_inv(&acc);                          // acc = 1 / (z_0 * ... * z_(len-1))
297 |   for (long k = len; k-- > 0; ) {         // backward pass: k = len-1 down to 0
298 |     fp2_mul2(res + k, &acc);              // res[k] becomes x_k / z_k
299 |     fp2_mul2(&acc, &(p + k)->z);          // remove z_k from the running inverse
300 |   }
301 | }
302 | 
303 | // P3 = P1-P2 corresponds to the (0,0) direction
304 | // returns a list A[2^length] such that A[i] is the target of
305 | // the isogeny of kernel P1 + a*P3
306 | static void build_list(long length, const proj *from, proj *A, const proj *P1, const proj *P2, const proj *P3) {
307 |   // A must have room for 2^length curves; it is seeded with the starting curve
308 |   A[0] = *from;
309 | 
310 |   long lenA = (1ull<<length);
311 |   proj *stack_1[lenA], *stack_2[lenA], *stack_3[lenA];
312 |   // log = floor(log2(length)) + 1 (or 1 when length <= 1): number of entries allocated per stack
313 |   long log, len = length;
314 |   for (log = 0; len > 1; len >>= 1) log++;
315 |   log += 1;
316 |   // NOTE(review): the malloc results below are not checked before use
317 |   for (int i = 0; i < lenA; ++i) {
318 |     // TODO: this is too much
319 |     stack_1[i] = malloc(sizeof(proj)*log);
320 |     stack_2[i] = malloc(sizeof(proj)*log);
321 |     stack_3[i] = malloc(sizeof(proj)*log);
322 |   }
323 |   // the three input points form the initial one-entry stacks of curve 0
324 |   stack_1[0][0] = *P1;
325 |   stack_2[0][0] = *P2;
326 |   stack_3[0][0] = *P3;
327 | 
328 |   long lenB = build_list_rec(A, 1, length, stack_1, stack_2, stack_3, 1);
329 |   // lenB is the curve count returned by build_list_rec; that many stacks are released
330 |   for (int i = 0; i < lenB; ++i) {
331 |     free(stack_1[i]);
332 |     free(stack_2[i]);
333 |     free(stack_3[i]);
334 |   }
335 | }
336 | 
337 | 
338 | // Qi is a basis of the 2^length torsion of B
339 | // K is a point in B, then pushed through the isogeny found
340 | static bool search_collision(long *i1, proj *K, proj *C, entry *h_tab, long h_tab_size,
341 |   const fp2 *j1_fp, long length, const proj *B,
342 |   const proj *Q1, const proj *Q2, const proj *Q3) {
343 |   // Returns true on a match; *i1 is then the payload of the matching table entry and eval_walk has been given C and K.
344 |   long lenA = 1ul << length;
345 | 
346 |   proj A[lenA];
347 |   // all 2^length curves produced by build_list from B
348 |   build_list(length, B, A, Q1, Q2, Q3);
349 |   // their j-invariants, first projectively, then as plain fp2 values
350 |   proj j2_proj[lenA];
351 |   for (int i = 0; i < lenA; ++i) {
352 |     jinv256(j2_proj+i, A+i);
353 |   }
354 |   fp2 j2_fp[lenA];
355 |   simultaneous_ratio(j2_fp, j2_proj, lenA);
356 | 
357 |   uintbig a;
358 |   uint64_t h;
359 |   two_walk phi2;
360 |   phi2.len = length;
361 |   phi2.A = *B;
362 | 
363 |   proj Q13;
364 |   xADD(&Q13, Q1, Q3, Q2);
365 | 
366 |   for (int i2 = 0; i2 < lenA; ++i2) {
367 |     h = hash_fp2(j2_fp + i2);      
368 |     long start = -1;
369 |     do {
370 |       start = lookup(h, h_tab, h_tab_size, start);
371 |       if (start >= 0) {
372 |         // Potential collision, check
373 |         *i1 = h_tab[start].a;
374 | 
375 |         // the hashes agree; confirm by comparing the full j-invariants
376 |         if (fp2_equal(&j1_fp[*i1], &j2_fp[i2])) {
377 |           // Collision, reconstitute the walk and return
378 |           uintbig_set(&a, i2);
379 |           xBIDIM(&phi2.ker, B, Q1, &uintbig_1, Q3, &a, &Q13);
380 |           
381 |           eval_walk(&phi2, C, K);
382 | 
383 |           return true;
384 |         }
385 |       }
386 |     } while (start >= 0);
387 |   }
388 |   return false;
389 | 
390 | }
391 | 
392 | // Meet-in-the-middle search for a walk of `length` steps from `from` to `to`. On success fills eta (A = *from, len = length, ker) and returns true; returns false when no collision is found or the hash table cannot be allocated.
393 | bool MITM2(two_walk *eta, const proj *from, const proj *to, long length) {
394 |   long len2 = length/2;
395 |   long len1 = length-len2;
396 |   // for length < 6 the whole walk is enumerated from the `from` side
397 |   if (length < 6) {
398 |     len1 = length;
399 |     len2 = 0;
400 |   }
401 |   if (length == 0) {
402 |     eta->len = 0;
403 |     eta->A = *from;
404 |     eta->ker = (proj){fp2_1,fp2_0};
405 |     proj j1, j2;
406 | 
407 |     jinv256(&j1, from);
408 |     jinv256(&j2, to);
409 |     // an empty walk exists exactly when the two j-invariants compare equal
410 |     return mont_equal(&j1,&j2);
411 |   }
412 | 
413 | 
414 |   long lenA1 = 1ul << len1;
415 | 
416 |   proj A1[lenA1], P1, P2, P3;
417 |   find_basis(&P3, &P1, &P2, from); // P3 has the (0,0) direction
418 | 
419 |   for (long i = 0; i < two_tors_height-len1; i++) {
420 |     xDBL(&P1, from, &P1);
421 |     xDBL(&P2, from, &P2);
422 |     xDBL(&P3, from, &P3);
423 |   }
424 | 
425 |   build_list(len1, from, A1, &P1, &P2, &P3);
426 | 
427 |   // Fill table
428 | 
429 |   proj j1_proj[lenA1];
430 |   for (long i = 0; i < lenA1; ++i) {
431 |     jinv256(j1_proj+i, A1+i);
432 |   }
433 |   fp2 j1_fp[lenA1];
434 |   simultaneous_ratio(j1_fp, j1_proj, lenA1);
435 | 
436 |   long h_tab_size = (1L << len1) + (1L << (len1 - 1));
437 |   entry *h_tab = calloc(h_tab_size, sizeof *h_tab); // zeroed: a zero top hash word marks a free slot
438 |   if (h_tab == NULL)
439 |     return false; // out of memory: reported as "no walk found"
440 | 
441 |   uint64_t h;
442 |   for (long i = 0; i < lenA1; ++i) {
443 |     h = hash_fp2(j1_fp + i);
444 |     insert(h, i, h_tab, h_tab_size);
445 |   }
446 | 
447 |   proj K;
448 |   bool found;
449 |   long i1;
450 | 
451 |   uintbig a;
452 |   proj C;
453 |   two_walk phi1;
454 |   phi1.len = len1;
455 |   phi1.A = *from;
456 |   proj P13;
457 |   xADD(&P13, &P1, &P3, &P2);
458 |   // the first head_len (0 or 3) steps from `to` are enumerated explicitly in the loop below
459 |   int head_len = (len2 < 4) ? 0 : 3;
460 |   two_walk head_walk;
461 |   head_walk.len = head_len;
462 |   head_walk.A = *to;
463 |   proj head_curve;
464 | 
465 |   proj Q1_long, Q2_long, Q3_long, Q1, Q2, Q3;
466 |   find_basis(&Q3_long, &Q1_long, &Q2_long, to); // Q3_long has the (0,0) direction
467 | 
468 |   for (long i = 0; i < two_tors_height-length; i++) {
469 |     xDBL(&Q1_long, to, &Q1_long);
470 |     xDBL(&Q2_long, to, &Q2_long);
471 |     xDBL(&Q3_long, to, &Q3_long);
472 |   }
473 |   Q1 = Q1_long;
474 |   Q2 = Q2_long;
475 |   Q3 = Q3_long;
476 |   for (long i = 0; i < length-head_len; i++) {
477 |     xDBL(&Q1, to, &Q1);
478 |     xDBL(&Q2, to, &Q2);
479 |     xDBL(&Q3, to, &Q3);
480 |   }
481 |   proj Q13;
482 |   xADD(&Q13, &Q1, &Q3, &Q2);
483 | 
484 | 
485 |   for (int head = 0; head < (1 << head_len); ++head) {
486 |     uintbig_set(&a, head);
487 |     xBIDIM(&head_walk.ker, to, &Q1, &uintbig_1, &Q3, &a, &Q13);
488 |     K = Q3_long;
489 |     eval_walk(&head_walk, &head_curve, &K);
490 | 
491 |     // compute a basis of 2^(len2-head_len) torsion in head_curve with R3 the (0,0) direction
492 | 
493 |     proj R1 = Q1_long, R2 = Q2_long, R3 = K;
494 |     eval_walk(&head_walk, &head_curve, &R1);
495 |     eval_walk(&head_walk, &head_curve, &R2);
496 | 
497 |     for (int i = 0; i < head_len; ++i) {
498 |       if (head & (1 << i))
499 |         xADD(&R1, &R1, &R3, &R2);
500 |       else
501 |         xADD(&R2, &R2, &R3, &R1);
502 |       xDBL(&R3, &head_curve, &R3);
503 |     }
504 | 
505 |     for (long i = 0; i < length - len2; i++) {
506 |       xDBL(&R1, &head_curve, &R1);
507 |       xDBL(&R2, &head_curve, &R2);
508 |       xDBL(&R3, &head_curve, &R3);
509 |     }
510 | 
511 |     // enumerate the remaining len2-head_len steps from head_curve and look each curve up in the table
512 |     found = search_collision(&i1, &K, &C, h_tab, h_tab_size, j1_fp,
513 |       len2-head_len, &head_curve, &R1, &R2, &R3);
514 | 
515 | 
516 |     if (found) {
517 |       eta->ker = K;
518 |       uintbig_set(&a, i1);
519 |       xBIDIM(&phi1.ker, from, &P1, &uintbig_1, &P3, &a, &P13);
520 |       // rebuild the `from`-side walk number i1 and pass eta->ker through eval_dual
521 |       eval_dual(&phi1, &C, &eta->ker);
522 | 
523 |       eta->len = length;
524 |       eta->A = *from;
525 | 
526 |       free(h_tab);
527 |       return true;
528 |     }
529 |   }
530 |   free(h_tab);
531 |   return false;
532 | }
533 | 
534 | 
535 | 
536 | 
537 | 
538 | 
539 | 
540 | 
541 | 
542 | 


--------------------------------------------------------------------------------
/include/cycle.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2003, 2007-8 Matteo Frigo
  3 |  * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining
  6 |  * a copy of this software and associated documentation files (the
  7 |  * "Software"), to deal in the Software without restriction, including
  8 |  * without limitation the rights to use, copy, modify, merge, publish,
  9 |  * distribute, sublicense, and/or sell copies of the Software, and to
 10 |  * permit persons to whom the Software is furnished to do so, subject to
 11 |  * the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be
 14 |  * included in all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 20 |  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 21 |  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 22 |  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 |  *
 24 |  */
 25 | 
 26 | 
 27 | /* machine-dependent cycle counters code. Needs to be inlined. */
 28 | 
 29 | /***************************************************************************/
 30 | /* To use the cycle counters in your code, simply #include "cycle.h" (this
 31 |    file), and then use the functions/macros:
 32 | 
 33 |                  ticks getticks(void);
 34 | 
 35 |    ticks is an opaque typedef defined below, representing the current time.
 36 |    You extract the elapsed time between two calls to getticks() via:
 37 | 
 38 |                  double elapsed(ticks t1, ticks t0);
 39 | 
 40 |    which returns a double-precision variable in arbitrary units.  You
 41 |    are not expected to convert this into human units like seconds; it
 42 |    is intended only for *comparisons* of time intervals.
 43 | 
 44 |    (In order to use some of the OS-dependent timer routines like
 45 |    Solaris' gethrtime, you need to paste the autoconf snippet below
 46 |    into your configure.ac file and #include "config.h" before cycle.h,
 47 |    or define the relevant macros manually if you are not using autoconf.)
 48 | */
 49 | 
 50 | /***************************************************************************/
 51 | /* This file uses macros like HAVE_GETHRTIME that are assumed to be
 52 |    defined according to whether the corresponding function/type/header
 53 |    is available on your system.  The necessary macros are most
 54 |    conveniently defined if you are using GNU autoconf, via the tests:
 55 |    
 56 |    dnl ---------------------------------------------------------------------
 57 | 
 58 |    AC_C_INLINE
 59 |    AC_HEADER_TIME
 60 |    AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
 61 | 
 62 |    AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
 63 | #include <sys/time.h>
 64 | #endif])
 65 | 
 66 |    AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
 67 | 
 68 |    dnl Cray UNICOS _rtc() (real-time clock) intrinsic
 69 |    AC_MSG_CHECKING([for _rtc intrinsic])
 70 |    rtc_ok=yes
 71 |    AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
 72 | #include <intrinsics.h>
 73 | #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
 74 |    AC_MSG_RESULT($rtc_ok)
 75 | 
 76 |    dnl ---------------------------------------------------------------------
 77 | */
 78 | 
 79 | /***************************************************************************/
 80 | 
 81 | #if TIME_WITH_SYS_TIME
 82 | # include <sys/time.h>
 83 | # include <time.h>
 84 | #else
 85 | # if HAVE_SYS_TIME_H
 86 | #  include <sys/time.h>
 87 | # else
 88 | #  include <time.h>
 89 | # endif
 90 | #endif
 91 | /* Expands to a static elapsed(t1, t0) returning t1 - t0 as a double; INL is the inline keyword spelling to use. */
 92 | #define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
 93 | {									  \
 94 |      return (double)t1 - (double)t0;					  \
 95 | }
 96 | 
 97 | /*----------------------------------------------------------------*/
 98 | /* Solaris */
 99 | #if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
100 | typedef hrtime_t ticks;
101 | 
102 | #define getticks gethrtime
103 | 
104 | INLINE_ELAPSED(inline)
105 | 
106 | #define HAVE_TICK_COUNTER
107 | #endif
108 | 
109 | /*----------------------------------------------------------------*/
110 | /* AIX v. 4+ routines to read the real-time clock or time-base register */
111 | #if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
112 | typedef timebasestruct_t ticks;
113 | /* Returns the current tick value as filled in by read_real_time(). */
114 | static __inline ticks getticks(void)
115 | {
116 |      ticks t;
117 |      read_real_time(&t, TIMEBASE_SZ);
118 |      return t;
119 | }
120 | /* Difference t1 - t0: both arguments are passed through time_base_to_time(), then tb_high differences are scaled by 1e9 and added to tb_low differences. */
121 | static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
122 | {
123 |      time_base_to_time(&t1, TIMEBASE_SZ);
124 |      time_base_to_time(&t0, TIMEBASE_SZ);
125 |      return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 + 
126 | 	     ((double)t1.tb_low - (double)t0.tb_low));
127 | }
128 | 
129 | #define HAVE_TICK_COUNTER
130 | #endif
131 | 
132 | /*----------------------------------------------------------------*/
133 | /*
134 |  * PowerPC ``cycle'' counter using the time base register.
135 |  */
136 | #if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__))))  && !defined(HAVE_TICK_COUNTER)
137 | typedef unsigned long long ticks;
138 | /* Returns a 64-bit tick count assembled from the upper (mftbu) and lower (mftb) halves. */
139 | static __inline__ ticks getticks(void)
140 | {
141 |      unsigned int tbl, tbu0, tbu1;
142 |      /* read upper, lower, upper again; retry if the upper half changed between the two reads */
143 |      do {
144 | 	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
145 | 	  __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
146 | 	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
147 |      } while (tbu0 != tbu1);
148 |      /* upper half in bits 32-63, lower half in bits 0-31 */
149 |      return (((unsigned long long)tbu0) << 32) | tbl;
150 | }
151 | 
152 | INLINE_ELAPSED(__inline__)
153 | 
154 | #define HAVE_TICK_COUNTER
155 | #endif
156 | 
157 | /* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
158 |    from Carbon, requires no additional libraries to be linked). */
159 | #if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
160 | #include <mach/mach_time.h>
161 | typedef uint64_t ticks; /* tick values are held as uint64_t */
162 | #define getticks mach_absolute_time /* getticks() is an alias for mach_absolute_time() */
163 | INLINE_ELAPSED(__inline__)
164 | #define HAVE_TICK_COUNTER /* sections guarded by !defined(HAVE_TICK_COUNTER) are skipped from here on */
165 | #endif
166 | 
167 | /*----------------------------------------------------------------*/
168 | /*
169 |  * Pentium cycle counter 
170 |  */
171 | #if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__)  && !defined(HAVE_TICK_COUNTER)
172 | typedef unsigned long long ticks;
173 | /* Read the counter with a single rdtsc instruction. */
174 | static __inline__ ticks getticks(void)
175 | {
176 |      ticks tsc;
177 | 
178 |      /* the "=A" output receives the whole 64-bit result; no inputs, no further clobbers */
179 |      __asm__ __volatile__("rdtsc": "=A" (tsc));
180 |      return tsc;
181 | }
182 | 
183 | INLINE_ELAPSED(__inline__)
184 | 
185 | #define HAVE_TICK_COUNTER
186 | #define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
187 | #endif
188 | 
189 | /* Visual C++ -- thanks to Morten Nissov for his help with this */
190 | #if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
191 | #include <windows.h>
192 | typedef LARGE_INTEGER ticks;
193 | #define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */
194 | /* Emit the RDTSC opcode bytes, then copy edx into HighPart and eax into LowPart. */
195 | static __inline ticks getticks(void)
196 | {
197 |      ticks retval;
198 | 
199 |      __asm {
200 | 	  RDTSC
201 | 	  mov retval.HighPart, edx
202 | 	  mov retval.LowPart, eax
203 |      }
204 |      return retval;
205 | }
206 | /* Difference of the two QuadPart values, each converted to double before subtracting. */
207 | static __inline double elapsed(ticks t1, ticks t0)
208 | {  
209 |      return (double)t1.QuadPart - (double)t0.QuadPart;
210 | }  
211 | 
212 | #define HAVE_TICK_COUNTER
213 | #define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
214 | #endif
215 | 
216 | /*----------------------------------------------------------------*/
217 | /*
218 |  * X86-64 cycle counter
219 |  */
220 | #if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER)
221 | typedef unsigned long long ticks;
222 | /* Read the counter with rdtsc: the "=a" output is the low 32 bits, the "=d" output the high 32 bits. */
223 | static __inline__ ticks getticks(void)
224 | {
225 |      unsigned a, d;
226 |      __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d)); /* reserved spellings: plain asm is not a keyword under strict -std=c99/c11 */
227 |      return ((ticks)a) | (((ticks)d) << 32);
228 | }
229 | 
230 | INLINE_ELAPSED(__inline__)
231 | 
232 | #define HAVE_TICK_COUNTER
233 | #endif
234 | 
235 | /* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
236 |    NOTE: this code will fail to link unless you use the -Masmkeyword compiler
237 |    option (grrr). */
238 | #if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) 
239 | typedef unsigned long long ticks;
240 | static ticks getticks(void)
241 | {
242 |     asm(" rdtsc; shl    $0x20,%rdx; mov    %eax,%eax; or     %rdx,%rax;    "); /* rdtsc, then rax = (rdx << 32) | eax */
243 | } /* NOTE(review): no return statement; presumably relies on the value left in rax being taken as the result -- confirm */
244 | INLINE_ELAPSED(__inline__)
245 | #define HAVE_TICK_COUNTER
246 | #endif
247 | 
248 | /* Visual C++, courtesy of Dirk Michaelis */
249 | #if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
250 | /* getticks is an alias for __rdtsc, which is requested as an intrinsic via the #pragma below. */
251 | #include <intrin.h>
252 | #pragma intrinsic(__rdtsc)
253 | typedef unsigned __int64 ticks;
254 | #define getticks __rdtsc
255 | INLINE_ELAPSED(__inline)
256 | /* Record that a counter was chosen; sections guarded by !defined(HAVE_TICK_COUNTER) are then skipped. */
257 | #define HAVE_TICK_COUNTER
258 | #endif
259 | 
260 | /*----------------------------------------------------------------*/
261 | /*
262 |  * IA64 cycle counter
263 |  */
264 | 
265 | /* intel's icc/ecc compiler */
266 | #if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
267 | typedef unsigned long ticks;
268 | #include <ia64intrin.h>
269 | /* Return whatever __getReg() reports for _IA64_REG_AR_ITC. */
270 | static __inline__ ticks getticks(void)
271 | {
272 |      return __getReg(_IA64_REG_AR_ITC);
273 | }
274 |  
275 | INLINE_ELAPSED(__inline__)
276 |  
277 | #define HAVE_TICK_COUNTER
278 | #endif
279 | 
280 | /* gcc */
281 | #if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
282 | typedef unsigned long ticks;
283 | /* Move the ar.itc register into a C variable and return it. */
284 | static __inline__ ticks getticks(void)
285 | {
286 |      ticks itc;
287 | 
288 |      __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(itc));
289 |      return itc;
290 | }
291 | 
292 | INLINE_ELAPSED(__inline__)
293 | 
294 | #define HAVE_TICK_COUNTER
295 | #endif
296 | 
297 | /* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
298 | #if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
299 | #include <machine/sys/inline.h>
300 | typedef unsigned long ticks;
301 | 
302 | /* Fetch _AREG_ITC through the _Asm_mov_from_ar() intrinsic. */
303 | static inline ticks getticks(void)
304 | {
305 |      ticks itc = _Asm_mov_from_ar (_AREG_ITC);
306 | 
307 |      return itc;
308 | }
309 | 
310 | INLINE_ELAPSED(inline)
311 | 
312 | #define HAVE_TICK_COUNTER
313 | #endif
314 | 
315 | /* Microsoft Visual C++ */
316 | #if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
317 | typedef unsigned __int64 ticks;
318 | /* Declare the __getReg intrinsic by hand, with C linkage when compiled as C++. */
319 | #  ifdef __cplusplus
320 | extern "C"
321 | #  endif
322 | ticks __getReg(int whichReg);
323 | #pragma intrinsic(__getReg)
324 | /* Read register number 3116 via __getReg(). NOTE(review): presumably the ITC register id -- confirm. */
325 | static __inline ticks getticks(void)
326 | {
327 |      volatile ticks temp;
328 |      temp = __getReg(3116);
329 |      return temp;
330 | }
331 | /* __inline, matching getticks() above: plain inline is not accepted by older MSVC C compilers. */
332 | INLINE_ELAPSED(__inline)
333 | 
334 | #define HAVE_TICK_COUNTER
335 | #endif
336 | 
337 | /*----------------------------------------------------------------*/
338 | /*
339 |  * PA-RISC cycle counter 
340 |  */
341 | #if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
342 | typedef unsigned long ticks;
343 | /* The || pair is parenthesized so the HAVE_TICK_COUNTER guard applies to both macro spellings. */
344 | #  ifdef __GNUC__
345 | static __inline__ ticks getticks(void)
346 | {
347 |      ticks ret;
348 | 
349 |      __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
350 |      /* no input, nothing else clobbered */
351 |      return ret;
352 | }
353 | #  else
354 | #  include <machine/inline.h>
355 | static inline ticks getticks(void)
356 | {
357 |      register ticks ret;
358 |      _MFCTL(16, ret);
359 |      return ret;
360 | }
361 | #  endif
362 | /* Both variants above read control register 16 (mfctl instruction or _MFCTL macro). */
363 | INLINE_ELAPSED(inline)
364 | 
365 | #define HAVE_TICK_COUNTER
366 | #endif
367 | 
368 | /*----------------------------------------------------------------*/
369 | /* S390, courtesy of James Treacy */
370 | #if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
371 | typedef unsigned long long ticks;
372 | /* Let the stck instruction store its value directly into a local, then return that local. */
373 | static __inline__ ticks getticks(void)
374 | {
375 |      ticks clock_value;
376 |      __asm__("stck 0(%0)" : : "a" (&(clock_value)) : "memory", "cc");
377 |      return clock_value;
378 | }
379 | 
380 | INLINE_ELAPSED(__inline__)
381 | 
382 | #define HAVE_TICK_COUNTER
383 | #endif
384 | /*----------------------------------------------------------------*/
385 | #if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
386 | /*
387 |  * Only the low 32 bits of the rpcc result are kept, and a 32-bit count
388 |  * overflows quickly: a 1GHz machine wraps around in about 4 seconds.
389 |  */
390 | typedef unsigned int ticks;
391 | 
392 | static __inline__ ticks getticks(void)
393 | {
394 |      unsigned long raw;
395 |      __asm__ __volatile__ ("rpcc %0" : "=r"(raw));
396 |      return (raw & 0xFFFFFFFF);
397 | }
398 | 
399 | INLINE_ELAPSED(__inline__)
400 | 
401 | #define HAVE_TICK_COUNTER
402 | #endif
403 | 
404 | /*----------------------------------------------------------------*/
405 | #if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
406 | typedef unsigned long ticks;
407 | /* Read the %tick register with the rd instruction. */
408 | static __inline__ ticks getticks(void)
409 | {
410 |      ticks tick_value;
411 |      __asm__ __volatile__("rd %%tick, %0" : "=r" (tick_value));
412 |      return tick_value;
413 | }
414 | 
415 | INLINE_ELAPSED(__inline__)
416 | 
417 | #define HAVE_TICK_COUNTER
418 | #endif
419 | 
420 | /*----------------------------------------------------------------*/
421 | #if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
422 | #  include <c_asm.h>
423 | typedef unsigned int ticks;
424 | /* Run rpcc through the compiler's asm() form and keep only the low 32 bits of the result. */
425 | static __inline ticks getticks(void)
426 | {
427 |      unsigned long raw;
428 |      raw = asm("rpcc %v0");
429 |      return (raw & 0xFFFFFFFF);
430 | }
431 | 
432 | INLINE_ELAPSED(__inline)
433 | 
434 | #define HAVE_TICK_COUNTER
435 | #endif
436 | /*----------------------------------------------------------------*/
437 | /* SGI/Irix */
438 | #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
439 | typedef struct timespec ticks;
440 | /* Sample CLOCK_SGI_CYCLE through clock_gettime() and return the timespec by value. */
441 | static inline ticks getticks(void)
442 | {
443 |      ticks now;
444 |      clock_gettime(CLOCK_SGI_CYCLE, &now);
445 |      return now;
446 | }
447 | /* t1 - t0 in nanoseconds: the tv_sec difference scaled by 1e9 plus the tv_nsec difference. */
448 | static inline double elapsed(ticks t1, ticks t0)
449 | {
450 |      return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9
451 |           + ((double)t1.tv_nsec - (double)t0.tv_nsec);
452 | }
453 | #define HAVE_TICK_COUNTER
454 | #endif
455 | 
456 | /*----------------------------------------------------------------*/
457 | /* Cray UNICOS _rtc() intrinsic function */
458 | #if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
459 | #ifdef HAVE_INTRINSICS_H
460 | #  include <intrinsics.h>
461 | #endif
462 | /* Tick values are held as long long; getticks() below is an alias for _rtc(). */
463 | typedef long long ticks;
464 | 
465 | #define getticks _rtc
466 | 
467 | INLINE_ELAPSED(inline)
468 | 
469 | #define HAVE_TICK_COUNTER
470 | #endif
471 | 
472 | /*----------------------------------------------------------------*/
473 | /* MIPS ZBus */
474 | #if HAVE_MIPS_ZBUS_TIMER
475 | #if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
476 | #include <stdio.h>
477 | #include <sys/mman.h>
478 | #include <unistd.h>
479 | #include <fcntl.h>
480 | typedef uint64_t ticks;
481 | /* Map one page of /dev/mem at offset 0x10030000 on first use, then read a 64-bit value from it; returns 0 on failure. */
482 | static inline ticks getticks(void)
483 | {
484 |   static volatile uint64_t *addr = NULL;  /* volatile: every call must re-read the mapped location */
485 | 
486 |   if (addr == NULL)
487 |   {
488 |     uint32_t rq_addr = 0x10030000;
489 |     void *mapping;
490 |     int fd;
491 |     int pgsize;
492 |     pgsize = getpagesize();
493 |     fd = open ("/dev/mem", O_RDONLY | O_SYNC, 0);
494 |     if (fd < 0) {
495 |       perror("open");
496 |       return 0;  /* ticks is an integer type, so failure is reported as 0 rather than NULL */
497 |     }
498 |     mapping = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
499 |     close(fd);
500 |     if (mapping == MAP_FAILED) {
501 |       perror("mmap");
502 |       return 0;  /* addr stays NULL, so the next call retries instead of dereferencing MAP_FAILED */
503 |     }
504 |     addr = (volatile uint64_t *)mapping;
505 |   }
506 |   return *addr;
507 | }
508 | 
509 | INLINE_ELAPSED(inline)
510 | 
511 | #define HAVE_TICK_COUNTER
512 | #endif
513 | #endif /* HAVE_MIPS_ZBUS_TIMER */
514 | 
515 | 


--------------------------------------------------------------------------------