├── constants.py ├── pollardRho.py ├── pollardPm1.py ├── README.md ├── factor.py ├── utils.py ├── ecm.py └── primeSieve.py /constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | File that contains constants tuned specifically for the factoring algorithms 5 | and the prime sieves. Tweakable if required. 6 | """ 7 | 8 | # Prime sieve constants 9 | SMALL_THRESHOLD = 60 10 | ERAT_THRESHOLD = 35 * 10**5 11 | ATKIN_THERSHOLD = 10**10 12 | LOWER_SEG_SIZE = 65536 13 | UPPER_SEG_SIZE = 2097152 14 | 15 | # Pollard rho constants 16 | PRIME_THRESHOLD_RHO = 500 17 | SIZE_THRESHOLD_RHO = 10**20 18 | 19 | # Pollard (p-1) constants 20 | MAX_B1_PM1 = 10**8 21 | MAX_B2_PM1 = 10**10 22 | MAX_D_PM1 = 500 23 | 24 | # ECM constants 25 | MAX_CURVES_ECM = 10000 26 | MAX_RND_ECM = 2**63 27 | MAX_B1_ECM = 43 * 10**7 28 | MAX_B2_ECM = 2 * 10**10 29 | 30 | # General factorization constants 31 | PRIME_THRESHOLD_BF = 25000 32 | 33 | # Names of factoring routines for displaying purposes 34 | NAME_ECM = "ECM" 35 | NAME_RHO = "Pollard Rho" 36 | NAME_PM1 = "Pollard p-1" -------------------------------------------------------------------------------- /pollardRho.py: -------------------------------------------------------------------------------- 1 | import utils 2 | import random 3 | import primeSieve 4 | import constants 5 | 6 | """ 7 | This module contains an implementation of Brent's improvement to Pollard's Rho 8 | alogrithm. 9 | 10 | TODO: Include explanation of algorithm. 
def factorize_rho(n, verbose = False):
    """
    Searches for a factor of 'n' using Brent's variant of Pollard's rho
    algorithm, iterating the map y -> y^2 + c (mod n).

    The offsets 'c' are taken from 'small_primes', largest first. A new offset
    is only tried when the previous one collapses to the trivial factor 'n'.

    Arguments:
        n (:int)        - the integer to be factored
        verbose (:bool) - if True, prints every offset that is tried

    Returns:
        'n' itself if 'n' is 1 or prime, a non-trivial factor of 'n' if one is
        found, and -1 if every offset fails
    """
    if n == 1 or utils.is_prime(n):
        return n

    for c in reversed(small_primes):
        if verbose:
            print("Trying offset: %s" % c)

        # Random starting point and random batch size for the gcd computations
        y = random.randint(1, n - 1)
        m = random.randint(1, n - 1)
        r, g, q = 1, 1, 1
        x = ys = y

        while g == 1:
            # Advance the sequence by r steps, remembering where this round began
            x = y
            for _ in range(r):
                y = (y*y + c) % n

            k = 0
            while k < r and g == 1:
                # 'ys' marks the start of the batch so that it can be replayed
                ys = y
                for _ in range(min(m, r - k)):
                    y = (y*y + c) % n
                    # Fold the differences into one product: one gcd per batch
                    q = (q * abs(x - y)) % n
                g = utils.gcd(q, n)
                k += m
            r <<= 1

        if g == n:
            # The batched product picked up all of n at once. Replay the last
            # batch one step at a time to find where the gcd first exceeds 1.
            while True:
                ys = (ys*ys + c) % n
                g = utils.gcd(abs(x - ys), n)
                if g > 1:
                    break

        if g != n:
            return g

    # No offset produced a non-trivial factor
    return -1
def _pm1_batch_factor(acc, batch, n):
    """
    Extracts a non-trivial factor of 'n' from one batch of stage 2 values.

    Arguments:
        acc (:int)        - product of (c - 1) over the batch, reduced mod 'n'
        batch (:int list) - the individual values 'c' that were folded into 'acc'
        n (:int)          - the integer being factored

    Returns:
        a non-trivial factor of 'n', or -1 if the batch does not yield one
    """
    g = utils.gcd(acc, n)
    if g == 1:
        return -1
    if g != n:
        return g

    # The product vanished mod n, so the batch as a whole only gives the
    # trivial factor. Fall back to testing its members one at a time.
    for c in batch:
        g = utils.gcd(c - 1, n)
        if g != 1 and g != n:
            return g
    return -1


def factorize_pm1(n, verbose = False):
    """
    Searches for a factor of 'n' using the two-stage variant of Pollard's
    p-1 algorithm with the bounds returned by compute_bounds(n).

    Arguments:
        n (:int)        - the integer to be factored
        verbose (:bool) - if True, prints the bounds and the stage being run

    Returns:
        'n' itself if 'n' is 1 or prime, 2 if 'n' is even, a non-trivial factor
        of 'n' if either stage finds one, and -1 otherwise
    """
    # Imported locally: only stage 2 needs it
    from itertools import islice

    if n == 1 or utils.is_prime(n):
        return n
    elif n % 2 == 0:
        return 2

    B1, B2 = compute_bounds(n)
    if verbose:
        print("Number of digits: %s" % len(str(n)))
        print("Bounds: %s %s" % (B1, B2))

    # ----- Stage 1 -----
    if verbose:
        print("Stage 1...")
        print("Sieveing primes below %s" % B1)

    primes_below_b1 = primeSieve.prime_sieve(B1)

    # Raise 2 to every prime power not exceeding B1, i.e. to a B1-powersmooth
    # exponent.
    c = 2
    for p in primes_below_b1:
        pp = p
        while pp <= B1:
            c = pow(c, p, n)
            pp *= p

    g = utils.gcd(c - 1, n)
    if g != 1 and g != n:
        return g
    if g == n:
        # c == 1 (mod n): every stage 2 value would also be 1, so stage 2
        # could only ever reproduce the trivial factor.
        return -1

    # ----- Stage 2 -----
    # For each prime q in (B1, B2] this tests gcd(base^q - 1, n), where 'base'
    # is the stage 1 result.
    if verbose:
        print("Stage 2...")
        print("Sieveing primes between %s and %s" % (B1, B2))

    primes = primeSieve.segmented_sieve(B1 + 1, B2)
    if not primes:
        return -1

    base = c
    d_cache = [-1] * (constants.MAX_D_PM1 + 1)
    p = primes[0]
    c = pow(base, p, n)
    acc, batch = (c - 1) % n, [c]

    for q in islice(primes, 1, None):
        # base^q = base^p * base^(q - p); cache the powers for small prime gaps
        d = q - p
        if d <= constants.MAX_D_PM1:
            if d_cache[d] == -1:
                d_cache[d] = pow(base, d, n)
            x = d_cache[d]
        else:
            x = pow(base, d, n)

        # Modular multiplication instead of a fresh exponentiation
        c, p = (c * x) % n, q

        # Accumulate (c - 1) for every prime so no candidate is skipped
        acc = (acc * (c - 1)) % n
        batch.append(c)

        # One gcd per 128 primes
        if len(batch) == 128:
            g = _pm1_batch_factor(acc, batch, n)
            if g != -1:
                return g
            acc, batch = 1, []

    return _pm1_batch_factor(acc, batch, n)
but, oddly, this turned out to be slower than the usual double-and-add algorithm. 9 | * `primeSieve.py` contains a bunch of prime sieves (optimized versions of Atkin, Eratosthenes, segmented Eratosthenes). Look at the [file](https://github.com/nishanth17/factor/blob/master/primeSieve.py) for specific benchmarks. 10 | 11 | # Usage 12 | All you have to do is run the file `factor.py`, enter a number, and hit Enter. Here's an example in terminal: 13 | 14 | python factor.py 15 | Enter number: 15 16 | 17 | Factoring 15... 18 | Number of digits: 2 19 | Finding small prime factors... 20 | Prime factors found: 3, 5 21 | 22 | 15 = 3^1 * 5^1 23 | 24 | Time: 5.00679016113e-05 s 25 | 26 | and another... 27 | 28 | Enter number: 37897387397398739739826929827929827927927762729872987928 29 | 30 | Factoring 37897387397398739739826929827929827927927762729872987928... 31 | Number of digits: 56 32 | Finding small prime factors... 33 | Prime factors found: 2, 3 34 | Factoring 1579057808224947489159455409497076163663656780411374497 with ECM... 35 | Number of digits: 55 36 | Bounds: 250000 128992510 37 | Sieving primes... 38 | Stage 2 found factor! 39 | Found factor 67246307 40 | Factoring 67246307... 41 | Number of digits: 8 42 | 67246307 is prime! 43 | Factoring 23481702991138940747474138758238071923617408171... 44 | Number of digits: 47 45 | Factoring 23481702991138940747474138758238071923617408171 with ECM... 46 | Number of digits: 47 47 | Bounds: 50000 12746592 48 | Sieving primes... 49 | Tried 40 random curves... 50 | Tried 80 random curves... 51 | Tried 120 random curves... 52 | Tried 160 random curves... 53 | Stage 2 found factor! 54 | Found factor 4788272261623351 55 | Factoring 4788272261623351... 56 | Number of digits: 16 57 | 4788272261623351 is prime! 58 | Factoring 4904003303934522319753958187821... 59 | Number of digits: 31 60 | 4904003303934522319753958187821 is prime! 
def merge_factorizations(f1, f2):
    """
    Combines the prime factorizations of two numbers into the factorization of
    their product, in the manner of the merge step of mergesort.

    Both inputs are lists of (prime, exponent) pairs sorted by increasing
    prime. A prime that occurs in both lists appears once in the result with
    the two exponents added. If either input is the failure marker -1, the
    result is -1 as well.
    """
    if f1 == -1 or f2 == -1:
        # One of the sub-factorizations failed, so the merged one fails too
        return -1

    merged = []
    left, right = 0, 0
    size1, size2 = len(f1), len(f2)

    while left < size1 and right < size2:
        a, b = f1[left], f2[right]
        if a[0] < b[0]:
            merged.append(a)
            left += 1
        elif a[0] > b[0]:
            merged.append(b)
            right += 1
        else:
            # Same prime on both sides: add the exponents
            merged.append((a[0], a[1] + b[1]))
            left += 1
            right += 1

    # The loop ends once one list is exhausted, so at most one of these two
    # slices is non-empty.
    merged.extend(f1[left:])
    merged.extend(f2[right:])
    return merged
def print_factorization(n, f):
    """
    Formats a number as the product of the primes (and their exponents) in its
    prime factorization. Despite the name, nothing is printed; the formatted
    text is returned.

    Arguments:
        n (:int) - the number that was factored
        f (:list) - its factorization as a list of (prime, exponent) pairs

    Returns:
        the string "<n> = p1^e1 * p2^e2 * ...", or the integer 1 if 'n' is 1

    Raises:
        IndexError if 'f' is empty while 'n' is not 1

    EXAMPLE:
        >>> print_factorization(56, [(2, 3), (7, 1)])
        >>> '56 = 2^3 * 7^1'
    """
    if n == 1:
        # Kept as the integer 1 (not a string) for existing callers
        return 1

    if not f:
        raise IndexError("empty factorization supplied for " + str(n))

    # Build every term first and join once instead of repeated concatenation
    terms = [str(entry[0]) + "^" + str(entry[1]) for entry in f]
    return str(n) + " = " + " * ".join(terms)
def gcd(a, b):
    """
    Returns the greatest common divisor (GCD) of two specified integers.

    Arguments:
        a (:int) - the first integer
        b (:int) - the second integer

    Returns:
        the GCD of 'a' and 'b'

    Examples:
        >>> gcd(1, 3)
        >>> 1

        >>> gcd(2, 4)
        >>> 2

        >>> gcd(10**8, 350)
        >>> 50
    """
    # fractions.gcd was removed in Python 3.9 and math.gcd does not exist in
    # Python 2, so use whichever one this interpreter provides.
    _gcd = getattr(math, "gcd", None) or fractions.gcd
    return _gcd(a, b)

def xgcd(a, b):
    """
    Runs the Extended Euclidean algorithm and returns the Bézout coefficient
    that belongs to 'b'.

    Arguments:
        a (:int) - the first integer
        b (:int) - the second integer

    Returns:
        'r' such that as + br = d for some integer 's', where d = gcd(a, b).
        In particular, when d = 1, 'r' is an inverse of 'b' modulo 'a', so the
        inverse of x modulo m is obtained with xgcd(m, x) % m. Returns 0 when
        'b' is 0.

    Examples:
        >>> xgcd(7, 3)
        >>> -2

        >>> xgcd(3, 7)
        >>> 1
    """
    # 'r' and 's' track the coefficient of the original 'b' in the current
    # values of 'a' and 'b' respectively (modulo the original 'a').
    r, s = 0, 1
    while b != 0:
        c, d = divmod(a, b)
        r, s = s, r - c*s
        a, b = b, d
    return r
def is_prime_fast(n, use_probabilistic = False, tolerance = 30):
    """
    Tests whether a number is prime. Small inputs are settled by trial division
    combined with a few Fermat/Euler style power checks; larger inputs go
    through the Miller-Rabin test with a fixed set of bases whose size depends
    on 'n'. At or above MR_THRESHOLD the deterministic mode tests every base in
    [2, 2*int(log(n)*log(log(n))/log(2))), which is conjectural and backed only
    by heuristic evidence.

    Arguments:
        n (:int) - the integer to be tested
        use_probabilistic (:bool) - if True, the Miller-Rabin rounds use random
                                    witnesses instead of the fixed bases
        tolerance (:int) - number of random rounds in probabilistic mode

    Returns:
        True if 'n' is prime (or probably prime) and False otherwise

    Todo:
        Check for improved SPRP bases.

    References:
        - Francky from the PE Forums
        - https://miller-rabin.appspot.com/
        - https://en.wikipedia.org/wiki/Miller–Rabin_primality_test
    """
    firstPrime = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, \
                  53, 59, 61, 67, 71]
    trial_primes = firstPrime + [73, 79, 83, 89, 97, 101, 103]

    def no_small_factor(limit):
        # True if no prime <= limit divides n
        return all(n % p for p in trial_primes if p <= limit)

    # (lower bound on n, number of leading primes used as Miller-Rabin bases),
    # in decreasing order of the bound
    base_counts = [
        (1543267864443420616877677640751301, 20),
        (564132928021909221014087501701, 18),
        (59276361075595573263446330101, 16),
        (6003094289670105800312596501, 15),
        (3317044064679887385961981, 14),
        (318665857834031151167461, 13),
        (3825123056546413051, 12),
        (341550071728321, 9),
        (3474749660383, 7),
        (2152302898747, 6),
        (4759123141, 5),
    ]

    # Determine bases for the Miller-Rabin test, or settle small n directly
    if n >= MR_THRESHOLD:
        logn = math.log(n)
        w = range(2, 2 * int(logn * math.log(logn) / math.log(2)))
    elif n >= 9006403:
        w = [2, 7, 61]
        for bound, count in base_counts:
            if n >= bound:
                w = firstPrime[:count]
                break
    elif n >= 489997:
        # Trial division followed by power checks to the bases 2, 3 and 5
        if not no_small_factor(101):
            return False
        hn, nm1 = n >> 1, n - 1
        for base in (2, 3, 5):
            if pow(base, hn, n) not in (1, nm1):
                return False
        return True
    elif n >= 42799:
        return no_small_factor(43) \
            and pow(2, n-1, n) == 1 and pow(5, n-1, n) == 1
    elif n >= 841:
        return no_small_factor(103) and pow(2, n-1, n) == 1
    elif n >= 25:
        return no_small_factor(23)
    elif n >= 4:
        return no_small_factor(3)
    else:
        return n > 1

    # In probabilistic mode the witnesses are random, so only the number of
    # rounds matters; honour 'tolerance' for every size of n.
    if use_probabilistic:
        w = range(tolerance)

    # Cheap trial division before the expensive modular exponentiations
    if not no_small_factor(89):
        return False

    # Miller-Rabin: write n - 1 = d * 2^s with d odd
    s = 0
    d = n - 1
    while not d & 1:
        d >>= 1
        s += 1
    for k in w:
        # Pick a random witness if probabilistic
        if use_probabilistic:
            p = random.randint(2, n-2)
        else:
            p = k
        x = pow(p, d, n)
        if x == 1:
            continue
        for _ in range(s):
            if x + 1 == n:
                break
            x = x*x % n
        else:
            # Never reached n - 1: 'p' is a witness that n is composite
            return False
    return True
244 | 245 | Arguments: 246 | n (:int) - the integer to be tested 247 | use_probabilistic (:bool) - flag to indicate whether to use the regular 248 | version of the Miller-Rabin primality test 249 | tolerance (:int) - number of trials to be used to test primality 250 | 251 | Returns: 252 | True if 'n' is prime (or probably prime) and False otherwise 253 | 254 | Examples: 255 | >>> is_prime(20) 256 | >>> False 257 | 258 | >>> is_prime(7) 259 | >>> True 260 | 261 | >>> is_prime(9999) 262 | >>> False 263 | """ 264 | if n < PRIME_THRESHOLD: 265 | return is_prime_bf(n) 266 | else: 267 | if use_probabilistic: 268 | return is_prime_fast(n, use_probabilistic, tolerance) 269 | else: 270 | if n < MR_THRESHOLD: 271 | return is_prime_fast(n) 272 | else: 273 | return is_prime_fast(n, True, 40) 274 | 275 | -------------------------------------------------------------------------------- /ecm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import math 4 | import utils 5 | import random 6 | import primeSieve 7 | import constants 8 | from decimal import Decimal 9 | 10 | """ 11 | This module contains an implementation of a two-stage version Lenstra's elliptical 12 | curve factorization method (ECM) with the usual stage 1 and stage 2 optimizations. 13 | This implementation uses Suyama's paramterization to generate curves in Montgomery 14 | form and is inversionless. 15 | """ 16 | 17 | RESOLUTION = 40 18 | 19 | def compute_bounds(n): 20 | """ 21 | Computes Stage 1 and Stage 2 bounds for both ECM. This almost coincides with GMP-ECM's 22 | bounds for the same but are clipped because of prime sieve limitations. 
def point_add(px, pz, qx, qz, rx, rz, n):
    """
    Differential addition of the points P = (px : pz) and Q = (qx : qz), given
    in Montgomery (X : Z) form, over Z/nZ. Assumes R = (rx : rz) = P - Q.

    Returns the pair (x, z) representing P + Q. Each coordinate is reduced
    modulo 'n' only when it is at least 'n'.
    """
    cross_a = (px - pz) * (qx + qz)
    cross_b = (px + pz) * (qx - qz)
    total = cross_a + cross_b
    delta = cross_a - cross_b

    x = rz * total * total
    z = rx * delta * delta
    return (x % n if x >= n else x), (z % n if z >= n else z)


def point_double(px, pz, n, a24):
    """
    Doubles the point P = (px : pz), given in Montgomery (X : Z) form, over
    Z/nZ. 'a24' is the curve constant supplied by the caller.

    Returns the pair (x, z) representing 2P. Each coordinate is reduced modulo
    'n' only when it is at least 'n'.
    """
    plus_sq = (px + pz) * (px + pz)
    minus_sq = (px - pz) * (px - pz)
    # (px + pz)^2 - (px - pz)^2, i.e. 4 * px * pz
    gap = plus_sq - minus_sq

    x = plus_sq * minus_sq
    z = gap * (minus_sq + a24 * gap)
    return (x % n if x >= n else x), (z % n if z >= n else z)
# Cost weights charged for one point addition / one point doubling
ADD_COST = 6
DUP_COST = 5

def lucas_cost(k, v):
    """
    Estimates the cost of the Lucas chain for the multiplier 'k' that results
    from the starting ratio 'v', counting ADD_COST per point addition and
    DUP_COST per point doubling. The rules applied here are the same nine
    conditions, in the same order, that multiply_prac() follows.

    Arguments:
        k (:int)   - the scalar multiplier
        v (:float) - the ratio used to pick the starting pair; the first
                     remainder is round(k * v)

    Returns:
        the accumulated cost as an integer
    """
    r = int(Decimal(k) * Decimal(v) + Decimal(0.5))
    if r >= k:
        return ADD_COST * k

    d, e, c = k - r, 2*r - k, DUP_COST + ADD_COST
    while d != e:
        # Want d >= e so swap if d < e
        if d < e:
            d, e = e, d

        # All divisions below are integer (floor) divisions so that 'd' and
        # 'e' stay exact integers for arbitrarily large 'k'.
        # Condition 1
        if 4*d <= 5*e and (d + e) % 3 == 0:
            d, e = (2*d - e) // 3, (2*e - d) // 3
            c += 3 * ADD_COST
        # Condition 2
        elif 4*d <= 5*e and (d - e) % 6 == 0:
            d = (d - e) // 2
            c += ADD_COST + DUP_COST
        # Condition 3
        elif d <= 4*e:
            d -= e
            c += ADD_COST
        # Condition 4
        elif (d + e) % 2 == 0:
            d = (d - e) // 2
            c += ADD_COST + DUP_COST
        # Condition 5
        elif d % 2 == 0:
            d //= 2
            c += ADD_COST + DUP_COST
        # Condition 6
        elif d % 3 == 0:
            d = d // 3 - e
            c += 3*ADD_COST + DUP_COST
        # Condition 7
        elif (d + e) % 3 == 0:
            d = (d - 2*e) // 3
            c += 3*ADD_COST + DUP_COST
        # Condition 8
        elif (d - e) % 3 == 0:
            d = (d - e) // 3
            c += 3*ADD_COST + DUP_COST
        # Condition 9
        else:
            e //= 2
            c += ADD_COST + DUP_COST

    return c
lucas_cost(k, v[0]), 0 160 | for d in xrange(len(v)): 161 | e = lucas_cost(k, v[d]) 162 | if e < r: 163 | r, i = e, d 164 | 165 | r = int(Decimal(k) * Decimal(v[i]) + Decimal(0.5)) 166 | d, e = k - r, 2*r - k 167 | bx, bz, cx, cz = ax, az, ax, az 168 | ax, az = point_double(ax, az, n, a24) 169 | 170 | while d != e: 171 | # Want d >= e so swap if d < e 172 | if d < e: 173 | d, e = e, d 174 | ax, az, bx, bz = bx, bz, ax, az 175 | 176 | # Condition 1 177 | if 4*d <= 5*e and (d + e) % 3 == 0: 178 | d, e = (2*d - e) / 3, (2*e - d) / 3 179 | tx, tz = point_add(ax, az, bx, bz, cx, cz, n) 180 | t2x, t2z = point_add(tx, tz, ax, az, bx, bz, n) 181 | bx, bz = point_add(bx, bz, tx, tz, ax, az, n) 182 | ax, az, t2x, t2z = t2x, t2z, ax, az 183 | # Condition 2 184 | elif 4*d <= 5*e and (d - e) % 6 == 0: 185 | d = (d - e) / 2 186 | bx, bz = point_add(ax, az, bx, bz, cx, cz, n) 187 | ax, az = point_double(ax, az, n, a24) 188 | # Condition 3 189 | elif d <= 4*e: 190 | d -= e 191 | # tx, tz = point_add(bx, bz, ax, az, cx, cz, n) 192 | # bx, tx, cx = tx, cx, bx 193 | # bz, tz, cz = tz, cz, bz 194 | cx, cz = point_add(bx, bz, ax, az, cx, cz, n) 195 | bx, bz, cx, cz = cx, cz, bx, bz 196 | # Condition 4 197 | elif (d + e) % 2 == 0: 198 | d = (d - e) / 2 199 | bx, bz = point_add(bx, bz, ax, az, cx, cz, n) 200 | ax, az = point_double(ax, az, n, a24) 201 | # Condition 5 202 | elif d % 2 == 0: 203 | d /= 2 204 | cx, cz = point_add(cx, cz, ax, az, bx, bz, n) 205 | ax, az = point_double(ax, az, n, a24) 206 | # Condition 6 207 | elif d % 3 == 0: 208 | d = d/3 - e 209 | tx, tz = point_double(ax, az, n, a24) 210 | t2x, t2z = point_add(ax, az, bx, bz, cx, cz, n) 211 | ax, az = point_add(tx, tz, ax, az, ax, az, n) 212 | # tx, tz = point_add(tx, tz, t2x, t2z, cx, cz, n) 213 | # cx, bx, tx = bx, tx, cx 214 | # cz, bz, tz = bz, tz, cz 215 | cx, cz = point_add(tx, tz, t2x, t2z, cx, cz, n) 216 | bx, bz, cx, cz = cx, cz, bx, bz 217 | # Condition 7 218 | elif (d + e) % 3 == 0: 219 | d = (d - 2*e) / 3 220 
def factorize_ecm(n, verbose = False):
    """
    Searches for a factor of 'n' with a two-stage elliptic curve method on
    random curves in Montgomery form, using the bounds from compute_bounds(n).
    At most constants.MAX_CURVES_ECM + 1 curves are tried.

    Arguments:
        n (:int)        - the integer to be factored
        verbose (:bool) - if True, prints the bounds and progress messages

    Returns:
        'n' itself if 'n' is 1 or prime, a non-trivial factor of 'n' if one is
        found, and -1 if no curve produces one
    """
    if n == 1 or utils.is_prime(n):
        return n

    B1, B2 = compute_bounds(n)
    if verbose:
        print("Number of digits: %s" % len(str(n)))
        print("Bounds: %s %s" % (B1, B2))

    # Stage 2 walks (B1, B2] in steps of 2*D. S[2*d-1], S[2*d] hold the X and Z
    # coordinates of (2*d)Q and beta[d] holds their product.
    D = int(math.sqrt(B2))
    beta = [0] * (D+1)
    S = [0] * (2*D + 2)

    # ----- Stage 1 and Stage 2 precomputations -----
    curves, log_B1 = 0, math.log(B1)

    if verbose:
        print("Sieving primes...")
    primes = primeSieve.prime_sieve(B2)

    num_primes = len(primes)
    idx_B1 = utils.binary_search(B1, primes)

    # Compute a B1-powersmooth integer 'k'
    k = 1
    for i in range(idx_B1):
        p = primes[i]
        k = k * pow(p, int(log_B1/math.log(p)))

    g = 1
    while (g == 1 or g == n) and curves <= constants.MAX_CURVES_ECM:
        curves += 1
        sigma = random.randint(6, constants.MAX_RND_ECM)
        if verbose and curves % RESOLUTION == 0:
            print("Tried %s random curves..." % curves)

        # Generate a new random curve in Montgomery form, derived from sigma
        u = ((sigma * sigma) - 5) % n
        v = (4 * sigma) % n
        if u == 0 or v == 0:
            # The divisions below would divide by zero; draw another sigma
            continue
        vmu = v - u
        # NOTE(review): these are integer floor divisions (as '/' was under
        # Python 2), not modular inverses as Suyama's parametrization would
        # prescribe. Kept as in the original -- confirm whether intended.
        A = ((vmu*vmu*vmu) * (3*u + v) // (4*u*u*u*v) - 2) % n
        a24 = (A+2) // 4

        # ----- Stage 1 -----
        px, pz = ((u*u*u) // (v*v*v)) % n, 1
        qx, qz = scalar_multiply(k, px, pz, n, a24)
        g = utils.gcd(n, qz)

        # If stage 1 is successful, return a non-trivial factor else
        # move on to stage 2
        if g != 1 and g != n:
            if verbose:
                print("Stage 1 found factor!")
            return g

        # ----- Stage 2 -----
        # Table of the even multiples 2Q, 4Q, ..., (2*D)Q
        S[1], S[2] = point_double(qx, qz, n, a24)
        S[3], S[4] = point_double(S[1], S[2], n, a24)
        beta[1] = (S[1] * S[2]) % n
        beta[2] = (S[3] * S[4]) % n
        for d in range(3, D+1):
            d2 = 2 * d
            S[d2-1], S[d2] = point_add(S[d2-3], S[d2-2], S[1], S[2], S[d2-5], S[d2-4], n)
            beta[d] = (S[d2-1] * S[d2]) % n

        g, B = 1, B1 - 1

        # R = B*Q is the running point; T = (B - 2*D)*Q is the previous one,
        # which the differential addition needs.
        rx, rz = scalar_multiply(B, qx, qz, n, a24)
        tx, tz = scalar_multiply(B - 2*D, qx, qz, n, a24)
        q, step = idx_B1, 2*D

        for r in range(B, B2, step):
            alpha, limit = (rx * rz) % n, r + step
            while q < num_primes and primes[q] <= limit:
                # Offset of this prime from r, in units of 2
                d = (primes[q] - r) // 2
                f = (rx - S[2*d-1]) * (rz + S[2*d]) - alpha + beta[d]
                # Accumulate the product; a single gcd is taken per curve
                g = (g * f) % n
                q += 1
            trx, trz = rx, rz
            rx, rz = point_add(rx, rz, S[2*D-1], S[2*D], tx, tz, n)
            tx, tz = trx, trz

        g = utils.gcd(n, g)

    # Decide on the gcd itself, not on the curve count, so that a factor found
    # on the very last curve is not thrown away.
    if g != 1 and g != n:
        if verbose:
            print("Stage 2 found factor!")
        return g

    # No non-trivial factor found, return -1
    return -1
17 | 18 | 19 | BENCHMARKS: 20 | Tests performed on a Macbook Pro (mid-2012) w/ a 2.6 GHz Intel Core i7 3720QM 21 | processor and 8 GB RAM. 22 | 23 | BENCHMARKS | 10^6 | 10^7 | 10^8 | 10^9 24 | ------------------------------------------------------------- 25 | Eratosthenes| 0.02s | 0.32s | 3.81s | 93.99s 26 | Atkin| 0.06s | 0.13s | 0.72s | 5.4s 27 | 28 | REFRENCES: 29 | [1] A.O.L Atkin, D.J.Bernstein; Prime Sieves using Binary Quadratic Forms; Mathematics 30 | of Computation, 73-246: 1023-30 31 | """ 32 | 33 | import math 34 | import time 35 | import utils 36 | import constants 37 | 38 | # Sieve bits 39 | segs = [[] for _ in xrange(60)] 40 | 41 | # Primes under 60 42 | under60 = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59] 43 | 44 | # delta's in the solutions to the congruences in algorithms 4.1, 4.2, 4.3 45 | # in the paper 46 | dAll = [1, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 49, 53, 59] 47 | 48 | # All (d, f, g) where 4f^2 + g^2 = d (mod 60), d ≤ 60, f ≤ 15, g ≤ 30 49 | DFG1 = [[1, 0, 1], [1, 0, 11], [1, 0, 19], \ 50 | [1, 0, 29], [1, 2, 15], [1, 3, 5], [1, 3, 25], [1, 5, 9], \ 51 | [1, 5, 21], [1, 7, 15], [1, 8, 15], [1, 10, 9], \ 52 | [1, 10, 21], [1, 12, 5], [1, 12, 25], [1, 13, 15], \ 53 | [13, 1, 3], [13, 1, 27], [13, 4, 3], [13, 4, 27], \ 54 | [13, 6, 7], [13, 6, 13], [13, 6, 17], [13, 6, 23], \ 55 | [13, 9, 7], [13, 9, 13], [13, 9, 17], [13, 9, 23], \ 56 | [13, 11, 3], [13, 11, 27], [13, 14, 3], [13, 14, 27], \ 57 | [17, 2, 1], [17, 2, 11], [17, 2, 19], [17, 2, 29], \ 58 | [17, 7, 1], [17, 7, 11], [17, 7, 19], [17, 7, 29], \ 59 | [17, 8, 1], [17, 8, 11], [17, 8, 19], [17, 8, 29], \ 60 | [17, 13, 1], [17, 13, 11], [17, 13, 19], [17, 13, 29], \ 61 | [29, 1, 5], [29, 1, 25], [29, 4, 5], [29, 4, 25], \ 62 | [29, 5, 7], [29, 5, 13], [29, 5, 17], [29, 5, 23], \ 63 | [29, 10, 7], [29, 10, 13], [29, 10, 17], [29, 10, 23], \ 64 | [29, 11, 5], [29, 11, 25], [29, 14, 5], [29, 14, 25], \ 65 | [37, 2, 9], [37, 2, 21], [37, 3, 1], [37, 3, 
11], \ 66 | [37, 3, 19], [37, 3, 29], [37, 7, 9], [37, 7, 21], \ 67 | [37, 8, 9], [37, 8, 21], [37, 12, 1], [37, 12, 11], \ 68 | [37, 12, 19], [37, 12, 29], [37, 13, 9], [37, 13, 21], \ 69 | [41, 2, 5], [41, 2, 25], [41, 5, 1], [41, 5, 11], \ 70 | [41, 5, 19], [41, 5, 29], [41, 7, 5], [41, 7, 25], \ 71 | [41, 8, 5], [41, 8, 25], [41, 10, 1], [41, 10, 11], \ 72 | [41, 10, 19], [41, 10, 29], [41, 13, 5], [41, 13, 25], \ 73 | [49, 0, 7], [49, 0, 13], [49, 0, 17], [49, 0, 23], \ 74 | [49, 1, 15], [49, 4, 15], [49, 5, 3], [49, 5, 27], \ 75 | [49, 6, 5], [49, 6, 25], [49, 9, 5], [49, 9, 25], \ 76 | [49, 10, 3], [49, 10, 27], [49, 11, 15], [49, 14, 15], \ 77 | [53, 1, 7], [53, 1, 13], [53, 1, 17], [53, 1, 23], \ 78 | [53, 4, 7], [53, 4, 13], [53, 4, 17], [53, 4, 23], \ 79 | [53, 11, 7], [53, 11, 13], [53, 11, 17], [53, 11, 23], \ 80 | [53, 14, 7], [53, 14, 13], [53, 14, 17], [53, 14, 23]] 81 | 82 | 83 | # All (d, f, g) where 3f^2 + g^2 = d (mod 60), d ≤ 60, f ≤ 10, g ≤ 30 84 | DFG2 = [[7, 1, 2], [7, 1, 8], [7, 1, 22], \ 85 | [7, 1, 28], [7, 3, 10], [7, 3, 20], [7, 7, 10], \ 86 | [7, 7, 20], [7, 9, 2], [7, 9, 8], [7, 9, 22], [7, 9, 28], \ 87 | [19, 1, 4], [19, 1, 14], [19, 1, 16], [19, 1, 26], \ 88 | [19, 5, 2], [19, 5, 8], [19, 5, 22], [19, 5, 28], \ 89 | [19, 9, 4], [19, 9, 14], [19, 9, 16], [19, 9, 26], \ 90 | [31, 3, 2], [31, 3, 8], [31, 3, 22], [31, 3, 28], \ 91 | [31, 5, 4], [31, 5, 14], [31, 5, 16], [31, 5, 26], \ 92 | [31, 7, 2], [31, 7, 8], [31, 7, 22], [31, 7, 28], \ 93 | [43, 1, 10], [43, 1, 20], [43, 3, 4], [43, 3, 14], \ 94 | [43, 3, 16], [43, 3, 26], [43, 7, 4], [43, 7, 14], \ 95 | [43, 7, 16], [43, 7, 26], [43, 9, 10], [43, 9, 20]] 96 | 97 | 98 | # All (d, f, g) where 3f^2 - g^2 = d (mod 60), d < 60, f ≤ 10, g ≤ 30 99 | DFG3 = [[11, 0, 7], [11, 0, 13], [11, 0, 17], \ 100 | [11, 0, 23], [11, 2, 1], [11, 2, 11], [11, 2, 19], \ 101 | [11, 2, 29], [11, 3, 4], [11, 3, 14], [11, 3, 16], \ 102 | [11, 3, 26], [11, 5, 2], [11, 5, 8], [11, 5, 22], \ 103 | [11, 5, 28], 
[11, 7, 4], [11, 7, 14], [11, 7, 16], \ 104 | [11, 7, 26], [11, 8, 1], [11, 8, 11], [11, 8, 19], \ 105 | [11, 8, 29], [23, 1, 10], [23, 1, 20], [23, 2, 7], \ 106 | [23, 2, 13], [23, 2, 17], [23, 2, 23], [23, 3, 2], \ 107 | [23, 3, 8], [23, 3, 22], [23, 3, 28], [23, 4, 5], \ 108 | [23, 4, 25], [23, 6, 5], [23, 6, 25], [23, 7, 2], \ 109 | [23, 7, 8], [23, 7, 22], [23, 7, 28], [23, 8, 7], \ 110 | [23, 8, 13], [23, 8, 17], [23, 8, 23], [23, 9, 10], \ 111 | [23, 9, 20], [47, 1, 4], [47, 1, 14], [47, 1, 16], \ 112 | [47, 1, 26], [47, 2, 5], [47, 2, 25], [47, 3, 10], \ 113 | [47, 3, 20], [47, 4, 1], [47, 4, 11], [47, 4, 19], \ 114 | [47, 4, 29], [47, 6, 1], [47, 6, 11], [47, 6, 19], \ 115 | [47, 6, 29], [47, 7, 10], [47, 7, 20], [47, 8, 5], \ 116 | [47, 8, 25], [47, 9, 4], [47, 9, 14], [47, 9, 16], \ 117 | [47, 9, 26], [59, 0, 1], [59, 0, 11], [59, 0, 19], \ 118 | [59, 0, 29], [59, 1, 2], [59, 1, 8], [59, 1, 22], \ 119 | [59, 1, 28], [59, 4, 7], [59, 4, 13], [59, 4, 17], \ 120 | [59, 4, 23], [59, 5, 4], [59, 5, 14], [59, 5, 16], \ 121 | [59, 5, 26], [59, 6, 7], [59, 6, 13], [59, 6, 17], \ 122 | [59, 6, 23], [59, 9, 2], [59, 9, 8], [59, 9, 22], \ 123 | [59, 9, 28]] 124 | 125 | 126 | def small_sieve(n): 127 | """ 128 | Returns the primes under a specified number with a modified sieve of Eratosthenes and a 129 | wheel mod 6. 
130 | 131 | Arguments: 132 | n (:int) - the number to list primes under 133 | 134 | Returns: 135 | the primes under 'n' in a list 136 | 137 | Examples: 138 | >>> small_sieve(9) 139 | >>> [2, 3, 5, 7] 140 | 141 | >>> small_sieve(30) 142 | >>> [2, 3, 5, 7, 11, 13, 17, 19, 23, 29] 143 | 144 | References: 145 | http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n 146 | /3035188#3035188 147 | """ 148 | correction = (n % 6 > 1) 149 | n = {0: n, 1: n-1, 2: n+4, 3: n+3, 4: n+2, 5: n+1}[n % 6] 150 | sieve = [True] * (n/3) 151 | sieve[0] = False 152 | limit = int(math.sqrt(n))/3 + 1 153 | # Use a wheel (mod 6) 154 | for i in range(limit): 155 | if sieve[i]: 156 | k = 3*i + 1 | 1 157 | sieve[((k*k)/3) :: (k << 1)] = \ 158 | [False] * ((n/6 - (k*k)/6 - 1)/k + 1) 159 | sieve[(k * k + (k << 2) - \ 160 | (k << 1) * (i & 1)) / 3 :: (k << 1)] = \ 161 | [False] * ((n/6 - (k*k + (k << 2) - \ 162 | 2*k * (i & 1))/6 - 1)/k + 1) 163 | return [2, 3] + [3*i + 1 | 1 for i in xrange(1, n/3 - correction) if sieve[i]] 164 | 165 | 166 | def enum1(d, f, g, L, B, segs): 167 | """ 168 | Alg 4.1: Given d ≤ 60, f ≤ 15, g ≤ 30 such that 4f^2 + g^2 = d (mod 60) find (x, y, k) 169 | with x > 0, y > 0, L ≤ k ≤ L + B, such that 4x^2 + y^2 = 60k + d and x = f + 15r, y = 170 | g + 30s where r, s are integers. 
171 | """ 172 | x, y0, temp = f, g, L+B 173 | k0 = (4*f*f + g*g - d) / 60 174 | while k0 < temp: 175 | k0 += x + x + 15 176 | x += 15 177 | 178 | while True: 179 | x -= 15 180 | k0 -= x + x + 15 181 | if x <= 0: 182 | return 183 | while k0 < L: 184 | k0 += y0 + 15 185 | y0 += 30 186 | 187 | k, y = k0, y0 188 | while k < temp: 189 | segs[d][(k-L) >> 5] ^= 1 << ((k-L) & 31) 190 | k += y + 15 191 | y += 30 192 | 193 | 194 | def enum2(d, f, g, L, B, segs): 195 | """ 196 | Alg 4.2: Given d ≤ 60, f ≤ 10, g ≤ 30 such that 3f^2 + g^2 = d (mod 60) find (x, y, k) 197 | with x > 0, y > 0, L ≤ k ≤ L + B, such that 3x^2 + y^2 = 60k + d and x = f + 10r, y = 198 | g + 30s where r, s are integers. 199 | """ 200 | x, y0, temp = f, g, L+B 201 | k0 = (3*f*f + g*g - d) / 60 202 | while k0 < temp: 203 | k0 += x + 5 204 | x += 10 205 | 206 | while True: 207 | x -= 10 208 | k0 -= x + 5 209 | if x <= 0: 210 | return 211 | while k0 < L: 212 | k0 += y0 + 15 213 | y0 += 30 214 | 215 | k, y = k0, y0 216 | while k < temp: 217 | segs[d][(k-L) >> 5] ^= 1 << ((k-L) & 31) 218 | 219 | k += y + 15 220 | y += 30 221 | 222 | 223 | def enum3(d, f, g, L, B, segs): 224 | """ 225 | Alg 4.3: Given d < 60, f ≤ 10, g ≤ 30 such that 3f^2 - g^2 = d (mod 60) find (x, y, k) 226 | with x > 0, y > 0, L ≤ k ≤ L + B, such that 3x^2 - y^2 = 60k + d and x = f + 10r, y = 227 | g + 30s where r, s are integers. 228 | """ 229 | x, y0, temp = f, g, L+B 230 | k0 = (3*f*f - g*g - d) / 60 231 | 232 | while True: 233 | while k0 >= temp: 234 | if x <= y0: 235 | return 236 | k0 -= y0 + 15 237 | y0 += 30 238 | 239 | k, y = k0, y0 240 | while k >= L and y < x: 241 | segs[d][(k-L) >> 5] ^= 1 << ((k-L) & 31) 242 | k -= y + 15 243 | y += 30 244 | 245 | k0 += x + 5 246 | x += 10 247 | 248 | 249 | def sieve_of_atkin(n): 250 | """ 251 | Returns the primes under a specified number with a segmented sieve of Atkin. 
252 | 253 | Arguments: 254 | n (:int) - the number to list primes under 255 | 256 | Returns: 257 | the primes under 'n' in a list 258 | """ 259 | sqrt_n, u, r = int(math.sqrt(n)), n + 32, 17 260 | B, lu = 60 * sqrt_n, math.log(u) 261 | primes = small_sieve(sqrt_n) 262 | ret = under60 + [0] * int(u/lu + u/(lu*lu) * 1.5 - r) 263 | for d in dAll: 264 | segs[d] = [0] * ((B >> 5) + 1) 265 | 266 | # Do computations in segments of size 60√n 267 | lim = n/60 + 1 268 | for L in xrange(1, lim, B): 269 | for d in dAll: 270 | for k in xrange(len(segs[d])): 271 | segs[d][k] = 0 272 | 273 | # Sieve off the primes (i.e. solutions to the various quadratic 274 | # Diophantine equations) 275 | lim2 = 60 * (L+B) 276 | for d,f,g in DFG1: 277 | enum1(d, f, g, L, B, segs) 278 | for d,f,g in DFG2: 279 | enum2(d, f, g, L, B, segs) 280 | for d,f,g in DFG3: 281 | enum3(d, f, g, L, B, segs) 282 | 283 | # Sieve off non-squarefree numbers 284 | for p in primes: 285 | p2 = p * p 286 | if p2 > lim2: 287 | break 288 | if p >= 7: 289 | b = -utils.xgcd(p2, 60) 290 | if b < 0: b += p2 291 | for d in dAll: 292 | x = b * (60*L + d) % p2 293 | while x < B: 294 | segs[d][x >> 5] &= ~(1 << (x & 31)) 295 | x += p2 296 | 297 | # Compute primes 298 | for j in xrange((B >> 5) + 1): 299 | for x in xrange(32): 300 | k = 60 * (L + x + (j << 5)) 301 | for d in dAll: 302 | if k + d > n: 303 | return ret[:r] 304 | # If a_k = 1, 60k + d is a prime 305 | if ((segs[d][j] << 31 - x) & 0xFFFFFFFF) >= 0x80000000: 306 | ret[r] = 60*k + d 307 | r += 1 308 | 309 | def prime_sieve(n): 310 | """ 311 | Returns the primes below a specified number with the choice of prime sieve depending on the 312 | size of the number. 
313 | 314 | Arguments: 315 | n (:int) - the number to list primes under 316 | 317 | Returns: 318 | the primes under 'n' in a list 319 | 320 | Examples: 321 | >>> prime_sieve(9) 322 | >>> [2, 3, 5, 7] 323 | 324 | >>> len(prime_sieve(10**9)) 325 | >>> 50847534 326 | """ 327 | if n <= constants.SMALL_THRESHOLD: 328 | return under60[:utils.binary_search(n, under60)] 329 | elif n <= constants.ERAT_THRESHOLD: 330 | return small_sieve(n) 331 | elif n <= constants.ATKIN_THERSHOLD: 332 | return sieve_of_atkin(n) 333 | else: 334 | return segmented_sieve(2, n) 335 | 336 | 337 | def segmented_sieve(lo, hi): 338 | """ 339 | Returns the primes between two specified numbers using a segmented sieve of Eratosthenes. 340 | Optionally, one may specify the size of the segment to be used. If not specified, the segment 341 | size used defaults to the square root of the difference between the two specified numbers. 342 | 343 | NOTE: A small segment size results in low memory usage but results in a large computation time. 344 | There seems to be an optimal segment size but I can't really figure out what it is. 
345 | 346 | Arguments: 347 | lo (:int) - the lower bound of the interval 348 | hi (:int) - the upper bound of the interval 349 | 350 | Returns: 351 | the primes in the interval [lo, hi] in a list 352 | """ 353 | if hi < lo: return [] 354 | max_prime, pos = int(math.sqrt(hi)), 0 355 | base_primes = prime_sieve(max_prime) 356 | primes = [0] * int(math.ceil(1.5 * hi/math.log(hi)) - math.floor(1.5 * lo/math.log(lo))) 357 | 358 | # Include primes below √hi if necessary 359 | if lo < max_prime: 360 | lo_pos = utils.binary_search(lo, base_primes, include_equal = True) 361 | for k in xrange(lo_pos, len(base_primes)): 362 | primes[pos] = base_primes[k] 363 | pos += 1 364 | lo = max_prime 365 | 366 | # Compute segment size 367 | delta = constants.UPPER_SEG_SIZE if hi - lo >= constants.UPPER_SEG_SIZE else constants.LOWER_SEG_SIZE 368 | 369 | l1, l = len(base_primes), (delta >> 4) + 1 370 | int_size, sieve = l << 3, bytearray([0x0] * l) 371 | lo_1, hi_1 = lo, lo + delta 372 | 373 | # Compute stuff in segments 374 | while lo_1 <= hi: 375 | # Re-zero sieve bits if necessary 376 | if lo_1 != lo: 377 | for i in range(l): 378 | sieve[i] = 0 379 | 380 | if (lo_1 & 1) == 0: 381 | lo_1 += 1 382 | 383 | # Sieve off primes 384 | for i in xrange(1, l1): 385 | p = base_primes[i] 386 | k = (p - (lo_1 % p)) % p 387 | if (k & 1) == 1: 388 | k += p 389 | k >>= 1 390 | while k < int_size: 391 | sieve[k >> 3] |= 1 << (k & 7) 392 | k += p 393 | 394 | # Compute primes and put them in the prime list 395 | end = min(hi_1, hi) + 1 396 | for n in range(lo_1, end, 2): 397 | d = n - lo_1 398 | if ((sieve[d >> 4] >> ((d >> 1) & 0x7)) & 0x1) == 0x0: 399 | primes[pos] = n 400 | pos += 1 401 | 402 | # Update segment boundaries 403 | lo_1 = hi_1 + 1 404 | hi_1 = lo_1 + delta 405 | 406 | return primes[:pos] 407 | --------------------------------------------------------------------------------