PyPy 5.4.1 64bit(linux)、HP(80)〜1.54s
32ビットバージョンの時間はわずかに遅くなります。
経験的に決定されたブレークポイントを使用して、4つの異なる分解方法を使用します。
- n <2 30 →試行分割による因数分解
- n <2 70 → ポラーズロー(ブレントのバリアント)
- その他 → Lenstra ECF(モンゴメリー曲線、須山のパラメーターを使用)、テスト〜100曲線
- ECFが失敗した場合 → 複数多項式2次ふるい(モンゴメリーの多項式を使用)
ECFとMPQSの間のクリーンなブレークを見つけようとしばらくの間試みましたが、1つはないと思われます。ただし、 nに小さな係数が含まれている、ECFは通常、すぐにそれを検出するため、MPQSに進む前に、いくつかの曲線のみをテストすることを選択しました。
現在、Mathmaticaよりも2倍遅いだけです。これは確かに成功だと思います。
home-prime.py
import math
import my_math
import mpqs
max_trial = 1e10
max_pollard = 1e22
def factor(n):
if n < max_trial:
return factor_trial(n)
for p in my_math.small_primes:
if n%p == 0:
return [p] + factor(n/p)
if my_math.is_prime(n):
return [n]
if n < max_pollard:
p = pollard_rho(n)
else:
p = lenstra_ecf(n) or mpqs.mpqs(n)
return factor(p) + factor(n/p)
def factor_trial(n):
a = []
for p in my_math.small_primes:
while n%p == 0:
a += [p]
n /= p
i = 211
while i*i < n:
for o in my_math.offsets:
i += o
while n%i == 0:
a += [i]
n /= i
if n > 1:
a += [n]
return a
def pollard_rho(n):
# Brent's variant
y, r, q = 0, 1, 1
c, m = 9, 40
g = 1
while g == 1:
x = y
for i in range(r):
y = (y*y + c) % n
k = 0
while k < r and g == 1:
ys = y
for j in range(min(m, r-k)):
y = (y*y + c) % n
q = q*abs(x-y) % n
g = my_math.gcd(q, n)
k += m
r *= 2
if g == n:
ys = (ys*ys + c) % n
g = gcd(n, abs(x-ys))
while g == 1:
ys = (ys*ys + c) % n
g = gcd(n, abs(x-ys))
return g
def ec_add((x1, z1), (x2, z2), (x0, z0), n):
t1, t2 = (x1-z1)*(x2+z2), (x1+z1)*(x2-z2)
x, z = t1+t2, t1-t2
return (z0*x*x % n, x0*z*z % n)
def ec_double((x, z), (a, b), n):
t1 = x+z; t1 *= t1
t2 = x-z; t2 *= t2
t3 = t1 - t2
t4 = 4*b*t2
return (t1*t4 % n, t3*(t4 + a*t3) % n)
def ec_multiply(k, p, C, n):
# Montgomery ladder algorithm
p0 = p
q, p = p, ec_double(p, C, n)
b = k >> 1
while b > (b & -b):
b ^= b & -b
while b:
if k&b:
q, p = ec_add(p, q, p0, n), ec_double(p, C, n)
else:
q, p = ec_double(q, C, n), ec_add(p, q, p0, n),
b >>= 1
return q
def lenstra_ecf(n, m = 5):
# Montgomery curves w/ Suyama parameterization.
# Based on pseudocode found in:
# "Implementing the Elliptic Curve Method of Factoring in Reconfigurable Hardware"
# Gaj, Kris et. al
# http://www.hyperelliptic.org/tanja/SHARCS/talks06/Gaj.pdf
# Phase 2 is not implemented.
B1, B2 = 8, 13
for i in range(m):
pg = my_math.primes()
p = pg.next()
k = 1
while p < B1:
k *= p**int(math.log(B1, p))
p = pg.next()
for s in range(B1, B2):
u, v = s*s-5, 4*s
x = u*u*u
z = v*v*v
t = pow(v-u, 3, n)
P = (x, z)
C = (t*(3*u+v) % n, 4*x*v % n)
Q = ec_multiply(k, P, C, n)
g = my_math.gcd(Q[1], n)
if 1 < g < n: return g
B1, B2 = B2, B1 + B2
if __name__ == '__main__':
import time
import sys
for n in sys.argv[1:]:
t0 = time.time()
i = int(n)
f = []
while len(f) != 1:
f = sorted(factor(i))
#print i, f
i = int(''.join(map(str, f)))
t1 = time.time()-t0
print n, i
print '%.3fs'%(t1)
print
サンプルのタイミング
$ pypy home-prime.py 8 16 20 64 65 80
8 3331113965338635107
0.005s
16 31636373
0.001s
20 3318308475676071413
0.004s
64 1272505013723
0.000s
65 1381321118321175157763339900357651
0.397s
80 313169138727147145210044974146858220729781791489
1.537s
5の平均は約0.39秒です。
依存関係
mpqs.py
Fastest semiprime factorizationへの私の答えから直接取られています、いくつかの非常に小さな修正を加えた。
mpqs.py
import math
import my_math
import time
# Multiple Polynomial Quadratic Sieve
def mpqs(n, verbose=False):
if verbose:
time1 = time.time()
root_n = my_math.isqrt(n)
root_2n = my_math.isqrt(n+n)
# formula chosen by experimentation
# seems to be close to optimal for n < 10^50
bound = int(5 * math.log(n, 10)**2)
prime = []
mod_root = []
log_p = []
num_prime = 0
# find a number of small primes for which n is a quadratic residue
p = 2
while p < bound or num_prime < 3:
# legendre (n|p) is only defined for odd p
if p > 2:
leg = my_math.legendre(n, p)
else:
leg = n & 1
if leg == 1:
prime += [p]
mod_root += [int(my_math.mod_sqrt(n, p))]
log_p += [math.log(p, 10)]
num_prime += 1
elif leg == 0:
if verbose:
print 'trial division found factors:'
print p, 'x', n/p
return p
p = my_math.next_prime(p)
# size of the sieve
x_max = bound*8
# maximum value on the sieved range
m_val = (x_max * root_2n) >> 1
# fudging the threshold down a bit makes it easier to find powers of primes as factors
# as well as partial-partial relationships, but it also makes the smoothness check slower.
# there's a happy medium somewhere, depending on how efficient the smoothness check is
thresh = math.log(m_val, 10) * 0.735
# skip small primes. they contribute very little to the log sum
# and add a lot of unnecessary entries to the table
# instead, fudge the threshold down a bit, assuming ~1/4 of them pass
min_prime = int(thresh*3)
fudge = sum(log_p[i] for i,p in enumerate(prime) if p < min_prime)/4
thresh -= fudge
sieve_primes = [p for p in prime if p >= min_prime]
sp_idx = prime.index(sieve_primes[0])
if verbose:
print 'smoothness bound:', bound
print 'sieve size:', x_max
print 'log threshold:', thresh
print 'skipping primes less than:', min_prime
smooth = []
used_prime = set()
partial = {}
num_smooth = 0
prev_num_smooth = 0
num_used_prime = 0
num_partial = 0
num_poly = 0
root_A = my_math.isqrt(root_2n / x_max)
if verbose:
print 'sieving for smooths...'
while True:
# find an integer value A such that:
# A is =~ sqrt(2*n) / x_max
# A is a perfect square
# sqrt(A) is prime, and n is a quadratic residue mod sqrt(A)
while True:
root_A = my_math.next_prime(root_A)
leg = my_math.legendre(n, root_A)
if leg == 1:
break
elif leg == 0:
if verbose:
print 'dumb luck found factors:'
print root_A, 'x', n/root_A
return root_A
A = root_A * root_A
# solve for an adequate B
# B*B is a quadratic residue mod n, such that B*B-A*C = n
# this is unsolvable if n is not a quadratic residue mod sqrt(A)
b = my_math.mod_sqrt(n, root_A)
B = (b + (n - b*b) * my_math.mod_inv(b + b, root_A))%A
# B*B-A*C = n <=> C = (B*B-n)/A
C = (B*B - n) / A
num_poly += 1
# sieve for prime factors
sums = [0.0]*(2*x_max)
i = sp_idx
for p in sieve_primes:
logp = log_p[i]
inv_A = my_math.mod_inv(A, p)
# modular root of the quadratic
a = int(((mod_root[i] - B) * inv_A)%p)
b = int(((p - mod_root[i] - B) * inv_A)%p)
amx = a+x_max
bmx = b+x_max
ax = amx-p
bx = bmx-p
k = p
while k < x_max:
sums[k+ax] += logp
sums[k+bx] += logp
sums[amx-k] += logp
sums[bmx-k] += logp
k += p
if k+ax < x_max:
sums[k+ax] += logp
if k+bx < x_max:
sums[k+bx] += logp
if amx-k > 0:
sums[amx-k] += logp
if bmx-k > 0:
sums[bmx-k] += logp
i += 1
# check for smooths
x = -x_max
for v in sums:
if v > thresh:
vec = set()
sqr = []
# because B*B-n = A*C
# (A*x+B)^2 - n = A*A*x*x+2*A*B*x + B*B - n
# = A*(A*x*x+2*B*x+C)
# gives the congruency
# (A*x+B)^2 = A*(A*x*x+2*B*x+C) (mod n)
# because A is chosen to be square, it doesn't need to be sieved
sieve_val = (A*x + B+B)*x + C
if sieve_val < 0:
vec = {-1}
sieve_val = -sieve_val
for p in prime:
while sieve_val%p == 0:
if p in vec:
# keep track of perfect square factors
# to avoid taking the sqrt of a gigantic number at the end
sqr += [p]
vec ^= {p}
sieve_val = int(sieve_val / p)
if sieve_val == 1:
# smooth
smooth += [(vec, (sqr, (A*x+B), root_A))]
used_prime |= vec
elif sieve_val in partial:
# combine two partials to make a (xor) smooth
# that is, every prime factor with an odd power is in our factor base
pair_vec, pair_vals = partial[sieve_val]
sqr += list(vec & pair_vec) + [sieve_val]
vec ^= pair_vec
smooth += [(vec, (sqr + pair_vals[0], (A*x+B)*pair_vals[1], root_A*pair_vals[2]))]
used_prime |= vec
num_partial += 1
else:
# save partial for later pairing
partial[sieve_val] = (vec, (sqr, A*x+B, root_A))
x += 1
prev_num_smooth = num_smooth
num_smooth = len(smooth)
num_used_prime = len(used_prime)
if verbose:
print 100 * num_smooth / num_prime, 'percent complete\r',
if num_smooth > num_used_prime and num_smooth > prev_num_smooth:
if verbose:
print '%d polynomials sieved (%d values)'%(num_poly, num_poly*x_max*2)
print 'found %d smooths (%d from partials) in %f seconds'%(num_smooth, num_partial, time.time()-time1)
print 'solving for non-trivial congruencies...'
used_prime_list = sorted(list(used_prime))
# set up bit fields for gaussian elimination
masks = []
mask = 1
bit_fields = [0]*num_used_prime
for vec, vals in smooth:
masks += [mask]
i = 0
for p in used_prime_list:
if p in vec: bit_fields[i] |= mask
i += 1
mask <<= 1
# row echelon form
col_offset = 0
null_cols = []
for col in xrange(num_smooth):
pivot = col-col_offset == num_used_prime or bit_fields[col-col_offset] & masks[col] == 0
for row in xrange(col+1-col_offset, num_used_prime):
if bit_fields[row] & masks[col]:
if pivot:
bit_fields[col-col_offset], bit_fields[row] = bit_fields[row], bit_fields[col-col_offset]
pivot = False
else:
bit_fields[row] ^= bit_fields[col-col_offset]
if pivot:
null_cols += [col]
col_offset += 1
# reduced row echelon form
for row in xrange(num_used_prime):
# lowest set bit
mask = bit_fields[row] & -bit_fields[row]
for up_row in xrange(row):
if bit_fields[up_row] & mask:
bit_fields[up_row] ^= bit_fields[row]
# check for non-trivial congruencies
for col in null_cols:
all_vec, (lh, rh, rA) = smooth[col]
lhs = lh # sieved values (left hand side)
rhs = [rh] # sieved values - n (right hand side)
rAs = [rA] # root_As (cofactor of lhs)
i = 0
for field in bit_fields:
if field & masks[col]:
vec, (lh, rh, rA) = smooth[i]
lhs += list(all_vec & vec) + lh
all_vec ^= vec
rhs += [rh]
rAs += [rA]
i += 1
factor = my_math.gcd(my_math.list_prod(rAs)*my_math.list_prod(lhs) - my_math.list_prod(rhs), n)
if 1 < factor < n:
break
else:
if verbose:
print 'none found.'
continue
break
if verbose:
print 'factors found:'
print factor, 'x', n/factor
print 'time elapsed: %f seconds'%(time.time()-time1)
return factor
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='Uses a MPQS to factor a composite number')
parser.add_argument('composite', metavar='number_to_factor', type=long, help='the composite number to factor')
parser.add_argument('--verbose', dest='verbose', action='store_true', help="enable verbose output")
args = parser.parse_args()
if args.verbose:
mpqs(args.composite, args.verbose)
else:
time1 = time.time()
print mpqs(args.composite)
print 'time elapsed: %f seconds'%(time.time()-time1)
my_math.py
mpqs.py
はと同じ投稿から取られていますが、良い素数間の最大のギャップを見つけるための回答で使用した無制限の素数ジェネレータにも追加しました。
my_math.py
# primes less than 212
small_primes = [
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37,
41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89,
97,101,103,107,109,113,127,131,137,139,149,151,
157,163,167,173,179,181,191,193,197,199,211]
# pre-calced sieve of eratosthenes for n = 2, 3, 5, 7
indices = [
1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
53, 59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,
107,109,113,121,127,131,137,139,143,149,151,157,
163,167,169,173,179,181,187,191,193,197,199,209]
# distances between sieve values
offsets = [
10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6,
6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4,
2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6,
4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2]
# tabulated, mod 105
dindices =[
0,10, 2, 0, 4, 0, 0, 0, 8, 0, 0, 2, 0, 4, 0,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 6, 0, 0, 2,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 4, 2,
0, 6, 6, 0, 0, 0, 0, 6, 6, 0, 0, 0, 0, 4, 2,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 6, 2,
0, 6, 0, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 4, 8,
0, 0, 2, 0,10, 0, 0, 4, 0, 0, 0, 2, 0, 4, 2]
max_int = 2147483647
# returns the index of x in a sorted list a
# or the index of the next larger item if x is not present
# i.e. the proper insertion point for x in a
def binary_search(a, x):
s = 0
e = len(a)
m = e >> 1
while m != e:
if a[m] < x:
s = m
m = (s + e + 1) >> 1
else:
e = m
m = (s + e) >> 1
return m
# divide and conquer list product
def list_prod(a):
size = len(a)
if size == 1:
return a[0]
return list_prod(a[:size>>1]) * list_prod(a[size>>1:])
# greatest common divisor of a and b
def gcd(a, b):
while b:
a, b = b, a%b
return a
# extended gcd
def ext_gcd(a, m):
a = int(a%m)
x, u = 0, 1
while a:
x, u = u, x - (m/a)*u
m, a = a, m%a
return (m, x, u)
# legendre symbol (a|m)
# note: returns m-1 if a is a non-residue, instead of -1
def legendre(a, m):
return pow(a, (m-1) >> 1, m)
# modular inverse of a mod m
def mod_inv(a, m):
return ext_gcd(a, m)[1]
# modular sqrt(n) mod p
# p must be prime
def mod_sqrt(n, p):
a = n%p
if p%4 == 3:
return pow(a, (p+1) >> 2, p)
elif p%8 == 5:
v = pow(a << 1, (p-5) >> 3, p)
i = ((a*v*v << 1) % p) - 1
return (a*v*i)%p
elif p%8 == 1:
# Shank's method
q = p-1
e = 0
while q&1 == 0:
e += 1
q >>= 1
n = 2
while legendre(n, p) != p-1:
n += 1
w = pow(a, q, p)
x = pow(a, (q+1) >> 1, p)
y = pow(n, q, p)
r = e
while True:
if w == 1:
return x
v = w
k = 0
while v != 1 and k+1 < r:
v = (v*v)%p
k += 1
if k == 0:
return x
d = pow(y, 1 << (r-k-1), p)
x = (x*d)%p
y = (d*d)%p
w = (w*y)%p
r = k
else: # p == 2
return a
#integer sqrt of n
def isqrt(n):
c = n*4/3
d = c.bit_length()
a = d>>1
if d&1:
x = 1 << a
y = (x + (n >> a)) >> 1
else:
x = (3 << a) >> 2
y = (x + (c >> a)) >> 1
if x != y:
x = y
y = (x + n/x) >> 1
while y < x:
x = y
y = (x + n/x) >> 1
return x
# integer cbrt of n
def icbrt(n):
d = n.bit_length()
if d%3 == 2:
x = 3 << d/3-1
else:
x = 1 << d/3
y = (2*x + n/(x*x))/3
if x != y:
x = y
y = (2*x + n/(x*x))/3
while y < x:
x = y
y = (2*x + n/(x*x))/3
return x
# strong probable prime
def is_sprp(n, b=2):
if n < 2: return False
d = n-1
s = 0
while d&1 == 0:
s += 1
d >>= 1
x = pow(b, d, n)
if x == 1 or x == n-1:
return True
for r in xrange(1, s):
x = (x * x)%n
if x == 1:
return False
elif x == n-1:
return True
return False
# lucas probable prime
# assumes D = 1 (mod 4), (D|n) = -1
def is_lucas_prp(n, D):
P = 1
Q = (1-D) >> 2
# n+1 = 2**r*s where s is odd
s = n+1
r = 0
while s&1 == 0:
r += 1
s >>= 1
# calculate the bit reversal of (odd) s
# e.g. 19 (10011) <=> 25 (11001)
t = 0
while s:
if s&1:
t += 1
s -= 1
else:
t <<= 1
s >>= 1
# use the same bit reversal process to calculate the sth Lucas number
# keep track of q = Q**n as we go
U = 0
V = 2
q = 1
# mod_inv(2, n)
inv_2 = (n+1) >> 1
while t:
if t&1:
# U, V of n+1
U, V = ((U + V) * inv_2)%n, ((D*U + V) * inv_2)%n
q = (q * Q)%n
t -= 1
else:
# U, V of n*2
U, V = (U * V)%n, (V * V - 2 * q)%n
q = (q * q)%n
t >>= 1
# double s until we have the 2**r*sth Lucas number
while r:
U, V = (U * V)%n, (V * V - 2 * q)%n
q = (q * q)%n
r -= 1
# primality check
# if n is prime, n divides the n+1st Lucas number, given the assumptions
return U == 0
## Baillie-PSW ##
# this is technically a probabalistic test, but there are no known pseudoprimes
def is_bpsw(n):
if not is_sprp(n, 2): return False
# idea shamelessly stolen from Mathmatica's PrimeQ
# if n is a 2-sprp and a 3-sprp, n is necessarily square-free
if not is_sprp(n, 3): return False
a = 5
s = 2
# if n is a perfect square, this will never terminate
while legendre(a, n) != n-1:
s = -s
a = s-a
return is_lucas_prp(n, a)
# an 'almost certain' primality check
def is_prime(n):
if n < 212:
m = binary_search(small_primes, n)
return n == small_primes[m]
for p in small_primes:
if n%p == 0:
return False
# if n is a 32-bit integer, perform full trial division
if n <= max_int:
p = 211
while p*p < n:
for o in offsets:
p += o
if n%p == 0:
return False
return True
return is_bpsw(n)
# next prime strictly larger than n
def next_prime(n):
if n < 2:
return 2
# first odd larger than n
n = (n + 1) | 1
if n < 212:
m = binary_search(small_primes, n)
return small_primes[m]
# find our position in the sieve rotation via binary search
x = int(n%210)
m = binary_search(indices, x)
i = int(n + (indices[m] - x))
# adjust offsets
offs = offsets[m:] + offsets[:m]
while True:
for o in offs:
if is_prime(i):
return i
i += o
# an infinite prime number generator
def primes(start = 0):
for n in small_primes[start:]: yield n
pg = primes(6)
p = pg.next()
q = p*p
sieve = {221: 13, 253: 11}
n = 211
while True:
for o in offsets:
n += o
stp = sieve.pop(n, 0)
if stp:
nxt = n/stp
nxt += dindices[nxt%105]
while nxt*stp in sieve:
nxt += dindices[nxt%105]
sieve[nxt*stp] = stp
elif n < q:
yield n
else:
sieve[q + dindices[p%105]*p] = p
p = pg.next()
q = p*p
# true if n is a prime power > 0
def is_prime_power(n):
if n > 1:
for p in small_primes:
if n%p == 0:
n /= p
while n%p == 0: n /= p
return n == 1
r = isqrt(n)
if r*r == n:
return is_prime_power(r)
s = icbrt(n)
if s*s*s == n:
return is_prime_power(s)
p = 211
while p*p < r:
for o in offsets:
p += o
if n%p == 0:
n /= p
while n%p == 0: n /= p
return n == 1
if n <= max_int:
while p*p < n:
for o in offsets:
p += o
if n%p == 0:
return False
return True
return is_bpsw(n)
return False