这是我能想到的最好的算法。
def get_primes(n):
numbers = set(range(n, 1, -1))
primes = []
while numbers:
p = numbers.pop()
primes.append(p)
numbers.difference_update(set(range(p*2, n+1, p)))
return primes
>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import get_primes').timeit(1)
1.1499958793645562
还能做得更快吗?
这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:
>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True
我很惊讶居然没人提到numba。
该版本在2.47 ms±36.5µs内达到1M标记。
几年前,维基百科页面上出现了一个阿特金筛子的伪代码。这已经不存在了,参考阿特金筛似乎是一个不同的算法。一个2007/03/01版本的维基百科页面(Primer number as 2007-03-01)显示了我用作参考的伪代码。
import numpy as np
from numba import njit
@njit
def nb_primes(n):
# Generates prime numbers 2 <= p <= n
# Atkin's sieve -- see https://en.wikipedia.org/w/index.php?title=Prime_number&oldid=111775466
sqrt_n = int(np.sqrt(n)) + 1
# initialize the sieve
s = np.full(n + 1, -1, dtype=np.int8)
s[2] = 1
s[3] = 1
# put in candidate primes:
# integers which have an odd number of
# representations by certain quadratic forms
for x in range(1, sqrt_n):
x2 = x * x
for y in range(1, sqrt_n):
y2 = y * y
k = 4 * x2 + y2
if k <= n and (k % 12 == 1 or k % 12 == 5): s[k] *= -1
k = 3 * x2 + y2
if k <= n and (k % 12 == 7): s[k] *= -1
k = 3 * x2 - y2
if k <= n and x > y and k % 12 == 11: s[k] *= -1
# eliminate composites by sieving
for k in range(5, sqrt_n):
if s[k]:
k2 = k*k
# k is prime, omit multiples of its square; this is sufficient because
# composites which managed to get on the list cannot be square-free
for i in range(1, n // k2 + 1):
j = i * k2 # j ∈ {k², 2k², 3k², ..., n}
s[j] = -1
return np.nonzero(s>0)[0]
# initial run for "compilation"
nb_primes(10)
时机
In[10]:
%timeit nb_primes(1_000_000)
Out[10]:
2.47 ms ± 36.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In[11]:
%timeit nb_primes(10_000_000)
Out[11]:
33.4 ms ± 373 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In[12]:
%timeit nb_primes(100_000_000)
Out[12]:
828 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
随着时间的推移,我收集了几个质数筛子。我电脑上最快的是这样的:
from time import time
# 175 ms for all the primes up to the value 10**6
def primes_sieve(limit):
a = [True] * limit
a[0] = a[1] = False
#a[2] = True
for n in xrange(4, limit, 2):
a[n] = False
root_limit = int(limit**.5)+1
for i in xrange(3,root_limit):
if a[i]:
for n in xrange(i*i, limit, 2*i):
a[n] = False
return a
LIMIT = 10**6
s=time()
primes = primes_sieve(LIMIT)
print time()-s
如果你接受itertools,但不接受numpy,这里有一个针对Python 3的rwh_primes2的改编版本,它在我的机器上运行速度大约是原来的两倍。唯一的实质性变化是使用bytearray而不是列表来表示布尔值,并使用压缩而不是列表推导来构建最终列表。(如果可以的话,我会把这句话作为moarningsun之类的评论。)
import itertools
izip = itertools.zip_longest
chain = itertools.chain.from_iterable
compress = itertools.compress
def rwh_primes2_python3(n):
""" Input n>=6, Returns a list of primes, 2 <= p < n """
zero = bytearray([False])
size = n//3 + (n % 6 == 2)
sieve = bytearray([True]) * size
sieve[0] = False
for i in range(int(n**0.5)//3+1):
if sieve[i]:
k=3*i+1|1
start = (k*k+4*k-2*k*(i&1))//3
sieve[(k*k)//3::2*k]=zero*((size - (k*k)//3 - 1) // (2 * k) + 1)
sieve[ start ::2*k]=zero*((size - start - 1) // (2 * k) + 1)
ans = [2,3]
poss = chain(izip(*[range(i, n, 6) for i in (1,5)]))
ans.extend(compress(poss, sieve))
return ans
比较:
>>> timeit.timeit('primes.rwh_primes2(10**6)', setup='import primes', number=1)
0.0652179726976101
>>> timeit.timeit('primes.rwh_primes2_python3(10**6)', setup='import primes', number=1)
0.03267321276325674
and
>>> timeit.timeit('primes.rwh_primes2(10**8)', setup='import primes', number=1)
6.394284538007014
>>> timeit.timeit('primes.rwh_primes2_python3(10**8)', setup='import primes', number=1)
3.833829450302801
这里有一个来自Python Cookbook的非常简洁的示例——该URL的最快版本是:
import itertools
def erat2( ):
D = { }
yield 2
for q in itertools.islice(itertools.count(3), 0, None, 2):
p = D.pop(q, None)
if p is None:
D[q*q] = q
yield q
else:
x = p + q
while x in D or not (x&1):
x += p
D[x] = p
这就给出了
def get_primes_erat(n):
return list(itertools.takewhile(lambda p: p<n, erat2()))
在shell提示符(正如我喜欢做的那样)中测量这段代码在pri.py中,我观察到:
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes(1000000)'
10 loops, best of 3: 1.69 sec per loop
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes_erat(1000000)'
10 loops, best of 3: 673 msec per loop
所以看起来食谱解决方案的速度是原来的两倍多。