这是我能想到的最好的算法。

def get_primes(n):
    numbers = set(range(n, 1, -1))
    primes = []
    while numbers:
        p = numbers.pop()
        primes.append(p)
        numbers.difference_update(set(range(p*2, n+1, p)))
    return primes

>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import   get_primes').timeit(1)
1.1499958793645562

还能做得更快吗?

这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:

>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True

当前回答

我发现的最简单的方法是:

primes = []
for n in range(low, high + 1):
    if all(n % i for i in primes):
        primes.append(n)

其他回答

我测试了一些unutbu的功能,我用饥饿的百万数字计算它

获胜者是使用numpy库的函数,

注意:做一个内存利用率测试也很有趣:)

示例代码

完整的代码在我的github存储库

#!/usr/bin/env python

import lib
import timeit
import sys
import math
import datetime

import prettyplotlib as ppl
import numpy as np

import matplotlib.pyplot as plt
from prettyplotlib import brewer2mpl

primenumbers_gen = [
    'sieveOfEratosthenes',
    'ambi_sieve',
    'ambi_sieve_plain',
    'sundaram3',
    'sieve_wheel_30',
    'primesfrom3to',
    'primesfrom2to',
    'rwh_primes',
    'rwh_primes1',
    'rwh_primes2',
]

def human_format(num):
    # https://stackoverflow.com/questions/579310/formatting-long-numbers-as-strings-in-python?answertab=active#tab-top
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    # add more suffixes if you need them
    return '%.2f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])


if __name__=='__main__':

    # Vars
    n = 10000000 # number itereration generator
    nbcol = 5 # For decompose prime number generator
    nb_benchloop = 3 # Eliminate false positive value during the test (bench average time)
    datetimeformat = '%Y-%m-%d %H:%M:%S.%f'
    config = 'from __main__ import n; import lib'
    primenumbers_gen = {
        'sieveOfEratosthenes': {'color': 'b'},
        'ambi_sieve': {'color': 'b'},
        'ambi_sieve_plain': {'color': 'b'},
         'sundaram3': {'color': 'b'},
        'sieve_wheel_30': {'color': 'b'},
# # #        'primesfrom2to': {'color': 'b'},
        'primesfrom3to': {'color': 'b'},
        # 'rwh_primes': {'color': 'b'},
        # 'rwh_primes1': {'color': 'b'},
        'rwh_primes2': {'color': 'b'},
    }


    # Get n in command line
    if len(sys.argv)>1:
        n = int(sys.argv[1])

    step = int(math.ceil(n / float(nbcol)))
    nbs = np.array([i * step for i in range(1, int(nbcol) + 1)])
    set2 = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors

    print datetime.datetime.now().strftime(datetimeformat)
    print("Compute prime number to %(n)s" % locals())
    print("")

    results = dict()
    for pgen in primenumbers_gen:
        results[pgen] = dict()
        benchtimes = list()
        for n in nbs:
            t = timeit.Timer("lib.%(pgen)s(n)" % locals(), setup=config)
            execute_times = t.repeat(repeat=nb_benchloop,number=1)
            benchtime = np.mean(execute_times)
            benchtimes.append(benchtime)
        results[pgen] = {'benchtimes':np.array(benchtimes)}

fig, ax = plt.subplots(1)
plt.ylabel('Computation time (in second)')
plt.xlabel('Numbers computed')
i = 0
for pgen in primenumbers_gen:

    bench = results[pgen]['benchtimes']
    avgs = np.divide(bench,nbs)
    avg = np.average(bench, weights=nbs)

    # Compute linear regression
    A = np.vstack([nbs, np.ones(len(nbs))]).T
    a, b = np.linalg.lstsq(A, nbs*avgs)[0]

    # Plot
    i += 1
    #label="%(pgen)s" % locals()
    #ppl.plot(nbs, nbs*avgs, label=label, lw=1, linestyle='--', color=set2[i % 12])
    label="%(pgen)s avg" % locals()
    ppl.plot(nbs, a * nbs + b, label=label, lw=2, color=set2[i % 12])
print datetime.datetime.now().strftime(datetimeformat)

ppl.legend(ax, loc='upper left', ncol=4)

# Change x axis label
ax.get_xaxis().get_major_formatter().set_scientific(False)
fig.canvas.draw()
labels = [human_format(int(item.get_text())) for item in ax.get_xticklabels()]

ax.set_xticklabels(labels)
ax = plt.gca()

plt.show()

我很惊讶居然没人提到numba。

该版本在2.47 ms±36.5µs内达到1M标记。

几年前,维基百科页面上出现了一个阿特金筛子的伪代码。这已经不存在了,参考阿特金筛似乎是一个不同的算法。一个2007/03/01版本的维基百科页面(Primer number as 2007-03-01)显示了我用作参考的伪代码。

import numpy as np
from numba import njit

@njit
def nb_primes(n):
    # Generates prime numbers 2 <= p <= n
    # Atkin's sieve -- see https://en.wikipedia.org/w/index.php?title=Prime_number&oldid=111775466
    sqrt_n = int(np.sqrt(n)) + 1

    # initialize the sieve
    s = np.full(n + 1, -1, dtype=np.int8)
    s[2] = 1
    s[3] = 1

    # put in candidate primes:
    # integers which have an odd number of
    # representations by certain quadratic forms
    for x in range(1, sqrt_n):
        x2 = x * x
        for y in range(1, sqrt_n):
            y2 = y * y
            k = 4 * x2 + y2
            if k <= n and (k % 12 == 1 or k % 12 == 5): s[k] *= -1
            k = 3 * x2 + y2
            if k <= n and (k % 12 == 7): s[k] *= -1
            k = 3 * x2 - y2
            if k <= n and x > y and k % 12 == 11: s[k] *= -1

    # eliminate composites by sieving
    for k in range(5, sqrt_n):
        if s[k]:
            k2 = k*k
            # k is prime, omit multiples of its square; this is sufficient because
            # composites which managed to get on the list cannot be square-free
            for i in range(1, n // k2 + 1):
                j = i * k2 # j ∈ {k², 2k², 3k², ..., n}
                s[j] = -1
    return np.nonzero(s>0)[0]

# initial run for "compilation" 
nb_primes(10)

时机

In[10]:
%timeit nb_primes(1_000_000)

Out[10]:
2.47 ms ± 36.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In[11]:
%timeit nb_primes(10_000_000)

Out[11]:
33.4 ms ± 373 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

In[12]:
%timeit nb_primes(100_000_000)

Out[12]:
828 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

很抱歉打扰,但erat2()在算法中有一个严重的缺陷。

在搜索下一个合成时,我们只需要测试奇数。 Q p都是奇数;那么q+p是偶数,不需要检验,但q+2*p总是奇数。这消除了while循环条件中的“if even”测试,并节省了大约30%的运行时。

当我们在它:而不是优雅的'D.pop(q,None)'获取和删除方法,使用'if q in D: p=D[q],del D[q]',这是两倍的速度!至少在我的机器上(P3-1Ghz)。 所以我建议这个聪明算法的实现:

def erat3( ):
    from itertools import islice, count

    # q is the running integer that's checked for primeness.
    # yield 2 and no other even number thereafter
    yield 2
    D = {}
    # no need to mark D[4] as we will test odd numbers only
    for q in islice(count(3),0,None,2):
        if q in D:                  #  is composite
            p = D[q]
            del D[q]
            # q is composite. p=D[q] is the first prime that
            # divides it. Since we've reached q, we no longer
            # need it in the map, but we'll mark the next
            # multiple of its witnesses to prepare for larger
            # numbers.
            x = q + p+p        # next odd(!) multiple
            while x in D:      # skip composites
                x += p+p
            D[x] = p
        else:                  # is prime
            # q is a new prime.
            # Yield it and mark its first multiple that isn't
            # already marked in previous iterations.
            D[q*q] = q
            yield q

对于最快的代码,numpy解决方案是最好的。不过,出于纯粹的学术原因,我发布了我的纯python版本,它比上面发布的食谱版本快不到50%。由于我将整个列表放在内存中,所以需要足够的空间来容纳所有内容,但它的可伸缩性似乎相当好。

def daniel_sieve_2(maxNumber):
    """
    Given a number, returns all numbers less than or equal to
    that number which are prime.
    """
    allNumbers = range(3, maxNumber+1, 2)
    for mIndex, number in enumerate(xrange(3, maxNumber+1, 2)):
        if allNumbers[mIndex] == 0:
            continue
        # now set all multiples to 0
        for index in xrange(mIndex+number, (maxNumber-3)/2+1, number):
            allNumbers[index] = 0
    return [2] + filter(lambda n: n!=0, allNumbers)

结果是:

>>>mine = timeit.Timer("daniel_sieve_2(1000000)",
...                    "from sieves import daniel_sieve_2")
>>>prev = timeit.Timer("get_primes_erat(1000000)",
...                    "from sieves import get_primes_erat")
>>>print "Mine: {0:0.4f} ms".format(min(mine.repeat(3, 1))*1000)
Mine: 428.9446 ms
>>>print "Previous Best {0:0.4f} ms".format(min(prev.repeat(3, 1))*1000)
Previous Best 621.3581 ms

更快、更内存的纯Python代码:

def primes(n):
    """ Returns  a list of primes < n """
    sieve = [True] * n
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i]:
            sieve[i*i::2*i]=[False]*((n-i*i-1)//(2*i)+1)
    return [2] + [i for i in range(3,n,2) if sieve[i]]

或者从半筛子开始

def primes1(n):
    """ Returns  a list of primes < n """
    sieve = [True] * (n//2)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = [False] * ((n-i*i-1)//(2*i)+1)
    return [2] + [2*i+1 for i in range(1,n//2) if sieve[i]]

更快,内存更明智的numpy代码:

import numpy
def primesfrom3to(n):
    """ Returns a array of primes, 3 <= p < n """
    sieve = numpy.ones(n//2, dtype=bool)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = False
    return 2*numpy.nonzero(sieve)[0][1::]+1

从三分之一筛子开始的一种更快的变化:

import numpy
def primesfrom2to(n):
    """ Input n>=6, Returns a array of primes, 2 <= p < n """
    sieve = numpy.ones(n//3 + (n%6==2), dtype=bool)
    for i in range(1,int(n**0.5)//3+1):
        if sieve[i]:
            k=3*i+1|1
            sieve[       k*k//3     ::2*k] = False
            sieve[k*(k-2*(i&1)+4)//3::2*k] = False
    return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)]

上面代码的纯python版本(很难编码)将是:

def primes2(n):
    """ Input n>=6, Returns a list of primes, 2 <= p < n """
    n, correction = n-n%6+6, 2-(n%6>1)
    sieve = [True] * (n//3)
    for i in range(1,int(n**0.5)//3+1):
      if sieve[i]:
        k=3*i+1|1
        sieve[      k*k//3      ::2*k] = [False] * ((n//6-k*k//6-1)//k+1)
        sieve[k*(k-2*(i&1)+4)//3::2*k] = [False] * ((n//6-k*(k-2*(i&1)+4)//6-1)//k+1)
    return [2,3] + [3*i+1|1 for i in range(1,n//3-correction) if sieve[i]]

不幸的是,pure-python不采用更简单和更快的numpy方式进行赋值,并且在循环中调用len(),如[False]*len(sieve[((k*k)//3)::2*k])太慢。所以我不得不即兴修正输入(避免更多的数学运算),并做一些极端的(痛苦的)数学魔术。

我个人认为numpy(它被广泛使用)不是Python标准库的一部分是很遗憾的,而且Python开发人员似乎完全忽略了语法和速度方面的改进。