这是我能想到的最好的算法。

def get_primes(n):
    numbers = set(range(n, 1, -1))
    primes = []
    while numbers:
        p = numbers.pop()
        primes.append(p)
        numbers.difference_update(set(range(p*2, n+1, p)))
    return primes

>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import   get_primes').timeit(1)
1.1499958793645562

还能做得更快吗?

这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:

>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True

当前回答

在Pure Python中最快的质数筛分:

from itertools import compress

def half_sieve(n):
    """
    Returns a list of prime numbers less than `n`.
    """
    if n <= 2:
        return []
    sieve = bytearray([True]) * (n // 2)
    for i in range(3, int(n ** 0.5) + 1, 2):
        if sieve[i // 2]:
            sieve[i * i // 2::i] = bytearray((n - i * i - 1) // (2 * i) + 1)
    primes = list(compress(range(1, n, 2), sieve))
    primes[0] = 2
    return primes

我优化了埃拉托色尼筛子的速度和内存。

基准

from time import clock
import platform

def benchmark(iterations, limit):
    start = clock()
    for x in range(iterations):
        half_sieve(limit)
    end = clock() - start
    print(f'{end/iterations:.4f} seconds for primes < {limit}')

if __name__ == '__main__':
    print(platform.python_version())
    print(platform.platform())
    print(platform.processor())
    it = 10
    for pw in range(4, 9):
        benchmark(it, 10**pw)

输出

>>> 3.6.7
>>> Windows-10-10.0.17763-SP0
>>> Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
>>> 0.0003 seconds for primes < 10000
>>> 0.0021 seconds for primes < 100000
>>> 0.0204 seconds for primes < 1000000
>>> 0.2389 seconds for primes < 10000000
>>> 2.6702 seconds for primes < 100000000

其他回答

我已经更新了Python 3的大部分代码,并将其扔给perfplot(我的一个项目),看看哪个实际上是最快的。事实证明,对于较大的n,从{2,3}开始的质数是最好的:


代码重现情节:

import perfplot
from math import sqrt, ceil
import numpy as np
import sympy


def rwh_primes(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """ Returns  a list of primes < n """
    sieve = [True] * n
    for i in range(3, int(n ** 0.5) + 1, 2):
        if sieve[i]:
            sieve[i * i::2 * i] = [False] * ((n - i * i - 1) // (2 * i) + 1)
    return [2] + [i for i in range(3, n, 2) if sieve[i]]


def rwh_primes1(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """ Returns  a list of primes < n """
    sieve = [True] * (n // 2)
    for i in range(3, int(n ** 0.5) + 1, 2):
        if sieve[i // 2]:
            sieve[i * i // 2::i] = [False] * ((n - i * i - 1) // (2 * i) + 1)
    return [2] + [2 * i + 1 for i in range(1, n // 2) if sieve[i]]


def rwh_primes2(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """Input n>=6, Returns a list of primes, 2 <= p < n"""
    assert n >= 6
    correction = n % 6 > 1
    n = {0: n, 1: n - 1, 2: n + 4, 3: n + 3, 4: n + 2, 5: n + 1}[n % 6]
    sieve = [True] * (n // 3)
    sieve[0] = False
    for i in range(int(n ** 0.5) // 3 + 1):
        if sieve[i]:
            k = 3 * i + 1 | 1
            sieve[((k * k) // 3)::2 * k] = [False] * (
                (n // 6 - (k * k) // 6 - 1) // k + 1
            )
            sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = [False] * (
                (n // 6 - (k * k + 4 * k - 2 * k * (i & 1)) // 6 - 1) // k + 1
            )
    return [2, 3] + [3 * i + 1 | 1 for i in range(1, n // 3 - correction) if sieve[i]]


def sieve_wheel_30(N):
    # http://zerovolt.com/?p=88
    """ Returns a list of primes <= N using wheel criterion 2*3*5 = 30

Copyright 2009 by zerovolt.com
This code is free for non-commercial purposes, in which case you can just leave this comment as a credit for my work.
If you need this code for commercial purposes, please contact me by sending an email to: info [at] zerovolt [dot] com."""
    __smallp = (
        2,
        3,
        5,
        7,
        11,
        13,
        17,
        19,
        23,
        29,
        31,
        37,
        41,
        43,
        47,
        53,
        59,
        61,
        67,
        71,
        73,
        79,
        83,
        89,
        97,
        101,
        103,
        107,
        109,
        113,
        127,
        131,
        137,
        139,
        149,
        151,
        157,
        163,
        167,
        173,
        179,
        181,
        191,
        193,
        197,
        199,
        211,
        223,
        227,
        229,
        233,
        239,
        241,
        251,
        257,
        263,
        269,
        271,
        277,
        281,
        283,
        293,
        307,
        311,
        313,
        317,
        331,
        337,
        347,
        349,
        353,
        359,
        367,
        373,
        379,
        383,
        389,
        397,
        401,
        409,
        419,
        421,
        431,
        433,
        439,
        443,
        449,
        457,
        461,
        463,
        467,
        479,
        487,
        491,
        499,
        503,
        509,
        521,
        523,
        541,
        547,
        557,
        563,
        569,
        571,
        577,
        587,
        593,
        599,
        601,
        607,
        613,
        617,
        619,
        631,
        641,
        643,
        647,
        653,
        659,
        661,
        673,
        677,
        683,
        691,
        701,
        709,
        719,
        727,
        733,
        739,
        743,
        751,
        757,
        761,
        769,
        773,
        787,
        797,
        809,
        811,
        821,
        823,
        827,
        829,
        839,
        853,
        857,
        859,
        863,
        877,
        881,
        883,
        887,
        907,
        911,
        919,
        929,
        937,
        941,
        947,
        953,
        967,
        971,
        977,
        983,
        991,
        997,
    )
    # wheel = (2, 3, 5)
    const = 30
    if N < 2:
        return []
    if N <= const:
        pos = 0
        while __smallp[pos] <= N:
            pos += 1
        return list(__smallp[:pos])
    # make the offsets list
    offsets = (7, 11, 13, 17, 19, 23, 29, 1)
    # prepare the list
    p = [2, 3, 5]
    dim = 2 + N // const
    tk1 = [True] * dim
    tk7 = [True] * dim
    tk11 = [True] * dim
    tk13 = [True] * dim
    tk17 = [True] * dim
    tk19 = [True] * dim
    tk23 = [True] * dim
    tk29 = [True] * dim
    tk1[0] = False
    # help dictionary d
    # d[a , b] = c  ==> if I want to find the smallest useful multiple of (30*pos)+a
    # on tkc, then I need the index given by the product of [(30*pos)+a][(30*pos)+b]
    # in general. If b < a, I need [(30*pos)+a][(30*(pos+1))+b]
    d = {}
    for x in offsets:
        for y in offsets:
            res = (x * y) % const
            if res in offsets:
                d[(x, res)] = y
    # another help dictionary: gives tkx calling tmptk[x]
    tmptk = {1: tk1, 7: tk7, 11: tk11, 13: tk13, 17: tk17, 19: tk19, 23: tk23, 29: tk29}
    pos, prime, lastadded, stop = 0, 0, 0, int(ceil(sqrt(N)))

    # inner functions definition
    def del_mult(tk, start, step):
        for k in range(start, len(tk), step):
            tk[k] = False

    # end of inner functions definition
    cpos = const * pos
    while prime < stop:
        # 30k + 7
        if tk7[pos]:
            prime = cpos + 7
            p.append(prime)
            lastadded = 7
            for off in offsets:
                tmp = d[(7, off)]
                start = (
                    (pos + prime)
                    if off == 7
                    else (prime * (const * (pos + 1 if tmp < 7 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 11
        if tk11[pos]:
            prime = cpos + 11
            p.append(prime)
            lastadded = 11
            for off in offsets:
                tmp = d[(11, off)]
                start = (
                    (pos + prime)
                    if off == 11
                    else (prime * (const * (pos + 1 if tmp < 11 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 13
        if tk13[pos]:
            prime = cpos + 13
            p.append(prime)
            lastadded = 13
            for off in offsets:
                tmp = d[(13, off)]
                start = (
                    (pos + prime)
                    if off == 13
                    else (prime * (const * (pos + 1 if tmp < 13 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 17
        if tk17[pos]:
            prime = cpos + 17
            p.append(prime)
            lastadded = 17
            for off in offsets:
                tmp = d[(17, off)]
                start = (
                    (pos + prime)
                    if off == 17
                    else (prime * (const * (pos + 1 if tmp < 17 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 19
        if tk19[pos]:
            prime = cpos + 19
            p.append(prime)
            lastadded = 19
            for off in offsets:
                tmp = d[(19, off)]
                start = (
                    (pos + prime)
                    if off == 19
                    else (prime * (const * (pos + 1 if tmp < 19 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 23
        if tk23[pos]:
            prime = cpos + 23
            p.append(prime)
            lastadded = 23
            for off in offsets:
                tmp = d[(23, off)]
                start = (
                    (pos + prime)
                    if off == 23
                    else (prime * (const * (pos + 1 if tmp < 23 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # 30k + 29
        if tk29[pos]:
            prime = cpos + 29
            p.append(prime)
            lastadded = 29
            for off in offsets:
                tmp = d[(29, off)]
                start = (
                    (pos + prime)
                    if off == 29
                    else (prime * (const * (pos + 1 if tmp < 29 else 0) + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
        # now we go back to top tk1, so we need to increase pos by 1
        pos += 1
        cpos = const * pos
        # 30k + 1
        if tk1[pos]:
            prime = cpos + 1
            p.append(prime)
            lastadded = 1
            for off in offsets:
                tmp = d[(1, off)]
                start = (
                    (pos + prime)
                    if off == 1
                    else (prime * (const * pos + tmp)) // const
                )
                del_mult(tmptk[off], start, prime)
    # time to add remaining primes
    # if lastadded == 1, remove last element and start adding them from tk1
    # this way we don't need an "if" within the last while
    if lastadded == 1:
        p.pop()
    # now complete for every other possible prime
    while pos < len(tk1):
        cpos = const * pos
        if tk1[pos]:
            p.append(cpos + 1)
        if tk7[pos]:
            p.append(cpos + 7)
        if tk11[pos]:
            p.append(cpos + 11)
        if tk13[pos]:
            p.append(cpos + 13)
        if tk17[pos]:
            p.append(cpos + 17)
        if tk19[pos]:
            p.append(cpos + 19)
        if tk23[pos]:
            p.append(cpos + 23)
        if tk29[pos]:
            p.append(cpos + 29)
        pos += 1
    # remove exceeding if present
    pos = len(p) - 1
    while p[pos] > N:
        pos -= 1
    if pos < len(p) - 1:
        del p[pos + 1 :]
    # return p list
    return p


def sieve_of_eratosthenes(n):
    """sieveOfEratosthenes(n): return the list of the primes < n."""
    # Code from: <dickinsm@gmail.com>, Nov 30 2006
    # http://groups.google.com/group/comp.lang.python/msg/f1f10ced88c68c2d
    if n <= 2:
        return []
    sieve = list(range(3, n, 2))
    top = len(sieve)
    for si in sieve:
        if si:
            bottom = (si * si - 3) // 2
            if bottom >= top:
                break
            sieve[bottom::si] = [0] * -((bottom - top) // si)
    return [2] + [el for el in sieve if el]


def sieve_of_atkin(end):
    """return a list of all the prime numbers <end using the Sieve of Atkin."""
    # Code by Steve Krenzel, <Sgk284@gmail.com>, improved
    # Code: https://web.archive.org/web/20080324064651/http://krenzel.info/?p=83
    # Info: http://en.wikipedia.org/wiki/Sieve_of_Atkin
    assert end > 0
    lng = (end - 1) // 2
    sieve = [False] * (lng + 1)

    x_max, x2, xd = int(sqrt((end - 1) / 4.0)), 0, 4
    for xd in range(4, 8 * x_max + 2, 8):
        x2 += xd
        y_max = int(sqrt(end - x2))
        n, n_diff = x2 + y_max * y_max, (y_max << 1) - 1
        if not (n & 1):
            n -= n_diff
            n_diff -= 2
        for d in range((n_diff - 1) << 1, -1, -8):
            m = n % 12
            if m == 1 or m == 5:
                m = n >> 1
                sieve[m] = not sieve[m]
            n -= d

    x_max, x2, xd = int(sqrt((end - 1) / 3.0)), 0, 3
    for xd in range(3, 6 * x_max + 2, 6):
        x2 += xd
        y_max = int(sqrt(end - x2))
        n, n_diff = x2 + y_max * y_max, (y_max << 1) - 1
        if not (n & 1):
            n -= n_diff
            n_diff -= 2
        for d in range((n_diff - 1) << 1, -1, -8):
            if n % 12 == 7:
                m = n >> 1
                sieve[m] = not sieve[m]
            n -= d

    x_max, y_min, x2, xd = int((2 + sqrt(4 - 8 * (1 - end))) / 4), -1, 0, 3
    for x in range(1, x_max + 1):
        x2 += xd
        xd += 6
        if x2 >= end:
            y_min = (((int(ceil(sqrt(x2 - end))) - 1) << 1) - 2) << 1
        n, n_diff = ((x * x + x) << 1) - 1, (((x - 1) << 1) - 2) << 1
        for d in range(n_diff, y_min, -8):
            if n % 12 == 11:
                m = n >> 1
                sieve[m] = not sieve[m]
            n += d

    primes = [2, 3]
    if end <= 3:
        return primes[: max(0, end - 2)]

    for n in range(5 >> 1, (int(sqrt(end)) + 1) >> 1):
        if sieve[n]:
            primes.append((n << 1) + 1)
            aux = (n << 1) + 1
            aux *= aux
            for k in range(aux, end, 2 * aux):
                sieve[k >> 1] = False

    s = int(sqrt(end)) + 1
    if s % 2 == 0:
        s += 1
    primes.extend([i for i in range(s, end, 2) if sieve[i >> 1]])

    return primes


def ambi_sieve_plain(n):
    s = list(range(3, n, 2))
    for m in range(3, int(n ** 0.5) + 1, 2):
        if s[(m - 3) // 2]:
            for t in range((m * m - 3) // 2, (n >> 1) - 1, m):
                s[t] = 0
    return [2] + [t for t in s if t > 0]


def sundaram3(max_n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/2073279#2073279
    numbers = range(3, max_n + 1, 2)
    half = (max_n) // 2
    initial = 4

    for step in range(3, max_n + 1, 2):
        for i in range(initial, half, step):
            numbers[i - 1] = 0
        initial += 2 * (step + 1)

        if initial > half:
            return [2] + filter(None, numbers)


# Using Numpy:
def ambi_sieve(n):
    # http://tommih.blogspot.com/2009/04/fast-prime-number-generator.html
    s = np.arange(3, n, 2)
    for m in range(3, int(n ** 0.5) + 1, 2):
        if s[(m - 3) // 2]:
            s[(m * m - 3) // 2::m] = 0
    return np.r_[2, s[s > 0]]


def primesfrom3to(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """ Returns an array of primes, p < n """
    assert n >= 2
    sieve = np.ones(n // 2, dtype=bool)
    for i in range(3, int(n ** 0.5) + 1, 2):
        if sieve[i // 2]:
            sieve[i * i // 2::i] = False
    return np.r_[2, 2 * np.nonzero(sieve)[0][1::] + 1]


def primesfrom2to(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """ Input n>=6, Returns an array of primes, 2 <= p < n """
    assert n >= 6
    sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool)
    sieve[0] = False
    for i in range(int(n ** 0.5) // 3 + 1):
        if sieve[i]:
            k = 3 * i + 1 | 1
            sieve[((k * k) // 3)::2 * k] = False
            sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False
    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0] + 1) | 1)]


def sympy_sieve(n):
    return list(sympy.sieve.primerange(1, n))


b = perfplot.bench(
    setup=lambda n: n,
    kernels=[
        rwh_primes,
        rwh_primes1,
        rwh_primes2,
        sieve_wheel_30,
        sieve_of_eratosthenes,
        sieve_of_atkin,
        # ambi_sieve_plain,
        # sundaram3,
        ambi_sieve,
        primesfrom3to,
        primesfrom2to,
        sympy_sieve,
    ],
    n_range=[2 ** k for k in range(3, 25)],
    xlabel="n",
)
b.save("out.png")
b.show()

这是使用存储列表查找质数的一种优雅而简单的解决方案。从4个变量开始,你只需要测试除数的奇数质数,你只需要测试你要测试的质数的一半(测试9,11,13是否能整除17没有意义)。它将先前存储的质数作为除数进行测试。

    # Program to calculate Primes
 primes = [1,3,5,7]
for n in range(9,100000,2):
    for x in range(1,(len(primes)/2)):
        if n % primes[x] == 0:
            break
    else:
        primes.append(n)
print primes

在写这篇文章的时候,这是最快的工作解决方案(至少在我的机器上是这样)。它同时使用numpy和bitarray,并受到这个答案的primesfrom2to的启发。

import numpy as np
from bitarray import bitarray


def bit_primes(n):
    bit_sieve = bitarray(n // 3 + (n % 6 == 2))
    bit_sieve.setall(1)
    bit_sieve[0] = False

    for i in range(int(n ** 0.5) // 3 + 1):
        if bit_sieve[i]:
            k = 3 * i + 1 | 1
            bit_sieve[k * k // 3::2 * k] = False
            bit_sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False

    np_sieve = np.unpackbits(np.frombuffer(bit_sieve.tobytes(), dtype=np.uint8)).view(bool)
    return np.concatenate(((2, 3), ((3 * np.flatnonzero(np_sieve) + 1) | 1)))

下面是与素数from2to的比较,它之前被发现是unutbu比较中最快的解:

python3 -m timeit -s "import fast_primes" "fast_primes.bit_primes(1000000)"
200 loops, best of 5: 1.19 msec per loop

python3 -m timeit -s "import fast_primes" "fast_primes.primesfrom2to(1000000)"
200 loops, best of 5: 1.23 msec per loop

对于寻找100万以下的质数,bit_primes稍微快一些。 n值越大,差异就越大。在某些情况下,bit_primes的速度是原来的两倍多:

python3 -m timeit -s "import fast_primes" "fast_primes.bit_primes(500_000_000)"
1 loop, best of 5: 540 msec per loop

python3 -m timeit -s "import fast_primes" "fast_primes.primesfrom2to(500_000_000)"
1 loop, best of 5: 1.15 sec per loop

作为参考,以下是primesfrom2to I的最小修改版本(适用于Python 3):

def primesfrom2to(n):
    # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    """ Input n>=6, Returns a array of primes, 2 <= p < n"""
    sieve = np.ones(n // 3 + (n % 6 == 2), dtype=np.bool)
    sieve[0] = False
    for i in range(int(n ** 0.5) // 3 + 1):
        if sieve[i]:
            k = 3 * i + 1 | 1
            sieve[((k * k) // 3)::2 * k] = False
            sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False
    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0] + 1) | 1)]

下面是Eratosthenes的一个numpy版本,具有良好的复杂度(低于排序长度为n的数组)和向量化。与@unutbu相比,用46微秒就可以找到100万以下的所有质数。

import numpy as np 
def generate_primes(n):
    is_prime = np.ones(n+1,dtype=bool)
    is_prime[0:2] = False
    for i in range(int(n**0.5)+1):
        if is_prime[i]:
            is_prime[i**2::i]=False
    return np.where(is_prime)[0]

计时:

import time    
for i in range(2,10):
    timer =time.time()
    generate_primes(10**i)
    print('n = 10^',i,' time =', round(time.time()-timer,6))

>> n = 10^ 2  time = 5.6e-05
>> n = 10^ 3  time = 6.4e-05
>> n = 10^ 4  time = 0.000114
>> n = 10^ 5  time = 0.000593
>> n = 10^ 6  time = 0.00467
>> n = 10^ 7  time = 0.177758
>> n = 10^ 8  time = 1.701312
>> n = 10^ 9  time = 19.322478

我很惊讶居然没人提到numba。

该版本在2.47 ms±36.5µs内达到1M标记。

几年前,维基百科页面上出现了一个阿特金筛子的伪代码。这已经不存在了,参考阿特金筛似乎是一个不同的算法。一个2007/03/01版本的维基百科页面(Primer number as 2007-03-01)显示了我用作参考的伪代码。

import numpy as np
from numba import njit

@njit
def nb_primes(n):
    # Generates prime numbers 2 <= p <= n
    # Atkin's sieve -- see https://en.wikipedia.org/w/index.php?title=Prime_number&oldid=111775466
    sqrt_n = int(np.sqrt(n)) + 1

    # initialize the sieve
    s = np.full(n + 1, -1, dtype=np.int8)
    s[2] = 1
    s[3] = 1

    # put in candidate primes:
    # integers which have an odd number of
    # representations by certain quadratic forms
    for x in range(1, sqrt_n):
        x2 = x * x
        for y in range(1, sqrt_n):
            y2 = y * y
            k = 4 * x2 + y2
            if k <= n and (k % 12 == 1 or k % 12 == 5): s[k] *= -1
            k = 3 * x2 + y2
            if k <= n and (k % 12 == 7): s[k] *= -1
            k = 3 * x2 - y2
            if k <= n and x > y and k % 12 == 11: s[k] *= -1

    # eliminate composites by sieving
    for k in range(5, sqrt_n):
        if s[k]:
            k2 = k*k
            # k is prime, omit multiples of its square; this is sufficient because
            # composites which managed to get on the list cannot be square-free
            for i in range(1, n // k2 + 1):
                j = i * k2 # j ∈ {k², 2k², 3k², ..., n}
                s[j] = -1
    return np.nonzero(s>0)[0]

# initial run for "compilation" 
nb_primes(10)

时机

In[10]:
%timeit nb_primes(1_000_000)

Out[10]:
2.47 ms ± 36.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In[11]:
%timeit nb_primes(10_000_000)

Out[11]:
33.4 ms ± 373 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

In[12]:
%timeit nb_primes(100_000_000)

Out[12]:
828 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)