这是我能想到的最好的算法。

def get_primes(n):
    numbers = set(range(n, 1, -1))
    primes = []
    while numbers:
        p = numbers.pop()
        primes.append(p)
        numbers.difference_update(set(range(p*2, n+1, p)))
    return primes

>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import   get_primes').timeit(1)
1.1499958793645562

还能做得更快吗?

这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:

>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True

当前回答

很抱歉打扰,但erat2()在算法中有一个严重的缺陷。

在搜索下一个合成时,我们只需要测试奇数。 Q p都是奇数;那么q+p是偶数,不需要检验,但q+2*p总是奇数。这消除了while循环条件中的“if even”测试,并节省了大约30%的运行时。

当我们在它:而不是优雅的'D.pop(q,None)'获取和删除方法,使用'if q in D: p=D[q],del D[q]',这是两倍的速度!至少在我的机器上(P3-1Ghz)。 所以我建议这个聪明算法的实现:

def erat3( ):
    from itertools import islice, count

    # q is the running integer that's checked for primeness.
    # yield 2 and no other even number thereafter
    yield 2
    D = {}
    # no need to mark D[4] as we will test odd numbers only
    for q in islice(count(3),0,None,2):
        if q in D:                  #  is composite
            p = D[q]
            del D[q]
            # q is composite. p=D[q] is the first prime that
            # divides it. Since we've reached q, we no longer
            # need it in the map, but we'll mark the next
            # multiple of its witnesses to prepare for larger
            # numbers.
            x = q + p+p        # next odd(!) multiple
            while x in D:      # skip composites
                x += p+p
            D[x] = p
        else:                  # is prime
            # q is a new prime.
            # Yield it and mark its first multiple that isn't
            # already marked in previous iterations.
            D[q*q] = q
            yield q

其他回答

我测试了一些unutbu的功能,我用饥饿的百万数字计算它

获胜者是使用numpy库的函数,

注意:做一个内存利用率测试也很有趣:)

示例代码

完整的代码在我的github存储库

#!/usr/bin/env python

import lib
import timeit
import sys
import math
import datetime

import prettyplotlib as ppl
import numpy as np

import matplotlib.pyplot as plt
from prettyplotlib import brewer2mpl

primenumbers_gen = [
    'sieveOfEratosthenes',
    'ambi_sieve',
    'ambi_sieve_plain',
    'sundaram3',
    'sieve_wheel_30',
    'primesfrom3to',
    'primesfrom2to',
    'rwh_primes',
    'rwh_primes1',
    'rwh_primes2',
]

def human_format(num):
    # https://stackoverflow.com/questions/579310/formatting-long-numbers-as-strings-in-python?answertab=active#tab-top
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    # add more suffixes if you need them
    return '%.2f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])


if __name__=='__main__':

    # Vars
    n = 10000000 # number itereration generator
    nbcol = 5 # For decompose prime number generator
    nb_benchloop = 3 # Eliminate false positive value during the test (bench average time)
    datetimeformat = '%Y-%m-%d %H:%M:%S.%f'
    config = 'from __main__ import n; import lib'
    primenumbers_gen = {
        'sieveOfEratosthenes': {'color': 'b'},
        'ambi_sieve': {'color': 'b'},
        'ambi_sieve_plain': {'color': 'b'},
         'sundaram3': {'color': 'b'},
        'sieve_wheel_30': {'color': 'b'},
# # #        'primesfrom2to': {'color': 'b'},
        'primesfrom3to': {'color': 'b'},
        # 'rwh_primes': {'color': 'b'},
        # 'rwh_primes1': {'color': 'b'},
        'rwh_primes2': {'color': 'b'},
    }


    # Get n in command line
    if len(sys.argv)>1:
        n = int(sys.argv[1])

    step = int(math.ceil(n / float(nbcol)))
    nbs = np.array([i * step for i in range(1, int(nbcol) + 1)])
    set2 = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors

    print datetime.datetime.now().strftime(datetimeformat)
    print("Compute prime number to %(n)s" % locals())
    print("")

    results = dict()
    for pgen in primenumbers_gen:
        results[pgen] = dict()
        benchtimes = list()
        for n in nbs:
            t = timeit.Timer("lib.%(pgen)s(n)" % locals(), setup=config)
            execute_times = t.repeat(repeat=nb_benchloop,number=1)
            benchtime = np.mean(execute_times)
            benchtimes.append(benchtime)
        results[pgen] = {'benchtimes':np.array(benchtimes)}

fig, ax = plt.subplots(1)
plt.ylabel('Computation time (in second)')
plt.xlabel('Numbers computed')
i = 0
for pgen in primenumbers_gen:

    bench = results[pgen]['benchtimes']
    avgs = np.divide(bench,nbs)
    avg = np.average(bench, weights=nbs)

    # Compute linear regression
    A = np.vstack([nbs, np.ones(len(nbs))]).T
    a, b = np.linalg.lstsq(A, nbs*avgs)[0]

    # Plot
    i += 1
    #label="%(pgen)s" % locals()
    #ppl.plot(nbs, nbs*avgs, label=label, lw=1, linestyle='--', color=set2[i % 12])
    label="%(pgen)s avg" % locals()
    ppl.plot(nbs, a * nbs + b, label=label, lw=2, color=set2[i % 12])
print datetime.datetime.now().strftime(datetimeformat)

ppl.legend(ax, loc='upper left', ncol=4)

# Change x axis label
ax.get_xaxis().get_major_formatter().set_scientific(False)
fig.canvas.draw()
labels = [human_format(int(item.get_text())) for item in ax.get_xticklabels()]

ax.set_xticklabels(labels)
ax = plt.gca()

plt.show()

更快、更内存的纯Python代码:

def primes(n):
    """ Returns  a list of primes < n """
    sieve = [True] * n
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i]:
            sieve[i*i::2*i]=[False]*((n-i*i-1)//(2*i)+1)
    return [2] + [i for i in range(3,n,2) if sieve[i]]

或者从半筛子开始

def primes1(n):
    """ Returns  a list of primes < n """
    sieve = [True] * (n//2)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = [False] * ((n-i*i-1)//(2*i)+1)
    return [2] + [2*i+1 for i in range(1,n//2) if sieve[i]]

更快,内存更明智的numpy代码:

import numpy
def primesfrom3to(n):
    """ Returns a array of primes, 3 <= p < n """
    sieve = numpy.ones(n//2, dtype=bool)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = False
    return 2*numpy.nonzero(sieve)[0][1::]+1

从三分之一筛子开始的一种更快的变化:

import numpy
def primesfrom2to(n):
    """ Input n>=6, Returns a array of primes, 2 <= p < n """
    sieve = numpy.ones(n//3 + (n%6==2), dtype=bool)
    for i in range(1,int(n**0.5)//3+1):
        if sieve[i]:
            k=3*i+1|1
            sieve[       k*k//3     ::2*k] = False
            sieve[k*(k-2*(i&1)+4)//3::2*k] = False
    return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)]

上面代码的纯python版本(很难编码)将是:

def primes2(n):
    """ Input n>=6, Returns a list of primes, 2 <= p < n """
    n, correction = n-n%6+6, 2-(n%6>1)
    sieve = [True] * (n//3)
    for i in range(1,int(n**0.5)//3+1):
      if sieve[i]:
        k=3*i+1|1
        sieve[      k*k//3      ::2*k] = [False] * ((n//6-k*k//6-1)//k+1)
        sieve[k*(k-2*(i&1)+4)//3::2*k] = [False] * ((n//6-k*(k-2*(i&1)+4)//6-1)//k+1)
    return [2,3] + [3*i+1|1 for i in range(1,n//3-correction) if sieve[i]]

不幸的是,pure-python不采用更简单和更快的numpy方式进行赋值,并且在循环中调用len(),如[False]*len(sieve[((k*k)//3)::2*k])太慢。所以我不得不即兴修正输入(避免更多的数学运算),并做一些极端的(痛苦的)数学魔术。

我个人认为numpy(它被广泛使用)不是Python标准库的一部分是很遗憾的,而且Python开发人员似乎完全忽略了语法和速度方面的改进。

这里是最快的函数之一的两个更新版本(纯Python 3.6),

from itertools import compress

def rwh_primes1v1(n):
    """ Returns  a list of primes < n for n > 2 """
    sieve = bytearray([True]) * (n//2)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = bytearray((n-i*i-1)//(2*i)+1)
    return [2,*compress(range(3,n,2), sieve[1:])]

def rwh_primes1v2(n):
    """ Returns a list of primes < n for n > 2 """
    sieve = bytearray([True]) * (n//2+1)
    for i in range(1,int(n**0.5)//2+1):
        if sieve[i]:
            sieve[2*i*(i+1)::2*i+1] = bytearray((n//2-2*i*(i+1))//(2*i+1)+1)
    return [2,*compress(range(3,n,2), sieve[1:])]

我对这个问题反应迟钝,但这似乎是一个有趣的练习。我使用numpy,这可能是作弊,我怀疑这个方法是最快的,但它应该是清楚的。它筛选一个仅引用其下标的布尔数组,并从所有True值的下标中引出质数。不需要取模。

import numpy as np
def ajs_primes3a(upto):
    mat = np.ones((upto), dtype=bool)
    mat[0] = False
    mat[1] = False
    mat[4::2] = False
    for idx in range(3, int(upto ** 0.5)+1, 2):
        mat[idx*2::idx] = False
    return np.where(mat == True)[0]

我猜最快的方法是在代码中硬编码质数。

因此,为什么不编写一个缓慢的脚本,生成另一个源文件,其中包含所有数字,然后在运行实际程序时导入该源文件呢?

当然,只有当你在编译时知道N的上限时,这才有效,但这是(几乎)所有项目欧拉问题的情况。

 

PS:我可能错了,虽然解析源的硬连接质数比计算它们要慢,但据我所知,Python是从编译的.pyc文件运行的,所以在这种情况下,读取一个包含所有质数到N的二进制数组应该是非常快的。