这是我能想到的最好的算法。

def get_primes(n):
    numbers = set(range(n, 1, -1))
    primes = []
    while numbers:
        p = numbers.pop()
        primes.append(p)
        numbers.difference_update(set(range(p*2, n+1, p)))
    return primes

>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import   get_primes').timeit(1)
1.1499958793645562

还能做得更快吗?

这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:

>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True

当前回答

下面是一个使用python的列表推导式生成质数的有趣技术(但不是最有效的):

noprimes = [j for i in range(2, 8) for j in range(i*2, 50, i)]
primes = [x for x in range(2, 50) if x not in noprimes]

其他回答

到目前为止,我尝试过的最快的方法是基于Python烹饪书erat2函数:

import itertools as it
def erat2a( ):
    D = {  }
    yield 2
    for q in it.islice(it.count(3), 0, None, 2):
        p = D.pop(q, None)
        if p is None:
            D[q*q] = q
            yield q
        else:
            x = q + 2*p
            while x in D:
                x += 2*p
            D[x] = p

关于加速的解释,请看下面的答案。

我测试了一些unutbu的功能,我用饥饿的百万数字计算它

获胜者是使用numpy库的函数,

注意:做一个内存利用率测试也很有趣:)

示例代码

完整的代码在我的github存储库

#!/usr/bin/env python

import lib
import timeit
import sys
import math
import datetime

import prettyplotlib as ppl
import numpy as np

import matplotlib.pyplot as plt
from prettyplotlib import brewer2mpl

primenumbers_gen = [
    'sieveOfEratosthenes',
    'ambi_sieve',
    'ambi_sieve_plain',
    'sundaram3',
    'sieve_wheel_30',
    'primesfrom3to',
    'primesfrom2to',
    'rwh_primes',
    'rwh_primes1',
    'rwh_primes2',
]

def human_format(num):
    # https://stackoverflow.com/questions/579310/formatting-long-numbers-as-strings-in-python?answertab=active#tab-top
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    # add more suffixes if you need them
    return '%.2f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])


if __name__=='__main__':

    # Vars
    n = 10000000 # number itereration generator
    nbcol = 5 # For decompose prime number generator
    nb_benchloop = 3 # Eliminate false positive value during the test (bench average time)
    datetimeformat = '%Y-%m-%d %H:%M:%S.%f'
    config = 'from __main__ import n; import lib'
    primenumbers_gen = {
        'sieveOfEratosthenes': {'color': 'b'},
        'ambi_sieve': {'color': 'b'},
        'ambi_sieve_plain': {'color': 'b'},
         'sundaram3': {'color': 'b'},
        'sieve_wheel_30': {'color': 'b'},
# # #        'primesfrom2to': {'color': 'b'},
        'primesfrom3to': {'color': 'b'},
        # 'rwh_primes': {'color': 'b'},
        # 'rwh_primes1': {'color': 'b'},
        'rwh_primes2': {'color': 'b'},
    }


    # Get n in command line
    if len(sys.argv)>1:
        n = int(sys.argv[1])

    step = int(math.ceil(n / float(nbcol)))
    nbs = np.array([i * step for i in range(1, int(nbcol) + 1)])
    set2 = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors

    print datetime.datetime.now().strftime(datetimeformat)
    print("Compute prime number to %(n)s" % locals())
    print("")

    results = dict()
    for pgen in primenumbers_gen:
        results[pgen] = dict()
        benchtimes = list()
        for n in nbs:
            t = timeit.Timer("lib.%(pgen)s(n)" % locals(), setup=config)
            execute_times = t.repeat(repeat=nb_benchloop,number=1)
            benchtime = np.mean(execute_times)
            benchtimes.append(benchtime)
        results[pgen] = {'benchtimes':np.array(benchtimes)}

fig, ax = plt.subplots(1)
plt.ylabel('Computation time (in second)')
plt.xlabel('Numbers computed')
i = 0
for pgen in primenumbers_gen:

    bench = results[pgen]['benchtimes']
    avgs = np.divide(bench,nbs)
    avg = np.average(bench, weights=nbs)

    # Compute linear regression
    A = np.vstack([nbs, np.ones(len(nbs))]).T
    a, b = np.linalg.lstsq(A, nbs*avgs)[0]

    # Plot
    i += 1
    #label="%(pgen)s" % locals()
    #ppl.plot(nbs, nbs*avgs, label=label, lw=1, linestyle='--', color=set2[i % 12])
    label="%(pgen)s avg" % locals()
    ppl.plot(nbs, a * nbs + b, label=label, lw=2, color=set2[i % 12])
print datetime.datetime.now().strftime(datetimeformat)

ppl.legend(ax, loc='upper left', ncol=4)

# Change x axis label
ax.get_xaxis().get_major_formatter().set_scientific(False)
fig.canvas.draw()
labels = [human_format(int(item.get_text())) for item in ax.get_xticklabels()]

ax.set_xticklabels(labels)
ax = plt.gca()

plt.show()

你有一个更快的代码和最简单的代码生成质数。 但对于更大的数字,当n=10000, 10000000时,它不起作用,可能是。pop()方法失败了

考虑:N是质数吗?

case 1: You got some factors of N, for i in range(2, N): If N is prime loop is performed for ~(N-2) times. else less number of times case 2: for i in range(2, int(math.sqrt(N)): Loop is performed for almost ~(sqrt(N)-2) times if N is prime else will break somewhere case 3: Better We Divide N With Only number of primes<=sqrt(N) Where loop is performed for only π(sqrt(N)) times π(sqrt(N)) << sqrt(N) as N increases from math import sqrt from time import * prime_list = [2] n = int(input()) s = time() for n0 in range(2,n+1): for i0 in prime_list: if n0%i0==0: break elif i0>=int(sqrt(n0)): prime_list.append(n0) break e = time() print(e-s) #print(prime_list); print(f'pi({n})={len(prime_list)}') print(f'{n}: {len(prime_list)}, time: {e-s}') Output 100: 25, time: 0.00010275840759277344 1000: 168, time: 0.0008606910705566406 10000: 1229, time: 0.015588521957397461 100000: 9592, time: 0.023436546325683594 1000000: 78498, time: 4.1965954303741455 10000000: 664579, time: 109.24591708183289 100000000: 5761455, time: 2289.130858898163

小于1000似乎很慢,但小于10^6我认为更快。

然而,我无法理解时间的复杂性。

下面是我在Python中通常用来生成质数的代码:

$ python -mtimeit -s'import sieve' 'sieve.sieve(1000000)' 
10 loops, best of 3: 445 msec per loop
$ cat sieve.py
from math import sqrt

def sieve(size):
 prime=[True]*size
 rng=xrange
 limit=int(sqrt(size))

 for i in rng(3,limit+1,+2):
  if prime[i]:
   prime[i*i::+i]=[False]*len(prime[i*i::+i])

 return [2]+[i for i in rng(3,size,+2) if prime[i]]

if __name__=='__main__':
 print sieve(100)

它不能与这里发布的更快的解决方案竞争,但至少它是纯python。

谢谢你提出这个问题。我今天真的学到了很多东西。

这里是最快的函数之一的两个更新版本(纯Python 3.6),

from itertools import compress

def rwh_primes1v1(n):
    """ Returns  a list of primes < n for n > 2 """
    sieve = bytearray([True]) * (n//2)
    for i in range(3,int(n**0.5)+1,2):
        if sieve[i//2]:
            sieve[i*i//2::i] = bytearray((n-i*i-1)//(2*i)+1)
    return [2,*compress(range(3,n,2), sieve[1:])]

def rwh_primes1v2(n):
    """ Returns a list of primes < n for n > 2 """
    sieve = bytearray([True]) * (n//2+1)
    for i in range(1,int(n**0.5)//2+1):
        if sieve[i]:
            sieve[2*i*(i+1)::2*i+1] = bytearray((n//2-2*i*(i+1))//(2*i+1)+1)
    return [2,*compress(range(3,n,2), sieve[1:])]