我正在寻找最快的方法来获得π的值,作为一个个人挑战。更具体地说,我使用的方法不涉及使用#define常量M_PI,或硬编码的数字。

下面的程序测试了我所知道的各种方法。从理论上讲,内联汇编版本是最快的选择,尽管显然不能移植。我将它作为一个基准,与其他版本进行比较。在我的测试中,使用内置函数,4 * atan(1)版本在GCC 4.2上是最快的,因为它自动将atan(1)折叠成一个常量。通过指定-fno-builtin, atan2(0, -1)版本是最快的。

下面是主要的测试程序(pitimes.c):

#include <math.h>
#include <stdio.h>
#include <time.h>

#define ITERS 10000000
#define TESTWITH(x) {                                                       \
    diff = 0.0;                                                             \
    time1 = clock();                                                        \
    for (i = 0; i < ITERS; ++i)                                             \
        diff += (x) - M_PI;                                                 \
    time2 = clock();                                                        \
    printf("%s\t=> %e, time => %f\n", #x, diff, diffclock(time2, time1));   \
}

static inline double
diffclock(clock_t time1, clock_t time0)
{
    return (double) (time1 - time0) / CLOCKS_PER_SEC;
}

int
main()
{
    int i;
    clock_t time1, time2;
    double diff;

    /* Warmup. The atan2 case catches GCC's atan folding (which would
     * optimise the ``4 * atan(1) - M_PI'' to a no-op), if -fno-builtin
     * is not used. */
    TESTWITH(4 * atan(1))
    TESTWITH(4 * atan2(1, 1))

#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
    extern double fldpi();
    TESTWITH(fldpi())
#endif

    /* Actual tests start here. */
    TESTWITH(atan2(0, -1))
    TESTWITH(acos(-1))
    TESTWITH(2 * asin(1))
    TESTWITH(4 * atan2(1, 1))
    TESTWITH(4 * atan(1))

    return 0;
}

内联汇编的东西(fldpi.c)只适用于x86和x64系统:

double
fldpi()
{
    double pi;
    asm("fldpi" : "=t" (pi));
    return pi;
}

和一个构建脚本,构建我正在测试的所有配置(build.sh):

#!/bin/sh
gcc -O3 -Wall -c           -m32 -o fldpi-32.o fldpi.c
gcc -O3 -Wall -c           -m64 -o fldpi-64.o fldpi.c

gcc -O3 -Wall -ffast-math  -m32 -o pitimes1-32 pitimes.c fldpi-32.o
gcc -O3 -Wall              -m32 -o pitimes2-32 pitimes.c fldpi-32.o -lm
gcc -O3 -Wall -fno-builtin -m32 -o pitimes3-32 pitimes.c fldpi-32.o -lm
gcc -O3 -Wall -ffast-math  -m64 -o pitimes1-64 pitimes.c fldpi-64.o -lm
gcc -O3 -Wall              -m64 -o pitimes2-64 pitimes.c fldpi-64.o -lm
gcc -O3 -Wall -fno-builtin -m64 -o pitimes3-64 pitimes.c fldpi-64.o -lm

除了在各种编译器标志之间进行测试(我也比较了32位和64位,因为优化是不同的),我还尝试切换测试的顺序。但是,atan2(0, -1)版本在每次测试中仍然名列前茅。


当前回答

如果您愿意使用近似值,355 / 113适用于6个十进制数字,并且具有用于整数表达式的附加优势。如今,这已经不那么重要了,因为“浮点数学协处理器”已经没有任何意义了,但它曾经非常重要。

其他回答

我认为圆周率的值是圆的周长和半径之比。

它可以通过常规的数学计算简单地实现

这是一个“经典”方法,非常容易实现。 这个在python(不是最快的语言)中的实现:

from math import pi
from time import time


precision = 10**6 # higher value -> higher precision
                  # lower  value -> higher speed

t = time()

calc = 0
for k in xrange(0, precision):
    calc += ((-1)**k) / (2*k+1.)
calc *= 4. # this is just a little optimization

t = time()-t

print "Calculated: %.40f" % calc
print "Constant pi: %.40f" % pi
print "Difference: %.40f" % abs(calc-pi)
print "Time elapsed: %s" % repr(t)

你可以在这里找到更多信息。

无论如何,在python中获得精确的圆周率值的最快方法是:

from gmpy import pi
print pi(3000) # the rule is the same as 
               # the precision on the previous code

下面是gpy pi方法的源代码,我认为在这种情况下,代码没有注释那么有用:

static char doc_pi[]="\
pi(n): returns pi with n bits of precision in an mpf object\n\
";

/* This function was originally from netlib, package bmp, by
 * Richard P. Brent. Paulo Cesar Pereira de Andrade converted
 * it to C and used it in his LISP interpreter.
 *
 * Original comments:
 * 
 *   sets mp pi = 3.14159... to the available precision.
 *   uses the gauss-legendre algorithm.
 *   this method requires time o(ln(t)m(t)), so it is slower
 *   than mppi if m(t) = o(t**2), but would be faster for
 *   large t if a faster multiplication algorithm were used
 *   (see comments in mpmul).
 *   for a description of the method, see - multiple-precision
 *   zero-finding and the complexity of elementary function
 *   evaluation (by r. p. brent), in analytic computational
 *   complexity (edited by j. f. traub), academic press, 1976, 151-176.
 *   rounding options not implemented, no guard digits used.
*/
static PyObject *
Pygmpy_pi(PyObject *self, PyObject *args)
{
    PympfObject *pi;
    int precision;
    mpf_t r_i2, r_i3, r_i4;
    mpf_t ix;

    ONE_ARG("pi", "i", &precision);
    if(!(pi = Pympf_new(precision))) {
        return NULL;
    }

    mpf_set_si(pi->f, 1);

    mpf_init(ix);
    mpf_set_ui(ix, 1);

    mpf_init2(r_i2, precision);

    mpf_init2(r_i3, precision);
    mpf_set_d(r_i3, 0.25);

    mpf_init2(r_i4, precision);
    mpf_set_d(r_i4, 0.5);
    mpf_sqrt(r_i4, r_i4);

    for (;;) {
        mpf_set(r_i2, pi->f);
        mpf_add(pi->f, pi->f, r_i4);
        mpf_div_ui(pi->f, pi->f, 2);
        mpf_mul(r_i4, r_i2, r_i4);
        mpf_sub(r_i2, pi->f, r_i2);
        mpf_mul(r_i2, r_i2, r_i2);
        mpf_mul(r_i2, r_i2, ix);
        mpf_sub(r_i3, r_i3, r_i2);
        mpf_sqrt(r_i4, r_i4);
        mpf_mul_ui(ix, ix, 2);
        /* Check for convergence */
        if (!(mpf_cmp_si(r_i2, 0) && 
              mpf_get_prec(r_i2) >= (unsigned)precision)) {
            mpf_mul(pi->f, pi->f, r_i4);
            mpf_div(pi->f, pi->f, r_i3);
            break;
        }
    }

    mpf_clear(ix);
    mpf_clear(r_i2);
    mpf_clear(r_i3);
    mpf_clear(r_i4);

    return (PyObject*)pi;
}

编辑:我在剪切和粘贴和缩进方面有一些问题,你可以在这里找到源代码。

为了完整起见,一个c++模板版本,对于一个优化的构建,它将在编译时计算PI的近似值,并将内联到单个值。

#include <iostream>

template<int I>
struct sign
{
    enum {value = (I % 2) == 0 ? 1 : -1};
};

template<int I, int J>
struct pi_calc
{
    inline static double value ()
    {
        return (pi_calc<I-1, J>::value () + pi_calc<I-1, J+1>::value ()) / 2.0;
    }
};

template<int J>
struct pi_calc<0, J>
{
    inline static double value ()
    {
        return (sign<J>::value * 4.0) / (2.0 * J + 1.0) + pi_calc<0, J-1>::value ();
    }
};


template<>
struct pi_calc<0, 0>
{
    inline static double value ()
    {
        return 4.0;
    }
};

template<int I>
struct pi
{
    inline static double value ()
    {
        return pi_calc<I, I>::value ();
    }
};

int main ()
{
    std::cout.precision (12);

    const double pi_value = pi<10>::value ();

    std::cout << "pi ~ " << pi_value << std::endl;

    return 0;
}

注意,对于I > 10,优化构建可能会很慢,对于非优化运行也是如此。对于12次迭代,我相信大约有80k次调用value()(在没有内存的情况下)。

更好的方法

要获得标准常数(如pi)或标准概念的输出,我们应该首先使用所使用语言中可用的内置方法。它将以最快和最好的方式返回一个值。我正在使用python以最快的方式运行,以获得圆周率的值。

数学库的PI变量。数学库将变量pi存储为常数。

math_pi.py

import math
print math.pi

使用linux /usr/bin/time -v python math_pi.py的time工具运行脚本

输出:

Command being timed: "python math_pi.py"
User time (seconds): 0.01
System time (seconds): 0.01
Percent of CPU this job got: 91%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.03

用arccos的数学方法

acos_pi.py

import math
print math.acos(-1)

使用linux /usr/bin/time -v python acos_pi.py的time工具运行脚本

输出:

Command being timed: "python acos_pi.py"
User time (seconds): 0.02
System time (seconds): 0.01
Percent of CPU this job got: 94%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.03

使用BBP公式

bbp_pi.py

from decimal import Decimal, getcontext
getcontext().prec=100
print sum(1/Decimal(16)**k * 
          (Decimal(4)/(8*k+1) - 
           Decimal(2)/(8*k+4) - 
           Decimal(1)/(8*k+5) -
           Decimal(1)/(8*k+6)) for k in range(100))

使用linux /usr/bin/time -v python bbp_pi.py的time工具运行脚本

输出:

Command being timed: "python c.py"
User time (seconds): 0.05
System time (seconds): 0.01
Percent of CPU this job got: 98%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.06

因此,最好的方法是使用语言提供的内置方法,因为它们是获得输出的最快和最好的方法。在python中使用math.pi

正好是3![弗林克教授(辛普森一家)]

开玩笑,但这里有一个在c#(。微软网络框架。

using System;
using System.Text;

class Program {
    static void Main(string[] args) {
        int Digits = 100;

        BigNumber x = new BigNumber(Digits);
        BigNumber y = new BigNumber(Digits);
        x.ArcTan(16, 5);
        y.ArcTan(4, 239);
        x.Subtract(y);
        string pi = x.ToString();
        Console.WriteLine(pi);
    }
}

public class BigNumber {
    private UInt32[] number;
    private int size;
    private int maxDigits;

    public BigNumber(int maxDigits) {
        this.maxDigits = maxDigits;
        this.size = (int)Math.Ceiling((float)maxDigits * 0.104) + 2;
        number = new UInt32[size];
    }
    public BigNumber(int maxDigits, UInt32 intPart)
        : this(maxDigits) {
        number[0] = intPart;
        for (int i = 1; i < size; i++) {
            number[i] = 0;
        }
    }
    private void VerifySameSize(BigNumber value) {
        if (Object.ReferenceEquals(this, value))
            throw new Exception("BigNumbers cannot operate on themselves");
        if (value.size != this.size)
            throw new Exception("BigNumbers must have the same size");
    }

    public void Add(BigNumber value) {
        VerifySameSize(value);

        int index = size - 1;
        while (index >= 0 && value.number[index] == 0)
            index--;

        UInt32 carry = 0;
        while (index >= 0) {
            UInt64 result = (UInt64)number[index] +
                            value.number[index] + carry;
            number[index] = (UInt32)result;
            if (result >= 0x100000000U)
                carry = 1;
            else
                carry = 0;
            index--;
        }
    }
    public void Subtract(BigNumber value) {
        VerifySameSize(value);

        int index = size - 1;
        while (index >= 0 && value.number[index] == 0)
            index--;

        UInt32 borrow = 0;
        while (index >= 0) {
            UInt64 result = 0x100000000U + (UInt64)number[index] -
                            value.number[index] - borrow;
            number[index] = (UInt32)result;
            if (result >= 0x100000000U)
                borrow = 0;
            else
                borrow = 1;
            index--;
        }
    }
    public void Multiply(UInt32 value) {
        int index = size - 1;
        while (index >= 0 && number[index] == 0)
            index--;

        UInt32 carry = 0;
        while (index >= 0) {
            UInt64 result = (UInt64)number[index] * value + carry;
            number[index] = (UInt32)result;
            carry = (UInt32)(result >> 32);
            index--;
        }
    }
    public void Divide(UInt32 value) {
        int index = 0;
        while (index < size && number[index] == 0)
            index++;

        UInt32 carry = 0;
        while (index < size) {
            UInt64 result = number[index] + ((UInt64)carry << 32);
            number[index] = (UInt32)(result / (UInt64)value);
            carry = (UInt32)(result % (UInt64)value);
            index++;
        }
    }
    public void Assign(BigNumber value) {
        VerifySameSize(value);
        for (int i = 0; i < size; i++) {
            number[i] = value.number[i];
        }
    }

    public override string ToString() {
        BigNumber temp = new BigNumber(maxDigits);
        temp.Assign(this);

        StringBuilder sb = new StringBuilder();
        sb.Append(temp.number[0]);
        sb.Append(System.Globalization.CultureInfo.CurrentCulture.NumberFormat.CurrencyDecimalSeparator);

        int digitCount = 0;
        while (digitCount < maxDigits) {
            temp.number[0] = 0;
            temp.Multiply(100000);
            sb.AppendFormat("{0:D5}", temp.number[0]);
            digitCount += 5;
        }

        return sb.ToString();
    }
    public bool IsZero() {
        foreach (UInt32 item in number) {
            if (item != 0)
                return false;
        }
        return true;
    }

    public void ArcTan(UInt32 multiplicand, UInt32 reciprocal) {
        BigNumber X = new BigNumber(maxDigits, multiplicand);
        X.Divide(reciprocal);
        reciprocal *= reciprocal;

        this.Assign(X);

        BigNumber term = new BigNumber(maxDigits);
        UInt32 divisor = 1;
        bool subtractTerm = true;
        while (true) {
            X.Divide(reciprocal);
            term.Assign(X);
            divisor += 2;
            term.Divide(divisor);
            if (term.IsZero())
                break;

            if (subtractTerm)
                this.Subtract(term);
            else
                this.Add(term);
            subtractTerm = !subtractTerm;
        }
    }
}