我把Project Euler中的第12题作为一个编程练习,并比较了我在C、Python、Erlang和Haskell中的实现(当然不是最优的)。为了获得更高的执行时间,我搜索第一个因数超过1000的三角形数,而不是原始问题中所述的500。

结果如下:

C:

lorenzo@enzo:~/erlang$ gcc -lm -o euler12.bin euler12.c
lorenzo@enzo:~/erlang$ time ./euler12.bin
842161320

real    0m11.074s
user    0m11.070s
sys 0m0.000s

Python:

lorenzo@enzo:~/erlang$ time ./euler12.py 
842161320

real    1m16.632s
user    1m16.370s
sys 0m0.250s

Python与PyPy:

lorenzo@enzo:~/Downloads/pypy-c-jit-43780-b590cf6de419-linux64/bin$ time ./pypy /home/lorenzo/erlang/euler12.py 
842161320

real    0m13.082s
user    0m13.050s
sys 0m0.020s

Erlang:

lorenzo@enzo:~/erlang$ erlc euler12.erl 
lorenzo@enzo:~/erlang$ time erl -s euler12 solve
Erlang R13B03 (erts-5.7.4) [source] [64-bit] [smp:4:4] [rq:4] [async-threads:0] [hipe] [kernel-poll:false]

Eshell V5.7.4  (abort with ^G)
1> 842161320

real    0m48.259s
user    0m48.070s
sys 0m0.020s

Haskell:

lorenzo@enzo:~/erlang$ ghc euler12.hs -o euler12.hsx
[1 of 1] Compiling Main             ( euler12.hs, euler12.o )
Linking euler12.hsx ...
lorenzo@enzo:~/erlang$ time ./euler12.hsx 
842161320

real    2m37.326s
user    2m37.240s
sys 0m0.080s

简介:

C: 100% Python: 692% (PyPy占118%) Erlang: 436%(135%归功于RichardC) Haskell: 1421%

我认为C语言有一个很大的优势,因为它使用长来进行计算,而不是像其他三种那样使用任意长度的整数。它也不需要首先加载运行时(其他的呢?)

问题1: Erlang, Python和Haskell是否会因为使用任意长度的整数而降低速度,或者只要值小于MAXINT就不会?

问题2: 哈斯克尔为什么这么慢?是否有一个编译器标志关闭刹车或它是我的实现?(后者是很有可能的,因为Haskell对我来说是一本有七个印章的书。)

问题3: 你能否给我一些提示,如何在不改变我确定因素的方式的情况下优化这些实现?以任何方式优化:更好、更快、更“原生”的语言。

编辑:

问题4: 我的函数实现是否允许LCO(最后调用优化,也就是尾递归消除),从而避免在调用堆栈中添加不必要的帧?

虽然我不得不承认我的Haskell和Erlang知识非常有限,但我确实试图用这四种语言实现尽可能相似的相同算法。


使用的源代码:

#include <stdio.h>
#include <math.h>

int factorCount (long n)
{
    double square = sqrt (n);
    int isquare = (int) square;
    int count = isquare == square ? -1 : 0;
    long candidate;
    for (candidate = 1; candidate <= isquare; candidate ++)
        if (0 == n % candidate) count += 2;
    return count;
}

int main ()
{
    long triangle = 1;
    int index = 1;
    while (factorCount (triangle) < 1001)
    {
        index ++;
        triangle += index;
    }
    printf ("%ld\n", triangle);
}

#! /usr/bin/env python3.2

import math

def factorCount (n):
    square = math.sqrt (n)
    isquare = int (square)
    count = -1 if isquare == square else 0
    for candidate in range (1, isquare + 1):
        if not n % candidate: count += 2
    return count

triangle = 1
index = 1
while factorCount (triangle) < 1001:
    index += 1
    triangle += index

print (triangle)

-module (euler12).
-compile (export_all).

factorCount (Number) -> factorCount (Number, math:sqrt (Number), 1, 0).

factorCount (_, Sqrt, Candidate, Count) when Candidate > Sqrt -> Count;

factorCount (_, Sqrt, Candidate, Count) when Candidate == Sqrt -> Count + 1;

factorCount (Number, Sqrt, Candidate, Count) ->
    case Number rem Candidate of
        0 -> factorCount (Number, Sqrt, Candidate + 1, Count + 2);
        _ -> factorCount (Number, Sqrt, Candidate + 1, Count)
    end.

nextTriangle (Index, Triangle) ->
    Count = factorCount (Triangle),
    if
        Count > 1000 -> Triangle;
        true -> nextTriangle (Index + 1, Triangle + Index + 1)  
    end.

solve () ->
    io:format ("~p~n", [nextTriangle (1, 1) ] ),
    halt (0).

factorCount number = factorCount' number isquare 1 0 - (fromEnum $ square == fromIntegral isquare)
    where square = sqrt $ fromIntegral number
          isquare = floor square

factorCount' number sqrt candidate count
    | fromIntegral candidate > sqrt = count
    | number `mod` candidate == 0 = factorCount' number sqrt (candidate + 1) (count + 2)
    | otherwise = factorCount' number sqrt (candidate + 1) count

nextTriangle index triangle
    | factorCount triangle > 1000 = triangle
    | otherwise = nextTriangle (index + 1) (triangle + index + 1)

main = print $ nextTriangle 1 1

当前回答

问题1:Erlang、Python和Haskell是否会因为使用 任意长度的整数,只要值更小 比MAXINT ?

对于Erlang,第一个问题的答案是否定的。最后一个问题可以通过适当地使用Erlang来回答,如下所示:

http://bredsaal.dk/learning-erlang-using-projecteuler-net

由于它比您最初的C示例要快,我猜它会有很多问题,因为其他人已经详细讨论过了。

这个Erlang模块在一个便宜的上网本上执行大约5秒…它使用erlang中的网络线程模型,并演示了如何利用事件模型。它可以分布在许多节点上。而且速度很快。不是我的代码。

-module(p12dist).  
-author("Jannich Brendle, jannich@bredsaal.dk, http://blog.bredsaal.dk").  
-compile(export_all).

server() ->  
  server(1).

server(Number) ->  
  receive {getwork, Worker_PID} -> Worker_PID ! {work,Number,Number+100},  
  server(Number+101);  
  {result,T} -> io:format("The result is: \~w.\~n", [T]);  
  _ -> server(Number)  
  end.

worker(Server_PID) ->  
  Server_PID ! {getwork, self()},  
  receive {work,Start,End} -> solve(Start,End,Server_PID)  
  end,  
  worker(Server_PID).

start() ->  
  Server_PID = spawn(p12dist, server, []),  
  spawn(p12dist, worker, [Server_PID]),  
  spawn(p12dist, worker, [Server_PID]),  
  spawn(p12dist, worker, [Server_PID]),  
  spawn(p12dist, worker, [Server_PID]).

solve(N,End,_) when N =:= End -> no_solution;

solve(N,End,Server_PID) ->  
  T=round(N*(N+1)/2),
  case (divisor(T,round(math:sqrt(T))) > 500) of  
    true ->  
      Server_PID ! {result,T};  
    false ->  
      solve(N+1,End,Server_PID)  
  end.

divisors(N) ->  
  divisor(N,round(math:sqrt(N))).

divisor(_,0) -> 1;  
divisor(N,I) ->  
  case (N rem I) =:= 0 of  
  true ->  
    2+divisor(N,I-1);  
  false ->  
    divisor(N,I-1)  
  end.

下面的测试发生在Intel(R) Atom(TM) CPU N270 @ 1.60GHz上

~$ time erl -noshell -s p12dist start

The result is: 76576500.

^C

BREAK: (a)bort (c)ontinue (p)roc info (i)nfo (l)oaded
       (v)ersion (k)ill (D)b-tables (d)istribution
a

real    0m5.510s
user    0m5.836s
sys 0m0.152s

其他回答

看看这个博客。在过去一年左右的时间里,他用Haskell和Python完成了一些Project Euler问题,他通常发现Haskell要快得多。我认为在这些语言之间,它更多地与你的流畅性和编码风格有关。

说到Python速度,你使用了错误的实现!尝试一下PyPy,对于这样的事情,你会发现它要快得多。

我把“Jannich Brendle”版本改成了1000,而不是500。并列出euler12.bin, euler12.bin的结果。话务量,p12dist.erl。两个erl代码都使用'+native'进行编译。

zhengs-MacBook-Pro:workspace zhengzhibin$ time erl -noshell -s p12dist start
The result is: 842161320.

real    0m3.879s
user    0m14.553s
sys     0m0.314s
zhengs-MacBook-Pro:workspace zhengzhibin$ time erl -noshell -s euler12 solve
842161320

real    0m10.125s
user    0m10.078s
sys     0m0.046s
zhengs-MacBook-Pro:workspace zhengzhibin$ time ./euler12.bin 
842161320

real    0m5.370s
user    0m5.328s
sys     0m0.004s
zhengs-MacBook-Pro:workspace zhengzhibin$

I made the assumption that the number of factors is only large if the numbers involved have many small factors. So I used thaumkid's excellent algorithm, but first used an approximation to the factor count that is never too small. It's quite simple: Check for prime factors up to 29, then check the remaining number and calculate an upper bound for the nmber of factors. Use this to calculate an upper bound for the number of factors, and if that number is high enough, calculate the exact number of factors.

下面的代码不需要这个假设来保证正确性,但是为了快速。这似乎很有效;只有大约十万分之一的数字给出了足够高的估计,需要进行全面检查。

代码如下:

// Return at least the number of factors of n.
static uint64_t approxfactorcount (uint64_t n)
{
    uint64_t count = 1, add;

#define CHECK(d)                            \
    do {                                    \
        if (n % d == 0) {                   \
            add = count;                    \
            do { n /= d; count += add; }    \
            while (n % d == 0);             \
        }                                   \
    } while (0)

    CHECK ( 2); CHECK ( 3); CHECK ( 5); CHECK ( 7); CHECK (11); CHECK (13);
    CHECK (17); CHECK (19); CHECK (23); CHECK (29);
    if (n == 1) return count;
    if (n < 1ull * 31 * 31) return count * 2;
    if (n < 1ull * 31 * 31 * 37) return count * 4;
    if (n < 1ull * 31 * 31 * 37 * 37) return count * 8;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41) return count * 16;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43) return count * 32;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47) return count * 64;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53) return count * 128;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53 * 59) return count * 256;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53 * 59 * 61) return count * 512;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53 * 59 * 61 * 67) return count * 1024;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53 * 59 * 61 * 67 * 71) return count * 2048;
    if (n < 1ull * 31 * 31 * 37 * 37 * 41 * 43 * 47 * 53 * 59 * 61 * 67 * 71 * 73) return count * 4096;
    return count * 1000000;
}

// Return the number of factors of n.
static uint64_t factorcount (uint64_t n)
{
    uint64_t count = 1, add;

    CHECK (2); CHECK (3);

    uint64_t d = 5, inc = 2;
    for (; d*d <= n; d += inc, inc = (6 - inc))
        CHECK (d);

    if (n > 1) count *= 2; // n must be a prime number
    return count;
}

// Prints triangular numbers with record numbers of factors.
static void printrecordnumbers (uint64_t limit)
{
    uint64_t record = 30000;

    uint64_t count1, factor1;
    uint64_t count2 = 1, factor2 = 1;

    for (uint64_t n = 1; n <= limit; ++n)
    {
        factor1 = factor2;
        count1 = count2;

        factor2 = n + 1; if (factor2 % 2 == 0) factor2 /= 2;
        count2 = approxfactorcount (factor2);

        if (count1 * count2 > record)
        {
            uint64_t factors = factorcount (factor1) * factorcount (factor2);
            if (factors > record)
            {
                printf ("%lluth triangular number = %llu has %llu factors\n", n, factor1 * factor2, factors);
                record = factors;
            }
        }
    }
}

其中,14753024个三角形数有13824个因子用时约0.7秒,879207615个三角形数有61440个因子用时34秒,12524486975个三角形数有138240个因子用时10分5秒,26467,792064个三角形数有172032个因子用时21分25秒(2.4GHz Core2 Duo),因此该代码平均每个数只需要116个处理器周期。最后一个三角数本身大于2^68,所以

问题1:erlang, python和haskell会因为使用任意长度的整数而降低速度吗?还是只要值小于MAXINT就不会?

This is unlikely. I cannot say much about Erlang and Haskell (well, maybe a bit about Haskell below) but I can point a lot of other bottlenecks in Python. Every time the program tries to execute an operation with some values in Python, it should verify whether the values are from the proper type, and it costs a bit of time. Your factorCount function just allocates a list with range (1, isquare + 1) various times, and runtime, malloc-styled memory allocation is way slower than iterating on a range with a counter as you do in C. Notably, the factorCount() is called multiple times and so allocates a lot of lists. Also, let us not forget that Python is interpreted and the CPython interpreter has no great focus on being optimized.

编辑:哦,好吧,我注意到你使用的是Python 3,所以range()不返回一个列表,而是一个生成器。在这种情况下,我关于分配列表的观点有一半是错误的:该函数只是分配范围对象,尽管效率很低,但没有分配包含很多项的列表那么低。

问题2:为什么haskell这么慢?是否有一个编译器标志关闭刹车或它是我的实现?(后者很有可能,因为haskell对我来说是一本有七个印章的书。)

你在使用Hugs吗?Hugs是一个相当慢的解释器。如果你正在使用它,也许你可以得到一个更好的GHC时间-但我只是在思考假设,这种东西,一个好的Haskell编译器做的是非常迷人的,远远超出我的理解:)

问题3:你能给我一些提示吗?如何在不改变我确定因素的方式的情况下优化这些实现?以任何方式优化:更好、更快、更“原生”的语言。

我得说你在玩一场不好笑的游戏。了解各种语言最好的部分是尽可能以不同的方式使用它们:)但我离题了,我只是对这一点没有任何建议。对不起,我希望有人能在这种情况下帮助你:)

问题4:我的函数实现是否允许LCO,从而避免在调用堆栈中添加不必要的帧?

据我所知,您只需要确保您的递归调用是返回值之前的最后一个命令。换句话说,像下面这样的函数可以使用这样的优化:

def factorial(n, acc=1):
    if n > 1:
        acc = acc * n
        n = n - 1
        return factorial(n, acc)
    else:
        return acc

然而,如果你的函数如下所示,你就不会有这样的优化,因为在递归调用之后有一个操作(乘法):

def factorial2(n):
    if n > 1:
        f = factorial2(n-1)
        return f*n
    else:
        return 1

我将操作分隔在一些局部变量中,以便明确执行哪些操作。然而,最常见的是看到这些函数如下所示,但它们对于我所说的观点是等价的:

def factorial(n, acc=1):
    if n > 1:
        return factorial(n-1, acc*n)
    else:
        return acc

def factorial2(n):
    if n > 1:
        return n*factorial(n-1)
    else:
        return 1

注意,这是由编译器/解释器来决定是否进行尾递归。例如,如果我记得很清楚,Python解释器就不会这样做(我在示例中使用Python只是因为它的语法流畅)。不管怎样,如果你发现了一些奇怪的东西,比如带两个参数的阶乘函数(其中一个参数有acc, accumulator等名称),现在你知道为什么人们这样做了:)

只是为了好玩。下面是一个更“原生”的Haskell实现:

import Control.Applicative
import Control.Monad
import Data.Either
import Math.NumberTheory.Powers.Squares

isInt :: RealFrac c => c -> Bool
isInt = (==) <$> id <*> fromInteger . round

intSqrt :: (Integral a) => a -> Int
--intSqrt = fromIntegral . floor . sqrt . fromIntegral
intSqrt = fromIntegral . integerSquareRoot'

factorize :: Int -> [Int]
factorize 1 = []
factorize n = first : factorize (quot n first)
  where first = (!! 0) $ [a | a <- [2..intSqrt n], rem n a == 0] ++ [n]

factorize2 :: Int -> [(Int,Int)]
factorize2 = foldl (\ls@((val,freq):xs) y -> if val == y then (val,freq+1):xs else (y,1):ls) [(0,0)] . factorize

numDivisors :: Int -> Int
numDivisors = foldl (\acc (_,y) -> acc * (y+1)) 1 <$> factorize2

nextTriangleNumber :: (Int,Int) -> (Int,Int)
nextTriangleNumber (n,acc) = (n+1,acc+n+1)

forward :: Int -> (Int, Int) -> Either (Int, Int) (Int, Int)
forward k val@(n,acc) = if numDivisors acc > k then Left val else Right (nextTriangleNumber val)

problem12 :: Int -> (Int, Int)
problem12 n = (!!0) . lefts . scanl (>>=) (forward n (1,1)) . repeat . forward $ n

main = do
  let (n,val) = problem12 1000
  print val

使用ghc -O3,它在我的机器上持续运行0.55-0.58秒(1.73GHz Core i7)。

C版本中一个更有效的factorCount函数:

int factorCount (int n)
{
  int count = 1;
  int candidate,tmpCount;
  while (n % 2 == 0) {
    count++;
    n /= 2;
  }
    for (candidate = 3; candidate < n && candidate * candidate < n; candidate += 2)
    if (n % candidate == 0) {
      tmpCount = 1;
      do {
        tmpCount++;
        n /= candidate;
      } while (n % candidate == 0);
       count*=tmpCount;
      }
  if (n > 1)
    count *= 2;
  return count;
}

在main中使用gcc -O3 -lm将long类型更改为int类型,该程序始终在0.31-0.35秒内运行。

如果您利用第n个三角形数= n*(n+1)/2,并且n和(n+1)具有完全不同的质因数分解,则可以使两者运行得更快,因此可以将每个一半的因数数相乘,以得到整体的因数数。以下几点:

int main ()
{
  int triangle = 0,count1,count2 = 1;
  do {
    count1 = count2;
    count2 = ++triangle % 2 == 0 ? factorCount(triangle+1) : factorCount((triangle+1)/2);
  } while (count1*count2 < 1001);
  printf ("%lld\n", ((long long)triangle)*(triangle+1)/2);
}

将c代码的运行时间减少到0.17-0.19秒,它可以处理更大的搜索——大于10000个因数在我的机器上大约需要43秒。我给感兴趣的读者留下了类似的haskell加速。