例如,乘法和除法可以使用位运算符来实现
i*2 = i<<1
i*3 = (i<<1) + i;
i*10 = (i<<3) + (i<<1)
等等。
用say (i<<3)+(i<<1)乘10是否比直接用i*10更快?有没有什么输入是不能用这种方法乘或除的?
例如,乘法和除法可以使用位运算符来实现
i*2 = i<<1
i*3 = (i<<1) + i;
i*10 = (i<<3) + (i<<1)
等等。
用say (i<<3)+(i<<1)乘10是否比直接用i*10更快?有没有什么输入是不能用这种方法乘或除的?
当前回答
In the case of signed integers and right shift vs division, it can make a difference. For negative numbers, the shift rounds rounds towards negative infinity whereas division rounds towards zero. Of course the compiler will change the division to something cheaper, but it will usually change it to something that has the same rounding behavior as division, because it is either unable to prove that the variable won't be negative or it simply doesn't care. So if you can prove that a number won't be negative or if you don't care which way it will round, you can do that optimization in a way that is more likely to make a difference.
其他回答
它是否真的更快取决于实际使用的硬件和编译器。
这取决于处理器和编译器。一些编译器已经通过这种方式优化代码了,其他的还没有。 因此,每次需要以这种方式优化代码时,您都需要检查。
除非您迫切需要优化,否则我不会为了节省汇编指令或处理器周期而打乱源代码。
如果在gcc编译器上比较x+x, x*2和x<<1语法的输出,那么在x86汇编中会得到相同的结果:https://godbolt.org/z/JLpp0j
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
mov eax, DWORD PTR [rbp-4]
add eax, eax
pop rbp
ret
因此,您可以认为gcc足够聪明,可以独立于您输入的内容确定自己的最佳解决方案。
我也想看看我能不能打败房子。这是一个更通用的任意数乘任意数的位乘法。我做的宏比普通的乘法要慢25%到两倍。正如其他人所说,如果它接近2的倍数或由几个2的倍数组成,你可能会赢。比如由(X<<4)+(X<<2)+(X<<1)+X组成的X*23要比由(X<<6)+X组成的X*65慢。
#include <stdio.h>
#include <time.h>
#define MULTIPLYINTBYMINUS(X,Y) (-((X >> 30) & 1)&(Y<<30))+(-((X >> 29) & 1)&(Y<<29))+(-((X >> 28) & 1)&(Y<<28))+(-((X >> 27) & 1)&(Y<<27))+(-((X >> 26) & 1)&(Y<<26))+(-((X >> 25) & 1)&(Y<<25))+(-((X >> 24) & 1)&(Y<<24))+(-((X >> 23) & 1)&(Y<<23))+(-((X >> 22) & 1)&(Y<<22))+(-((X >> 21) & 1)&(Y<<21))+(-((X >> 20) & 1)&(Y<<20))+(-((X >> 19) & 1)&(Y<<19))+(-((X >> 18) & 1)&(Y<<18))+(-((X >> 17) & 1)&(Y<<17))+(-((X >> 16) & 1)&(Y<<16))+(-((X >> 15) & 1)&(Y<<15))+(-((X >> 14) & 1)&(Y<<14))+(-((X >> 13) & 1)&(Y<<13))+(-((X >> 12) & 1)&(Y<<12))+(-((X >> 11) & 1)&(Y<<11))+(-((X >> 10) & 1)&(Y<<10))+(-((X >> 9) & 1)&(Y<<9))+(-((X >> 8) & 1)&(Y<<8))+(-((X >> 7) & 1)&(Y<<7))+(-((X >> 6) & 1)&(Y<<6))+(-((X >> 5) & 1)&(Y<<5))+(-((X >> 4) & 1)&(Y<<4))+(-((X >> 3) & 1)&(Y<<3))+(-((X >> 2) & 1)&(Y<<2))+(-((X >> 1) & 1)&(Y<<1))+(-((X >> 0) & 1)&(Y<<0))
#define MULTIPLYINTBYSHIFT(X,Y) (((((X >> 30) & 1)<<31)>>31)&(Y<<30))+(((((X >> 29) & 1)<<31)>>31)&(Y<<29))+(((((X >> 28) & 1)<<31)>>31)&(Y<<28))+(((((X >> 27) & 1)<<31)>>31)&(Y<<27))+(((((X >> 26) & 1)<<31)>>31)&(Y<<26))+(((((X >> 25) & 1)<<31)>>31)&(Y<<25))+(((((X >> 24) & 1)<<31)>>31)&(Y<<24))+(((((X >> 23) & 1)<<31)>>31)&(Y<<23))+(((((X >> 22) & 1)<<31)>>31)&(Y<<22))+(((((X >> 21) & 1)<<31)>>31)&(Y<<21))+(((((X >> 20) & 1)<<31)>>31)&(Y<<20))+(((((X >> 19) & 1)<<31)>>31)&(Y<<19))+(((((X >> 18) & 1)<<31)>>31)&(Y<<18))+(((((X >> 17) & 1)<<31)>>31)&(Y<<17))+(((((X >> 16) & 1)<<31)>>31)&(Y<<16))+(((((X >> 15) & 1)<<31)>>31)&(Y<<15))+(((((X >> 14) & 1)<<31)>>31)&(Y<<14))+(((((X >> 13) & 1)<<31)>>31)&(Y<<13))+(((((X >> 12) & 1)<<31)>>31)&(Y<<12))+(((((X >> 11) & 1)<<31)>>31)&(Y<<11))+(((((X >> 10) & 1)<<31)>>31)&(Y<<10))+(((((X >> 9) & 1)<<31)>>31)&(Y<<9))+(((((X >> 8) & 1)<<31)>>31)&(Y<<8))+(((((X >> 7) & 1)<<31)>>31)&(Y<<7))+(((((X >> 6) & 1)<<31)>>31)&(Y<<6))+(((((X >> 5) & 1)<<31)>>31)&(Y<<5))+(((((X >> 4) & 1)<<31)>>31)&(Y<<4))+(((((X >> 3) & 1)<<31)>>31)&(Y<<3))+(((((X >> 2) & 1)<<31)>>31)&(Y<<2))+(((((X >> 1) & 1)<<31)>>31)&(Y<<1))+(((((X >> 0) & 1)<<31)>>31)&(Y<<0))
int main()
{
int randomnumber=23;
int randomnumber2=23;
int checknum=23;
clock_t start, diff;
srand(time(0));
start = clock();
for(int i=0;i<1000000;i++)
{
randomnumber = rand() % 10000;
randomnumber2 = rand() % 10000;
checknum=MULTIPLYINTBYMINUS(randomnumber,randomnumber2);
if (checknum!=randomnumber*randomnumber2)
{
printf("s %i and %i and %i",checknum,randomnumber,randomnumber2);
}
}
diff = clock() - start;
int msec = diff * 1000 / CLOCKS_PER_SEC;
printf("MULTIPLYINTBYMINUS Time %d milliseconds", msec);
start = clock();
for(int i=0;i<1000000;i++)
{
randomnumber = rand() % 10000;
randomnumber2 = rand() % 10000;
checknum=MULTIPLYINTBYSHIFT(randomnumber,randomnumber2);
if (checknum!=randomnumber*randomnumber2)
{
printf("s %i and %i and %i",checknum,randomnumber,randomnumber2);
}
}
diff = clock() - start;
msec = diff * 1000 / CLOCKS_PER_SEC;
printf("MULTIPLYINTBYSHIFT Time %d milliseconds", msec);
start = clock();
for(int i=0;i<1000000;i++)
{
randomnumber = rand() % 10000;
randomnumber2 = rand() % 10000;
checknum= randomnumber*randomnumber2;
if (checknum!=randomnumber*randomnumber2)
{
printf("s %i and %i and %i",checknum,randomnumber,randomnumber2);
}
}
diff = clock() - start;
msec = diff * 1000 / CLOCKS_PER_SEC;
printf("normal * Time %d milliseconds", msec);
return 0;
}
刚刚在我的机器上编译了这个:
int a = ...;
int b = a * 10;
当分解它时会产生输出:
MOV EAX,DWORD PTR SS:[ESP+1C] ; Move a into EAX
LEA EAX,DWORD PTR DS:[EAX+EAX*4] ; Multiply by 5 without shift !
SHL EAX, 1 ; Multiply by 2 using shift
这个版本比纯移位和加法的手工优化代码更快。
你永远不知道编译器会得到什么,所以最好只是简单地写一个普通的乘法,让它按自己想要的方式优化,除非在非常精确的情况下,你知道编译器无法优化。