UPDATE 2021
我修改了基准代码,如下所示:
<random>
代替rand()
我用GCC 10-O2得到了以下结果(单位:秒):
exp c++ pow c pow x*x*x...
2 0.204243 1.39962 0.0902527
3 1.36162 1.38291 0.107679
4 1.37717 1.38197 0.106103
5 1.3815 1.39139 0.117097
GCC 10-O3与GCC 10-O2几乎相同.
GCC 10-O2-ffast数学:
exp c++ pow c pow x*x*x...
2 0.203625 1.4056 0.0913414
3 0.11094 1.39938 0.108027
4 0.201593 1.38618 0.101585
5 0.102141 1.38212 0.10662
GCC 10-O3-FAST-MATH:
exp c++ pow c pow x*x*x...
2 0.0451995 1.175 0.0450497
3 0.0470842 1.20226 0.051399
4 0.0475239 1.18033 0.0473844
5 0.0522424 1.16817 0.0522291
随着12-O2的叮当声:
exp c++ pow c pow x*x*x...
2 0.106242 0.105435 0.105533
3 1.45909 1.4425 0.102235
4 1.45629 1.44262 0.108861
5 1.45837 1.44483 0.1116
铿锵12-O3与铿锵12-O2几乎相同.
使用clang 12-O2-ffast-MATH:
exp c++ pow c pow x*x*x...
2 0.0233731 0.0232457 0.0231076
3 0.0271074 0.0266663 0.0278415
4 0.026897 0.0270698 0.0268115
5 0.0312481 0.0296402 0.029811
Clang12-O3-ffast数学与Clang12-O2-ffast数学几乎相同.
这台机器是Linux 5.4.0-73-generic x86_64上的Intel Core i7-7700K.
结论:
x*x*x...
比always快std::pow
的速度与x*x*x...
的速度一样快std::pow
的速度都是x*x*x...
的速度,大约是-O2的两倍.pow(double, double)
总是慢得多x*x*x...
更快pow(double, double)
的速度和std::pow
的速度一样快我最终会在我的机器上安装一个更新版本的GCC,并在安装后更新结果.
以下是更新后的基准代码:
#include <cmath>
#include <chrono>
#include <iostream>
#include <random>
using Moment = std::chrono::high_resolution_clock::time_point;
using FloatSecs = std::chrono::duration<double>;
inline Moment now()
{
return std::chrono::high_resolution_clock::now();
}
#define TEST(num, expression) \
double test##num(double b, long loops) \
{ \
double x = 0.0; \
\
auto startTime = now(); \
for (long i=0; i<loops; ++i) \
{ \
x += expression; \
b += 1.0; \
} \
auto elapsed = now() - startTime; \
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x; \
}
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testCppPow(double base, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x;
}
double testCPow(double base, double exponent, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += ::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0;
std::random_device rd;
std::default_random_engine re(rd());
std::uniform_real_distribution<double> dist(1.1, 1.2);
cout << "exp\tc++ pow\tc pow\tx*x*x...";
cout << "\n2\t";
double b = dist(re);
x += testCppPow<2>(b, loops);
x += testCPow(b, 2.0, loops);
x += test2(b, loops);
cout << "\n3\t";
b = dist(re);
x += testCppPow<3>(b, loops);
x += testCPow(b, 3.0, loops);
x += test3(b, loops);
cout << "\n4\t";
b = dist(re);
x += testCppPow<4>(b, loops);
x += testCPow(b, 4.0, loops);
x += test4(b, loops);
cout << "\n5\t";
b = dist(re);
x += testCppPow<5>(b, loops);
x += testCPow(b, 5.0, loops);
x += test5(b, loops);
std::cout << "\n" << x << "\n";
}
Old Answer, 2010
对于Small i
,我使用以下代码测试了x*x*...
和pow(x,i)
之间的性能差异:
#include <cstdlib>
#include <cmath>
#include <boost/date_time/posix_time/posix_time.hpp>
inline boost::posix_time::ptime now()
{
return boost::posix_time::microsec_clock::local_time();
}
#define TEST(num, expression) \
double test##num(double b, long loops) \
{ \
double x = 0.0; \
\
boost::posix_time::ptime startTime = now(); \
for (long i=0; i<loops; ++i) \
{ \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
} \
boost::posix_time::time_duration elapsed = now() - startTime; \
\
std::cout << elapsed << " "; \
\
return x; \
}
TEST(1, b)
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testpow(double base, long loops)
{
double x = 0.0;
boost::posix_time::ptime startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
}
boost::posix_time::time_duration elapsed = now() - startTime;
std::cout << elapsed << " ";
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0.0;
cout << "1 ";
x += testpow<1>(rand(), loops);
x += test1(rand(), loops);
cout << "\n2 ";
x += testpow<2>(rand(), loops);
x += test2(rand(), loops);
cout << "\n3 ";
x += testpow<3>(rand(), loops);
x += test3(rand(), loops);
cout << "\n4 ";
x += testpow<4>(rand(), loops);
x += test4(rand(), loops);
cout << "\n5 ";
x += testpow<5>(rand(), loops);
x += test5(rand(), loops);
cout << "\n" << x << "\n";
}
结果如下:
1 00:00:01.126008 00:00:01.128338
2 00:00:01.125832 00:00:01.127227
3 00:00:01.125563 00:00:01.126590
4 00:00:01.126289 00:00:01.126086
5 00:00:01.126570 00:00:01.125930
2.45829e+54
请注意,我会累计每次功率计算的结果,以确保编译器不会将其优化.
如果我使用std::pow(double, double)
和loops = 1000000l
版本,我会得到:
1 00:00:00.011339 00:00:00.011262
2 00:00:00.011259 00:00:00.011254
3 00:00:00.975658 00:00:00.011254
4 00:00:00.976427 00:00:00.011254
5 00:00:00.973029 00:00:00.011254
2.45829e+52
这是运行Ubuntu 9.10 64位的英特尔酷睿双核处理器.使用带-O2优化的GCC 4.4.1编译.
所以在C语言中,是的,x*x*x
比pow(x, 3)
快,因为没有pow(double, int)
过载.在C++中,它将大致相同.(假设我测试中的方法是正确的.)
这是对Markm comments 的回应:
即使发出了using namespace std
指令,如果pow
的第二个参数是int
,则将调用来自<cmath>
的std::pow(double, int)
重载,而不是来自<math.h>
的::pow(double, double)
.
此测试代码确认了以下行为:
#include <iostream>
namespace foo
{
double bar(double x, int i)
{
std::cout << "foo::bar\n";
return x*i;
}
}
double bar(double x, double y)
{
std::cout << "::bar\n";
return x*y;
}
using namespace foo;
int main()
{
double a = bar(1.2, 3); // Prints "foo::bar"
std::cout << a << "\n";
return 0;
}