我一直认为std::vector是“作为数组实现的”,等等等等。今天我去测试了一下,结果似乎不是这样:
以下是一些测试结果:
UseArray completed in 2.619 seconds
UseVector completed in 9.284 seconds
UseVectorPushBack completed in 14.669 seconds
The whole thing completed in 26.591 seconds
这大约要慢3 - 4倍!这并不能证明“向量可能会慢几纳秒”的评论是正确的。
我使用的代码是:
#include <cstdlib>
#include <vector>
#include <iostream>
#include <string>
#include <boost/date_time/posix_time/ptime.hpp>
#include <boost/date_time/microsec_time_clock.hpp>
class TestTimer
{
public:
TestTimer(const std::string & name) : name(name),
start(boost::date_time::microsec_clock<boost::posix_time::ptime>::local_time())
{
}
~TestTimer()
{
using namespace std;
using namespace boost;
posix_time::ptime now(date_time::microsec_clock<posix_time::ptime>::local_time());
posix_time::time_duration d = now - start;
cout << name << " completed in " << d.total_milliseconds() / 1000.0 <<
" seconds" << endl;
}
private:
std::string name;
boost::posix_time::ptime start;
};
struct Pixel
{
Pixel()
{
}
Pixel(unsigned char r, unsigned char g, unsigned char b) : r(r), g(g), b(b)
{
}
unsigned char r, g, b;
};
void UseVector()
{
TestTimer t("UseVector");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel> pixels;
pixels.resize(dimension * dimension);
for(int i = 0; i < dimension * dimension; ++i)
{
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
}
}
void UseVectorPushBack()
{
TestTimer t("UseVectorPushBack");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel> pixels;
pixels.reserve(dimension * dimension);
for(int i = 0; i < dimension * dimension; ++i)
pixels.push_back(Pixel(255, 0, 0));
}
}
void UseArray()
{
TestTimer t("UseArray");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
Pixel * pixels = (Pixel *)malloc(sizeof(Pixel) * dimension * dimension);
for(int i = 0 ; i < dimension * dimension; ++i)
{
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
free(pixels);
}
}
int main()
{
TestTimer t1("The whole thing");
UseArray();
UseVector();
UseVectorPushBack();
return 0;
}
我做错了吗?还是我刚刚打破了这个性能神话?
我使用Visual Studio 2005中的发布模式。
在Visual c++中,#define _SECURE_SCL 0将UseVector减少了一半(减少到4秒)。在我看来,这真的是件大事。
我不得不说我不是c++方面的专家。但要补充一些实验结果:
编译:
gcc-6.2.0/bin/g++ -O3 -std=c++14 vector.cpp
机:
Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz
OS:
2.6.32-642.13.1.el6.x86_64
输出:
UseArray completed in 0.167821 seconds
UseVector completed in 0.134402 seconds
UseConstructor completed in 0.134806 seconds
UseFillConstructor completed in 1.00279 seconds
UseVectorPushBack completed in 6.6887 seconds
The whole thing completed in 8.12888 seconds
这里我唯一感到奇怪的是“UseFillConstructor”的性能与“UseConstructor”相比。
代码:
void UseConstructor()
{
TestTimer t("UseConstructor");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel> pixels(dimension*dimension);
for(int i = 0; i < dimension * dimension; ++i)
{
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
}
}
void UseFillConstructor()
{
TestTimer t("UseFillConstructor");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel> pixels(dimension*dimension, Pixel(255,0,0));
}
}
因此提供的额外“值”大大降低了性能,我认为这是由于多次调用复制构造函数造成的。但是…
编译:
gcc-6.2.0/bin/g++ -std=c++14 -O vector.cpp
输出:
UseArray completed in 1.02464 seconds
UseVector completed in 1.31056 seconds
UseConstructor completed in 1.47413 seconds
UseFillConstructor completed in 1.01555 seconds
UseVectorPushBack completed in 6.9597 seconds
The whole thing completed in 11.7851 seconds
因此,在这种情况下,gcc优化非常重要,但当一个值作为默认值提供时,它帮不了你太多。这,其实是对我的学费。希望它能帮助新程序员选择哪种矢量初始化格式。
一些分析器数据(像素对齐为32位):
g++ -msse3 -O3 -ftree-vectorize -g test.cpp -DNDEBUG && ./a.out
UseVector completed in 3.123 seconds
UseArray completed in 1.847 seconds
UseVectorPushBack completed in 9.186 seconds
The whole thing completed in 14.159 seconds
Blah
andrey@nv:~$ opannotate --source libcchem/src/a.out | grep "Total samples for file" -A3
Overflow stats not available
* Total samples for file : "/usr/include/c++/4.4/ext/new_allocator.h"
*
* 141008 52.5367
*/
--
* Total samples for file : "/home/andrey/libcchem/src/test.cpp"
*
* 61556 22.9345
*/
--
* Total samples for file : "/usr/include/c++/4.4/bits/stl_vector.h"
*
* 41956 15.6320
*/
--
* Total samples for file : "/usr/include/c++/4.4/bits/stl_uninitialized.h"
*
* 20956 7.8078
*/
--
* Total samples for file : "/usr/include/c++/4.4/bits/stl_construct.h"
*
* 2923 1.0891
*/
在分配器:
: // _GLIBCXX_RESOLVE_LIB_DEFECTS
: // 402. wrong new expression in [some_] allocator::construct
: void
: construct(pointer __p, const _Tp& __val)
141008 52.5367 : { ::new((void *)__p) _Tp(__val); }
向量:
:void UseVector()
:{ /* UseVector() total: 60121 22.3999 */
...
:
:
10790 4.0201 : for (int i = 0; i < dimension * dimension; ++i) {
:
495 0.1844 : pixels[i].r = 255;
:
12618 4.7012 : pixels[i].g = 0;
:
2253 0.8394 : pixels[i].b = 0;
:
: }
数组
:void UseArray()
:{ /* UseArray() total: 35191 13.1114 */
:
...
:
136 0.0507 : for (int i = 0; i < dimension * dimension; ++i) {
:
9897 3.6874 : pixels[i].r = 255;
:
3511 1.3081 : pixels[i].g = 0;
:
21647 8.0652 : pixels[i].b = 0;
大部分开销都在复制构造函数中。例如,
std::vector < Pixel > pixels;//(dimension * dimension, Pixel());
pixels.reserve(dimension * dimension);
for (int i = 0; i < dimension * dimension; ++i) {
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
它具有与数组相同的性能。
我只是想提一下vector(和smart_ptr)只是原始数组(和原始指针)上的一个薄层。
实际上在连续存储器中向量的访问时间比数组快。
下面的代码显示了初始化和访问向量和数组的结果。
#include <boost/date_time/posix_time/posix_time.hpp>
#include <iostream>
#include <vector>
#define SIZE 20000
int main() {
srand (time(NULL));
vector<vector<int>> vector2d;
vector2d.reserve(SIZE);
int index(0);
boost::posix_time::ptime start_total = boost::posix_time::microsec_clock::local_time();
// timer start - build + access
for (int i = 0; i < SIZE; i++) {
vector2d.push_back(vector<int>(SIZE));
}
boost::posix_time::ptime start_access = boost::posix_time::microsec_clock::local_time();
// timer start - access
for (int i = 0; i < SIZE; i++) {
index = rand()%SIZE;
for (int j = 0; j < SIZE; j++) {
vector2d[index][index]++;
}
}
boost::posix_time::ptime end = boost::posix_time::microsec_clock::local_time();
boost::posix_time::time_duration msdiff = end - start_total;
cout << "Vector total time: " << msdiff.total_milliseconds() << "milliseconds.\n";
msdiff = end - start_acess;
cout << "Vector access time: " << msdiff.total_milliseconds() << "milliseconds.\n";
int index(0);
int** raw2d = nullptr;
raw2d = new int*[SIZE];
start_total = boost::posix_time::microsec_clock::local_time();
// timer start - build + access
for (int i = 0; i < SIZE; i++) {
raw2d[i] = new int[SIZE];
}
start_access = boost::posix_time::microsec_clock::local_time();
// timer start - access
for (int i = 0; i < SIZE; i++) {
index = rand()%SIZE;
for (int j = 0; j < SIZE; j++) {
raw2d[index][index]++;
}
}
end = boost::posix_time::microsec_clock::local_time();
msdiff = end - start_total;
cout << "Array total time: " << msdiff.total_milliseconds() << "milliseconds.\n";
msdiff = end - start_acess;
cout << "Array access time: " << msdiff.total_milliseconds() << "milliseconds.\n";
for (int i = 0; i < SIZE; i++) {
delete [] raw2d[i];
}
return 0;
}
输出结果为:
Vector total time: 925milliseconds.
Vector access time: 4milliseconds.
Array total time: 30milliseconds.
Array access time: 21milliseconds.
所以如果使用得当,速度几乎是一样的。
(正如其他人提到的使用reserve()或resize())。
一个更好的基准测试(我认为…),编译器由于优化可以改变代码,因为分配的向量/数组的结果不会在任何地方使用。
结果:
$ g++ test.cpp -o test -O3 -march=native
$ ./test
UseArray inner completed in 0.652 seconds
UseArray completed in 0.773 seconds
UseVector inner completed in 0.638 seconds
UseVector completed in 0.757 seconds
UseVectorPushBack inner completed in 6.732 seconds
UseVectorPush completed in 6.856 seconds
The whole thing completed in 8.387 seconds
编译器:
gcc version 6.2.0 20161019 (Debian 6.2.0-9)
CPU:
model name : Intel(R) Core(TM) i7-3630QM CPU @ 2.40GHz
代码是:
#include <cstdlib>
#include <vector>
#include <iostream>
#include <string>
#include <boost/date_time/posix_time/ptime.hpp>
#include <boost/date_time/microsec_time_clock.hpp>
class TestTimer
{
public:
TestTimer(const std::string & name) : name(name),
start(boost::date_time::microsec_clock<boost::posix_time::ptime>::local_time())
{
}
~TestTimer()
{
using namespace std;
using namespace boost;
posix_time::ptime now(date_time::microsec_clock<posix_time::ptime>::local_time());
posix_time::time_duration d = now - start;
cout << name << " completed in " << d.total_milliseconds() / 1000.0 <<
" seconds" << endl;
}
private:
std::string name;
boost::posix_time::ptime start;
};
struct Pixel
{
Pixel()
{
}
Pixel(unsigned char r, unsigned char g, unsigned char b) : r(r), g(g), b(b)
{
}
unsigned char r, g, b;
};
void UseVector(std::vector<std::vector<Pixel> >& results)
{
TestTimer t("UseVector inner");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel>& pixels = results.at(i);
pixels.resize(dimension * dimension);
for(int i = 0; i < dimension * dimension; ++i)
{
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
}
}
void UseVectorPushBack(std::vector<std::vector<Pixel> >& results)
{
TestTimer t("UseVectorPushBack inner");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
std::vector<Pixel>& pixels = results.at(i);
pixels.reserve(dimension * dimension);
for(int i = 0; i < dimension * dimension; ++i)
pixels.push_back(Pixel(255, 0, 0));
}
}
void UseArray(Pixel** results)
{
TestTimer t("UseArray inner");
for(int i = 0; i < 1000; ++i)
{
int dimension = 999;
Pixel * pixels = (Pixel *)malloc(sizeof(Pixel) * dimension * dimension);
results[i] = pixels;
for(int i = 0 ; i < dimension * dimension; ++i)
{
pixels[i].r = 255;
pixels[i].g = 0;
pixels[i].b = 0;
}
// free(pixels);
}
}
void UseArray()
{
TestTimer t("UseArray");
Pixel** array = (Pixel**)malloc(sizeof(Pixel*)* 1000);
UseArray(array);
for(int i=0;i<1000;++i)
free(array[i]);
free(array);
}
void UseVector()
{
TestTimer t("UseVector");
{
std::vector<std::vector<Pixel> > vector(1000, std::vector<Pixel>());
UseVector(vector);
}
}
void UseVectorPushBack()
{
TestTimer t("UseVectorPush");
{
std::vector<std::vector<Pixel> > vector(1000, std::vector<Pixel>());
UseVectorPushBack(vector);
}
}
int main()
{
TestTimer t1("The whole thing");
UseArray();
UseVector();
UseVectorPushBack();
return 0;
}