• vector性能调优之resize与reserve


    vector的resize与reserve

    reserve()函数为当前vector预留至少能容纳size个元素的空间(译注:实际容量可能大于size)。它只改变容量capacity(),不会改变元素个数size()。

    resize() 函数( void resize( size_type size, TYPE val ) )改变当前vector的大小为size,且对新创建的元素赋值val

    在这里插入图片描述
    (翻译:

    调整容器大小以包含count元素。

    如果当前大小大于count,则容器将被缩减为其第一个count元素,就像重复调用pop_back()一样。

    如果当前大小小于count,则附加元素并用值的副本初始化。)

    resize和reserve函数本质上都涉及vector的内存存储空间。因为vector的元素在内存中是连续存放的,所以当所需空间大于现有容量(capacity()函数返回当前vector在重新进行内存分配以前所能容纳的元素数量)时,vector会重新申请一块更大的连续空间,并将所有元素复制过去。resize在分配空间的同时还会对新增元素进行初始化并计入size,因此resize(n)之后size已经等于n;此时再push_back会使size变为n+1,一旦超过现有容量,就要重新寻找更大的内存空间并复制所有元素,所以这个过程是很费时的。

    // Demonstrates resize(): the ten elements created by resize() already count
    // towards size(), so later push_back calls append after them.
    void testResize(){
        std::vector<int> values;
        // After this call size() is 10 and every element is a value-initialized int (0).
        values.resize(10);
        // Append 1, 2 and 3 behind the ten existing zeros.
        for (int extra = 1; extra <= 3; ++extra) {
            values.push_back(extra);
        }
        // Prints the element count: 13 (10 from resize + 3 pushed).
        std::cout << "vector1的长度:" << values.size() << std::endl;
        // Prints: 0 0 0 0 0 0 0 0 0 0 1 2 3
        for (const int value : values) {
            std::cout << value << " ";
        }
        // Prints the capacity. The original author recorded 20; the growth policy
        // is implementation-defined, so other standard libraries may print another value.
        std::cout << std::endl
                  << "当前vector在重新进行内存分配以前所能容纳的元素数量:"
                  << values.capacity() << std::endl;
    }
    // Demonstrates reserve(): it only sets aside storage, so size() stays 0
    // until elements are actually appended.
    void testReserve(){
        std::vector<int> values;
        // Reserve room for at least 10 elements; no elements are created.
        values.reserve(10);
        // Append 1, 2 and 3; they become the only elements of the vector.
        for (int item = 1; item <= 3; ++item) {
            values.push_back(item);
        }
        // Prints the element count: 3.
        std::cout << "vector1的长度:" << values.size() << std::endl;
        // Prints: 1 2 3
        for (const int value : values) {
            std::cout << value << " ";
        }
        // Prints the capacity; the original author recorded 10.
        std::cout << std::endl
                  << "当前vector在重新进行内存分配以前所能容纳的元素数量:"
                  << values.capacity() << std::endl;
    }
    
    

    插入测试

    接下来探讨插入的效率的实例,分别尝试在插入大数据3.8GB和小数据380MB时,各种情况的实现。

    (1)push_back直接插入

    结论:费事,在插入的过程中,不断寻找“庇护所”,不断“迁移大本营”,舟车劳顿效率低下

    // Benchmark: append 1,000,000,000 ints (about 3814 MB of payload) with
    // push_back and no prior reserve(), then time one additional push_back.
    // Prints the elapsed processor time (as reported by clock()) and the
    // resulting size/capacity after each phase.
    void testPushBack_bigsize(){
        std::vector<int> vector1;
        clock_t start = clock();
        for (int i = 0; i < 1000000000; ++i) {
            vector1.push_back(i);
        }
        // Divide in floating point: with an integer clock_t the original integer
        // division truncated to whole seconds, so short runs were reported as "0s".
        // The original author recorded about 42s for this phase.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000000 capacity:1073741824
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
        clock_t start2 = clock();
        // In the author's run the capacity still exceeded the size at this point,
        // so this single push_back did not reallocate.
        vector1.push_back(1);
        // The author recorded 0s here under the truncating division.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start2) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000001 capacity:1073741824
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
    }
    

    (2)先reserve再push_back

    结论:先分配空间再进行后续处理,能够有效的减少插入时间的损耗,耗时占原插入方式的1/3到1/2之间。

    // Benchmark: reserve room for 1,000,000,000 ints (about 3814 MB) up front,
    // append that many values with push_back, then time one additional
    // push_back that exceeds the reserved capacity.
    // Prints the elapsed processor time (as reported by clock()) and the
    // resulting size/capacity after each phase.
    void testPushBack_byReserve_bigsize(){
        std::vector<int> vector1;
        // reserve() runs before the timer starts, so it is not part of the measurement.
        vector1.reserve(1000000000);
        clock_t start = clock();
        for (int i = 0; i < 1000000000; ++i) {
            vector1.push_back(i);
        }
        // Divide in floating point: with an integer clock_t the original integer
        // division truncated the result to whole seconds.
        // The original author recorded about 17s for this phase.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000000 capacity:1000000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
        clock_t start2 = clock();
        // The vector is exactly full in the author's run, so this push_back has to
        // reallocate and move every existing element.
        vector1.push_back(1);
        // The original author recorded about 76s for this single push_back.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start2) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000001 capacity:2000000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
    }
    // Benchmark: reserve room for 100,000,000 ints (about 381 MB) up front,
    // append that many values with push_back, then time one additional
    // push_back that exceeds the reserved capacity.
    // Prints the elapsed processor time (as reported by clock()) and the
    // resulting size/capacity after each phase.
    void testPushBack_byReserve_smallsize(){
        std::vector<int> vector1;
        // reserve() runs before the timer starts, so it is not part of the measurement.
        vector1.reserve(100000000);
        clock_t start = clock();
        for (int i = 0; i < 100000000; ++i) {
            vector1.push_back(i);
        }
        // Divide in floating point: with an integer clock_t the original integer
        // division truncated the result to whole seconds.
        // The original author recorded about 1s for this phase.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:100000000 capacity:100000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
        clock_t start2 = clock();
        // The vector is exactly full in the author's run, so this push_back has to
        // reallocate and move every existing element.
        vector1.push_back(1);
        // The original author recorded about 2s for this single push_back.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start2) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:100000001 capacity:200000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
    }
    

    (3)先resize再利用下标进行赋值(相当于插入)

    结论:在分配空间时直接对空间进行初始化,赋予初值,极大提升了存储的速率。但是在resize后进行push_back是不明智的选择。

    // Benchmark: resize the vector to 1,000,000,000 ints, fill it by index
    // assignment, then time one additional push_back on the full vector.
    // Prints the elapsed processor time (as reported by clock()) and the
    // resulting size/capacity after each phase.
    void testinsert_byResize_bigsize(){
        std::vector<int> vector1;
        // NOTE: resize() runs before the timer starts, so the cost of allocating
        // and value-initializing the elements is NOT included in the first timing.
        vector1.resize(1000000000);
        clock_t start = clock();
        for (int i = 0; i < 1000000000; ++i) {
            vector1[i]=i;
        }
        // Divide in floating point: with an integer clock_t the original integer
        // division truncated the result to whole seconds.
        // The original author recorded about 3s for this phase.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000000 capacity:1000000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
        clock_t start2 = clock();
        // size() already equals the capacity in the author's run, so this push_back
        // has to reallocate and move every existing element.
        vector1.push_back(1);
        // The original author recorded about 66s for this single push_back.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start2) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // Author's recorded output: size:1000000001 capacity:2000000000
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
    }
    // Benchmark: resize the vector to 100,000,000 ints, fill it by index
    // assignment, then time one additional push_back on the full vector.
    // Prints the elapsed processor time (as reported by clock()) and the
    // resulting size/capacity after each phase.
    void testinsert_byResize_smallsize(){
        std::vector<int> vector1;
        // NOTE: resize() runs before the timer starts, so the cost of allocating
        // and value-initializing the elements is NOT included in the first timing.
        vector1.resize(100000000);
        clock_t start = clock();
        for (int i = 0; i < 100000000; ++i) {
            vector1[i]=i;
        }
        // Divide in floating point: with an integer clock_t the original integer
        // division truncated the result to whole seconds, which is why the
        // original author's run reported "0s" for this phase.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // size() is 100000000 here (the count passed to resize above); the figures
        // in the original comment (10000000) appear to have dropped a zero.
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
        clock_t start2 = clock();
        // Appending to the resized vector grows it past the resized element count;
        // the author's larger runs show the capacity doubling at this point.
        vector1.push_back(1);
        // The original author recorded about 2s for this single push_back.
        std::cout <<"共耗时:"<< static_cast<double>(clock() - start2) / CLOCKS_PER_SEC <<"s"<<std::endl;
        // size() is 100000001 here; the original comment's figures again appear
        // to be missing a zero.
        std::cout <<"size:"<<vector1.size() << " capacity:" << vector1.capacity() << std::endl;
    }
    

    vector优化结论

    防止reallocate内存,而导致的数据拷贝产生的额外耗时

    vector在push_back的时候,如果空间不足,会自动增补一些空间,如果没有预留的空间可用
    就直接申请另一块可用的连续的空间,把数据拷贝过去,然后删除旧空间,使用新空间
    结果造成效率低下 。

    可以通过以下两种组合来防止reallocate.

    1. vector::resize() 使用array index,效率最高,但是需要提前知道size大小

    2. vector::reserve()使用 push_back(),效率一般,较原生有一定提升。

  • 相关阅读:
    Cookie和Session的作用和工作原理
    df和du显示的磁盘空间使用情况不一致问题
    haproxy配置详解
    使用LVS实现负载均衡原理及安装配置详解
    四层、七层负载均衡的区别
    Linux内核参数之arp_ignore和arp_announce
    Megacli查看Dell服务器Raid状态
    Visual Studio 2015中使用gdb远程调试linux程序
    编译Qt-mingw使用的opencv
    [webrtc] 强制使用tcp传输
  • 原文地址:https://www.cnblogs.com/sunqiangstyle/p/10312258.html
Copyright © 2020-2023  润新知