• 测试merge效率


    测试说明:

    MERGE是oracle提供的一种特殊的sql语法,非常适用于数据同步场景,即: (把A表数据插到B表,如果B表存在相同主键的记录则使用A表数据对B表进行更新) 数据同步的常规做法是先尝试插入,插入失败再进行更新,MERGE比这种常规做法效率高很多。 (特别是A与B表基本一致,同步时主键冲突比较多的情况,效率能相差10倍以上)

    为了验证MERGE效率,我建了两张表,tab_test_C(初始化生成50000条记录)和tab_test_Q(初始化从tab_test_C生成40000条记录), 写了两个plsql脚本,分别将tab_test_C的数据同步到tab_test_Q,看它们效率区别。

    第一个脚本使用merge语法,第二个脚本使用常规先插入,出现主键冲突的操作。


    测试结果:
     使用merge语法的脚本同步数据耗时0.04秒,使用常规操作耗时14.77秒,效率差369倍

    测试脚本:

    SET SERVEROUTPUT ON
    -- 启动计时 以便观察脚本执行时间
    SET TIMING ON
    SET TIME ON
    -- 数据初始化 
    DROP TABLE tab_test_C;
    CREATE TABLE tab_test_C 
    (
        C1 VARCHAR2(512),
        C2 VARCHAR2(512),
        C3 VARCHAR2(512),
        C4 VARCHAR2(512),
        C5 VARCHAR2(512),
        C6 VARCHAR2(512),
        C7 VARCHAR2(512),
        C8 VARCHAR2(512),
        C9 VARCHAR2(512),
        C10 VARCHAR2(512)
    );
    
    DECLARE
        v_total number;
    BEGIN
        v_total := 0;
        LOOP
            EXIT WHEN v_total >= 50000;
            for cur in (select owner, object_name, subobject_name, object_id, data_object_id, object_type, 
                created, last_ddl_time, timestamp from all_objects where rownum < 101)
            loop
                insert into tab_test_C values (cur.owner, cur.object_name, cur.subobject_name, 
                cur.object_id, cur.data_object_id, 
                cur.object_type, cur.created, 
                cur.last_ddl_time, cur.timestamp, v_total);
                v_total := v_total + 1;
            end loop;
        END LOOP;
        COMMIT;
    END;
    /
    
    -- 建唯一索引
    select count(1) from tab_test_C;
    create UNIQUE INDEX uid_test_c_1 on tab_test_C(C10);
    --初始化tab_test_Q表数据,先从tab_test_C生成同步40000条数据,剩下10000条数据使用脚本同步过来
    DROP TABLE tab_test_Q;
    CREATE TABLE tab_test_Q AS SELECT * FROM tab_test_C where rownum < 40001;
    create UNIQUE INDEX uid_test_q_1 on tab_test_Q(C10);
    -- 验证数据未同步成功 此时记录数差1000
    select count(*) from tab_test_Q;
    -- 使用merge语法同步tab_test_C的数据到tab_test_Q
    DECLARE
        CURSOR cur is select * from tab_test_C;
        type mergeArray_t is table of tab_test_C % ROWTYPE index by BINARY_INTEGER;
        mergeArray mergeArray_t;
    BEGIN
        OPEN cur;
        LOOP
            EXIT WHEN cur % NOTFOUND;
            FETCH cur bulk collect into mergeArray LIMIT 16; -- 每次限十几条记录,不要占用太多内存 这个数字调大点效率会更高
            BEGIN
                FORALL rw IN 1 .. mergeArray.count
                    MERGE INTO tab_test_Q A
                    USING (SELECT mergeArray(rw).C1 C1, mergeArray(rw).C2 C2, mergeArray(rw).C3 C3, mergeArray(rw).C4 C4, 
                        mergeArray(rw).C5 C5, mergeArray(rw).C6 C6, mergeArray(rw).C7 C7, mergeArray(rw).C8 C8, 
                        mergeArray(rw).C9 C9, mergeArray(rw).C10 C10 FROM DUAL) B
                    ON (A.C10 = B.C10)
                    WHEN MATCHED THEN
                        UPDATE SET A.C1 = mergeArray(rw).C1, A.C2 = mergeArray(rw).C2, A.C3 = mergeArray(rw).C3, 
                            A.C4 = mergeArray(rw).C4, A.C5 = mergeArray(rw).C5, 
                            A.C6 = mergeArray(rw).C6, A.C7 = mergeArray(rw).C7, A.C8 = mergeArray(rw).C8, 
                            A.C9 = mergeArray(rw).C9
                    WHEN NOT MATCHED THEN
                        INSERT (C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) VALUES(mergeArray(rw).C1, mergeArray(rw).C2,
                            mergeArray(rw).C3, mergeArray(rw).C4, mergeArray(rw).C5, mergeArray(rw).C6, 
                            mergeArray(rw).C7, mergeArray(rw).C8, mergeArray(rw).C9, mergeArray(rw).C10);
                -- DBMS_OUTPUT.PUT_LINE(mergeArray.count);
            EXCEPTION
                WHEN OTHERS THEN
                        DBMS_OUTPUT.PUT_LINE('error1');
            END;
        END LOOP;
        CLOSE cur;
        COMMIT;
    END;
    /
    --耗时0.04秒
    -- 验证数据同步成功 
    select count(*) from tab_test_Q;
    --初始化tab_test_Q表数据,先从tab_test_C生成同步40000条数据,剩下10000条数据使用脚本同步过来
    DROP TABLE tab_test_Q;
    CREATE TABLE tab_test_Q AS SELECT * FROM tab_test_C where rownum < 40001;
    create UNIQUE INDEX uid_test_q_1 on tab_test_Q(C10);
    -- 验证数据未同步成功 此时记录数差1000
    select count(*) from tab_test_Q;
    -- 使用常规语法同步tab_test_C的数据到tab_test_Q
    BEGIN
        for cur in (select * from tab_test_C)
        LOOP
            BEGIN
                INSERT INTO tab_test_Q(C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) 
                VALUES(cur.C1, cur.C2, cur.C3, cur.C4, cur.C5, cur.C6, cur.C7, cur.C8, cur.C9, cur.C10);
            EXCEPTION
                WHEN DUP_VAL_ON_INDEX THEN --唯一索引冲突时更新
                    UPDATE tab_test_Q SET C1 = cur.C1, C2 = cur.C2, C3 = cur.C3, C4 = cur.C4, C5 = cur.C5, C6 = cur.C6, C7 = cur.C7, C8 = cur.C8, C9 = cur.C9
                    WHERE C10 = cur.C10;
                WHEN OTHERS THEN
                        DBMS_OUTPUT.PUT_LINE('error1');
            END;
        END LOOP;
        COMMIT;
    END;
    /
    --耗时14.77秒
    -- 验证数据同步成功
    select count(*) from tab_test_Q;
  • 相关阅读:
    Ajax技术的工作原理
    《统计学习方法》第七章,支持向量机
    《西瓜书》第四章,决策树3
    OpenCL 矩阵乘法
    MPI 集合通信函数 MPI_Reduce(),MPI_Allreduce(),MPI_Bcast(),MPI_Scatter(),MPI_Gather(),MPI_Allgather(),MPI_Scan(),MPI_Reduce_Scatter()
    有关CUBLAS中的矩阵乘法函数
    TensorFlow 笔记04
    TensorFlow 笔记03-TensoeFlow 和 TensorRT 调试的一些方法
    TensorFlow 笔记02-mnist 的 tensorRT 实现,从 .npz 文件中加载参数进行推理
    TensorFlow 笔记01-mnist 的两种 tensorFlow 实现,保存和加载模型的三种方法
  • 原文地址:https://www.cnblogs.com/kingstarer/p/10865941.html
Copyright © 2020-2023  润新知