• erlang下lists模块sort(排序)方法源码解析(一)


    排序算法一直是各种语言最简单也是最复杂的算法,例如十大经典排序算法(动图演示)里面讲的那样

    第一次看lists的sort方法的时候,蒙了,几百行的代码,我心想要这么复杂么(因为C语言的冒泡排序我记得不超过30行),于是自己就实现了下

    结果更蒙了

    %% Sorts L into ascending term order with a plain bubble sort.
    %% Runs length(L) passes over the list, so the cost grows quadratically
    %% with the list length. Elements that compare equal keep their original
    %% relative order (the sort is stable).
    bubble_sort(L) ->
        bubble_sort(L, length(L)).

    %% Applies N bubbling passes to L and returns the resulting list.
    bubble_sort(L, 0) ->
        L;
    bubble_sort(L, N) ->
        bubble_sort(do_bubble_sort(L), N - 1).

    %% One pass: walks the list once, swapping each adjacent pair that is out
    %% of order, which moves the largest element to the end of the list.
    do_bubble_sort([]) ->
        %% Nothing to bubble; lets bubble_sort([], N) succeed for any N.
        [];
    do_bubble_sort([A]) ->
        [A];
    do_bubble_sort([A, B | R]) when A =< B ->
        %% =< rather than <: a pair that compares equal (e.g. 1 and 1.0) is
        %% left in place, otherwise every pass would flip it back and forth.
        [A | do_bubble_sort([B | R])];
    do_bubble_sort([A, B | R]) ->
        [B | do_bubble_sort([A | R])].
    

    对比结果如下

    6> timer:tc(tt1,bubble_sort,[B]).
    {21130,
     [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
      23,24,25,26,27|...]}
    7> timer:tc(lists,sort,[B]).     
    {162,
     [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
      23,24,25,26,27|...]}
    8> 
    

    B是一个打乱顺序的1到1000的序列,我X,这不是一个数量级的算法啊~~~~,不是说好越简单的代码越快么,三观被刷新了。

    还是老实读lists的源码,一共250+行,摘录于lists.erl

      1 -spec sort(List1) -> List2 when
      2       List1 :: [T],
      3       List2 :: [T],
      4       T :: term().
      5 
      6 sort([X, Y | L] = L0) when X =< Y ->
      7     case L of
      8     [] -> 
      9         L0;
     10     [Z] when Y =< Z ->
     11         L0;
     12     [Z] when X =< Z ->
     13         [X, Z, Y];
     14     [Z] ->
     15         [Z, X, Y];
     16     _ when X == Y ->
     17         sort_1(Y, L, [X]);
     18     _ ->
     19         split_1(X, Y, L, [], [])
     20     end;
     21 sort([X, Y | L]) ->
     22     case L of
     23     [] ->
     24         [Y, X];
     25     [Z] when X =< Z ->
     26         [Y, X | L];
     27     [Z] when Y =< Z ->
     28         [Y, Z, X];
     29     [Z] ->
     30         [Z, Y, X];
     31     _ ->
     32         split_2(X, Y, L, [], [])
     33     end;
     34 sort([_] = L) ->
     35     L;
     36 sort([] = L) ->
     37     L.
     38 
     39 sort_1(X, [Y | L], R) when X == Y ->
     40     sort_1(Y, L, [X | R]);
     41 sort_1(X, [Y | L], R) when X < Y ->
     42     split_1(X, Y, L, R, []);
     43 sort_1(X, [Y | L], R) ->
     44     split_2(X, Y, L, R, []);
     45 sort_1(X, [], R) ->
     46     lists:reverse(R, [X]).
     47 
     48 %% Ascending.
     49 split_1(X, Y, [Z | L], R, Rs) when Z >= Y ->
     50     split_1(Y, Z, L, [X | R], Rs);
     51 split_1(X, Y, [Z | L], R, Rs) when Z >= X ->
     52     split_1(Z, Y, L, [X | R], Rs);
     53 split_1(X, Y, [Z | L], [], Rs) ->
     54     split_1(X, Y, L, [Z], Rs);
     55 split_1(X, Y, [Z | L], R, Rs) ->
     56     split_1_1(X, Y, L, R, Rs, Z);
     57 split_1(X, Y, [], R, Rs) ->
     58     rmergel([[Y, X | R] | Rs], []).
     59 
     60 split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y ->
     61     split_1_1(Y, Z, L, [X | R], Rs, S);
     62 split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X ->
     63     split_1_1(Z, Y, L, [X | R], Rs, S);
     64 split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z ->
     65     split_1(S, Z, L, [], [[Y, X | R] | Rs]);
     66 split_1_1(X, Y, [Z | L], R, Rs, S) ->
     67     split_1(Z, S, L, [], [[Y, X | R] | Rs]);
     68 split_1_1(X, Y, [], R, Rs, S) ->    
     69     rmergel([[S], [Y, X | R] | Rs], []).
     70 
     71 %% Descending.
     72 split_2(X, Y, [Z | L], R, Rs) when Z =< Y ->
     73     split_2(Y, Z, L, [X | R], Rs);
     74 split_2(X, Y, [Z | L], R, Rs) when Z =< X ->
     75     split_2(Z, Y, L, [X | R], Rs);
     76 split_2(X, Y, [Z | L], [], Rs) ->
     77     split_2(X, Y, L, [Z], Rs);
     78 split_2(X, Y, [Z | L], R, Rs) ->
     79     split_2_1(X, Y, L, R, Rs, Z);
     80 split_2(X, Y, [], R, Rs) ->
     81     mergel([[Y, X | R] | Rs], []).
     82 
     83 split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< Y ->
     84     split_2_1(Y, Z, L, [X | R], Rs, S);
     85 split_2_1(X, Y, [Z | L], R, Rs, S) when Z =< X ->
     86     split_2_1(Z, Y, L, [X | R], Rs, S);
     87 split_2_1(X, Y, [Z | L], R, Rs, S) when S > Z ->
     88     split_2(S, Z, L, [], [[Y, X | R] | Rs]);
     89 split_2_1(X, Y, [Z | L], R, Rs, S) ->
     90     split_2(Z, S, L, [], [[Y, X | R] | Rs]);
     91 split_2_1(X, Y, [], R, Rs, S) ->
     92     mergel([[S], [Y, X | R] | Rs], []).
     93 
     94 %% merge/1
     95 
     96 mergel([[] | L], Acc) ->
     97     mergel(L, Acc);
     98 mergel([T1, [H2 | T2], [H3 | T3] | L], Acc) ->
     99     mergel(L, [merge3_1(T1, [], H2, T2, H3, T3) | Acc]);
    100 mergel([T1, [H2 | T2]], Acc) ->
    101     rmergel([merge2_1(T1, H2, T2, []) | Acc], []);
    102 mergel([L], []) ->
    103     L;
    104 mergel([L], Acc) ->
    105     rmergel([lists:reverse(L, []) | Acc], []);
    106 mergel([], []) ->
    107     [];
    108 mergel([], Acc) ->
    109     rmergel(Acc, []);
    110 mergel([A, [] | L], Acc) ->
    111     mergel([A | L], Acc);
    112 mergel([A, B, [] | L], Acc) ->
    113     mergel([A, B | L], Acc).
    114 
    115 rmergel([[H3 | T3], [H2 | T2], T1 | L], Acc) ->
    116     rmergel(L, [rmerge3_1(T1, [], H2, T2, H3, T3) | Acc]);
    117 rmergel([[H2 | T2], T1], Acc) ->
    118     mergel([rmerge2_1(T1, H2, T2, []) | Acc], []);
    119 rmergel([L], Acc) ->
    120     mergel([lists:reverse(L, []) | Acc], []);
    121 rmergel([], Acc) ->
    122     mergel(Acc, []).
    123 
    124 %% merge3/3
    125 
    126 %% Take L1 apart.
    127 merge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 ->
    128     merge3_12(T1, H1, H2, T2, H3, T3, M);
    129 merge3_1([H1 | T1], M, H2, T2, H3, T3) ->
    130     merge3_21(T1, H1, H2, T2, H3, T3, M);
    131 merge3_1([], M, H2, T2, H3, T3) when H2 =< H3 ->
    132     merge2_1(T2, H3, T3, [H2 | M]);
    133 merge3_1([], M, H2, T2, H3, T3) ->
    134     merge2_2(T2, H3, T3, M, H2).
    135 
    136 %% Take L2 apart.
    137 merge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 ->
    138     merge3_12(T1, H1, H2, T2, H3, T3, M);
    139 merge3_2(T1, H1, M, [H2 | T2], H3, T3) ->
    140     merge3_21(T1, H1, H2, T2, H3, T3, M);
    141 merge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 ->
    142     merge2_1(T1, H3, T3, [H1 | M]);
    143 merge3_2(T1, H1, M, [], H3, T3) ->
    144     merge2_2(T1, H3, T3, M, H1).
    145 
    146 % H1 =< H2. Inlined.
    147 merge3_12(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 ->
    148     merge3_1(T1, [H1 | M], H2, T2, H3, T3);
    149 merge3_12(T1, H1, H2, T2, H3, T3, M) ->
    150     merge3_12_3(T1, H1, H2, T2, [H3 | M], T3).
    151 
    152 % H1 =< H2, take L3 apart.
    153 merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 ->
    154     merge3_1(T1, [H1 | M], H2, T2, H3, T3);
    155 merge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    156     merge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
    157 merge3_12_3(T1, H1, H2, T2, M, []) ->
    158     merge2_1(T1, H2, T2, [H1 | M]).
    159 
    160 % H1 > H2. Inlined.
    161 merge3_21(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 ->
    162     merge3_2(T1, H1, [H2 | M], T2, H3, T3);
    163 merge3_21(T1, H1, H2, T2, H3, T3, M) ->
    164     merge3_21_3(T1, H1, H2, T2, [H3 | M], T3).
    165 
    166 % H1 > H2, take L3 apart.
    167 merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 ->
    168     merge3_2(T1, H1, [H2 | M], T2, H3, T3);
    169 merge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    170     merge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
    171 merge3_21_3(T1, H1, H2, T2, M, []) ->
    172     merge2_2(T1, H2, T2, M, H1).
    173 
    174 %% rmerge/3
    175 
    176 %% Take L1 apart.
    177 rmerge3_1([H1 | T1], M, H2, T2, H3, T3) when H1 =< H2 ->
    178     rmerge3_12(T1, H1, H2, T2, H3, T3, M);
    179 rmerge3_1([H1 | T1], M, H2, T2, H3, T3) ->
    180     rmerge3_21(T1, H1, H2, T2, H3, T3, M);
    181 rmerge3_1([], M, H2, T2, H3, T3) when H2 =< H3 ->
    182     rmerge2_2(T2, H3, T3, M, H2);
    183 rmerge3_1([], M, H2, T2, H3, T3) ->
    184     rmerge2_1(T2, H3, T3, [H2 | M]).
    185 
    186 %% Take L2 apart.
    187 rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) when H1 =< H2 ->
    188     rmerge3_12(T1, H1, H2, T2, H3, T3, M);
    189 rmerge3_2(T1, H1, M, [H2 | T2], H3, T3) ->
    190     rmerge3_21(T1, H1, H2, T2, H3, T3, M);
    191 rmerge3_2(T1, H1, M, [], H3, T3) when H1 =< H3 ->
    192     rmerge2_2(T1, H3, T3, M, H1);
    193 rmerge3_2(T1, H1, M, [], H3, T3) ->
    194     rmerge2_1(T1, H3, T3, [H1 | M]).
    195 
    196 % H1 =< H2. Inlined.
    197 rmerge3_12(T1, H1, H2, T2, H3, T3, M) when H2 =< H3 ->
    198     rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
    199 rmerge3_12(T1, H1, H2, T2, H3, T3, M) ->
    200     rmerge3_2(T1, H1, [H2 | M], T2, H3, T3).
    201 
    202 % H1 =< H2, take L3 apart.
    203 rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) when H2 =< H3 ->
    204     rmerge3_12_3(T1, H1, H2, T2, [H3 | M], T3);
    205 rmerge3_12_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    206     rmerge3_2(T1, H1, [H2 | M], T2, H3, T3);
    207 rmerge3_12_3(T1, H1, H2, T2, M, []) ->
    208     rmerge2_2(T1, H2, T2, M, H1).
    209 
    210 % H1 > H2. Inlined.
    211 rmerge3_21(T1, H1, H2, T2, H3, T3, M) when H1 =< H3 ->
    212     rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
    213 rmerge3_21(T1, H1, H2, T2, H3, T3, M) ->
    214     rmerge3_1(T1, [H1 | M], H2, T2, H3, T3).
    215 
    216 % H1 > H2, take L3 apart.
    217 rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) when H1 =< H3 ->
    218     rmerge3_21_3(T1, H1, H2, T2, [H3 | M], T3);
    219 rmerge3_21_3(T1, H1, H2, T2, M, [H3 | T3]) ->
    220     rmerge3_1(T1, [H1 | M], H2, T2, H3, T3);
    221 rmerge3_21_3(T1, H1, H2, T2, M, []) ->
    222     rmerge2_1(T1, H2, T2, [H1 | M]).
    223 
    224 %% merge/2
    225 
    226 merge2_1([H1 | T1], H2, T2, M) when H1 =< H2 ->
    227     merge2_1(T1, H2, T2, [H1 | M]);
    228 merge2_1([H1 | T1], H2, T2, M) ->
    229     merge2_2(T1, H2, T2, M, H1);
    230 merge2_1([], H2, T2, M) ->
    231     lists:reverse(T2, [H2 | M]).
    232 
    233 merge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 ->
    234     merge2_1(T1, H2, T2, [H1, HdM | M]);
    235 merge2_2(T1, HdM, [H2 | T2], M, H1) ->
    236     merge2_2(T1, H2, T2, [HdM | M], H1);
    237 merge2_2(T1, HdM, [], M, H1) ->
    238     lists:reverse(T1, [H1, HdM | M]).
    239 
    240 %% rmerge/2
    241 
    242 rmerge2_1([H1 | T1], H2, T2, M) when H1 =< H2 ->
    243     rmerge2_2(T1, H2, T2, M, H1);
    244 rmerge2_1([H1 | T1], H2, T2, M) ->
    245     rmerge2_1(T1, H2, T2, [H1 | M]);
    246 rmerge2_1([], H2, T2, M) ->
    247     lists:reverse(T2, [H2 | M]).
    248 
    249 rmerge2_2(T1, HdM, [H2 | T2], M, H1) when H1 =< H2 ->
    250     rmerge2_2(T1, H2, T2, [HdM | M], H1);
    251 rmerge2_2(T1, HdM, [H2 | T2], M, H1) ->
    252     rmerge2_1(T1, H2, T2, [H1, HdM | M]);
    253 rmerge2_2(T1, HdM, [], M, H1) ->
    254     lists:reverse(T1, [H1, HdM | M]).

    好,这是我见过最复杂的排序算法了。

    这个算法本质上是一种归并排序(先切分出有序的子序列,再做多路归并),可是由于erlang的特性,变量不能变,使得写法和大部分的排序方法有很大的区别。切分阶段只扫描一遍list,是O(n);归并阶段每一轮是O(n),最多需要O(log n)轮,所以整体复杂度是O(n log n),输入本身已经有序时接近O(n)

    这个算法可以分3大块,第一块是sort_*函数,第二块是split_*,第3块是rmergel和mergel

    首先

    sort([X, Y | L] = L0) when X =< Y ->  %当list不超过3个元素时,直接比较后返回;当list超过3个元素时进入sort_1或者split_*函数
    ..........
    sort([X, Y | L]) ->            %分了2种情况,第一个元素大于第二个 或者 第一个元素小于等于第二个
    .......
    sort([_] = L) ->         %list只有1个也直接返回   
        L;
    sort([] = L) ->          %list为空直接返回
        L.
    
    %% Scans a prefix of elements that all compare equal. X is the most recent
    %% of those elements and R accumulates the earlier ones.
    %% Still equal: push X onto R and keep scanning.
    sort_1(X, [Y | L], R) when X == Y ->
        sort_1(Y, L, [X | R]);
    %% First larger element found: hand over to the ascending splitter split_1.
    sort_1(X, [Y | L], R) when X < Y ->
        split_1(X, Y, L, R, []);
    %% Otherwise Y is smaller than X: hand over to the descending splitter
    %% split_2.
    sort_1(X, [Y | L], R) ->
        split_2(X, Y, L, R, []);
    %% Input exhausted while every element compared equal: put the collected
    %% elements back in front of X, in their original order.
    sort_1(X, [], R) ->
        lists:reverse(R, [X]).

    这段代码还是比较清晰的,就是说把超过3个元素的list传入split_*

    下面看split_1系列

    %% Ascending.

    %% Builds ascending runs out of the remaining input.
    %%   X, Y - the two largest elements of the current run, with X =< Y
    %%   Z    - the next input element
    %%   R    - the rest of the current run, largest element first
    %%   Rs   - the runs completed so far
    %%
    %% Z >= Y, i.e. X =< Y =< Z: X is the smallest of the three, so push it
    %% onto R and continue with the pair (Y, Z).
    split_1(X, Y, [Z | L], R, Rs) when Z >= Y ->

      split_1(Y, Z, L, [X | R], Rs);

    %% X =< Z < Y: X is still the smallest, so push it onto R and continue
    %% with the pair (Z, Y).
    split_1(X, Y, [Z | L], R, Rs) when Z >= X ->

      split_1(Z, Y, L, [X | R], Rs);

    %% Z < X and R is empty: Z is the smallest element seen in this run, so it
    %% simply becomes the only element of R.
    split_1(X, Y, [Z | L], [], Rs) ->

      split_1(X, Y, L, [Z], Rs);

    %% Z < X but R is not empty: Z may be smaller than elements already in R,
    %% so it cannot be pushed onto R. Carry it along as the extra argument of
    %% split_1_1 instead.
    split_1(X, Y, [Z | L], R, Rs) ->

      split_1_1(X, Y, L, R, Rs, Z);

    %% Input exhausted: close the current run, add it to Rs and hand all runs
    %% to rmergel.
    split_1(X, Y, [], R, Rs) ->

      rmergel([[Y, X | R] | Rs], []).

    WTF,这些到底在干什么,erlang又没有调试跟踪,又没说明,完全就蒙了,仔细研究下终于明白了这2个函数的意义,不得不说写源码的真是大神啊~~~

    通过上面的分析,我们知道了一个规律,每次都会比较3个数的大小,而且还会处理其中最小的数

    X:下桩  Y:上桩, Z:目前list的第一个元素 R:当前已经排好序的list,Rs:已经完成的有序子list的集合,S:split_1_1暂存的那个元素

    split_1这个函数的作用是把X,Y,Z中最小的放到R中,同时要保证这个数比R中现有的元素都大,

    这个怎么保证呢,当Z>=X(包括Z>=Y和X<=Z<Y两种情况)的时候直接把X放进R,

    原因就是X一直小于Y,而且R里面的元素都比X小才放进去的,而且整个过程X和Y的值都是增加的,所以X肯定大于R中的任何一个

    (下面用R代表R中任何一个元素),假设Z0>Y0

    1. R0<X0<Y0<Z0  开始R0为空,比较成立
    2. R1<X1<Y1<Z1   这时R1=[X0|R0],X1=Y0,Y1=Z0,当Z1>Y1,比较还是成立
    3. R2<X2<Y2<Z2   这时R2=[X1|R1],X2=Y1,Y2=Z1,当Z2>Y2,比较还是成立
    4. 。。。。。。。

    当Z>X的时候也一样,于是当Z>=X或者Z>=Y的时候,只要把X的值放到R中就行,R里面的元素越来越大,是排好序的(从大到小),于是上面split_1的前两个子句(原文中绿色注释的代码)就能理解了

    split_1的第三个子句(原文中蓝色注释的代码)是R为空的情况, Z<X<Y,当然R<Z<X<Y,于是也能理解了

    主要是split_1的第四个子句(原文中褐色注释的代码):当R不为空,我们知道R<X<Y,而且Z<X<Y,可是R里面的元素和Z的大小关系不能确定,

    于是我们知道了当前最小的是Z,可是Z不一定大于R的所有元素,上面的split_1函数的逻辑就不通了,然后把Z存入到最后一个参数进入split_1_1

    我们来查看split_1_1

    %% Same scan as split_1, but with one pending element S: the element that
    %% split_1 found to be smaller than X while R was not empty.
    %%
    %% Z >= Y: X is the smallest of X, Y, Z, so push it onto R and continue
    %% with the pair (Y, Z); S is carried along unchanged.
    split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= Y ->
        split_1_1(Y, Z, L, [X | R], Rs, S);
    %% X =< Z < Y: push X onto R and continue with the pair (Z, Y); S is
    %% carried along unchanged.
    split_1_1(X, Y, [Z | L], R, Rs, S) when Z >= X ->
        split_1_1(Z, Y, L, [X | R], Rs, S);
    %% Z < X and S =< Z: the current run cannot grow any further. Close it as
    %% [Y, X | R], add it to Rs and go back to split_1 with an empty R and the
    %% new pair (S, Z).
    split_1_1(X, Y, [Z | L], R, Rs, S) when S =< Z ->
        split_1(S, Z, L, [], [[Y, X | R] | Rs]);
    %% Z < X and Z < S: same as above, but the new pair is (Z, S).
    split_1_1(X, Y, [Z | L], R, Rs, S) ->
        split_1(Z, S, L, [], [[Y, X | R] | Rs]);
    %% Input exhausted: S becomes a run of its own next to the current run,
    %% and all runs are handed to rmergel.
    split_1_1(X, Y, [], R, Rs, S) ->    
        rmergel([[S], [Y, X | R] | Rs], []).

    我们可以看到,split_1_1的第三个子句(原文中紫色注释的代码),当S<=Z<X<Y,R<X我们知道最小的数是S,然后是Z,可是我们不能比较R里面的元素与这2个数的大小,

    如果按照上面函数的逻辑,可以再弄个函数split_1_1_1,可这样函数不是闭环的,于是大神直接把肯定比R大的2个元素(X和Y)存入R(保证了R的有序),把这个R作为一个完成的子序列放进Rs,再带着S、Z和清空的R回到split_1,这里真是太厉害了

     1 X:12,Y:13,Z:54,L:[32,1,4521,32,214,541,1,12,3],R:[],Rs:[]
     2 X:13,Y:54,Z:32,L:[1,4521,32,214,541,1,12,3],R:[12],Rs:[]
     3 X:32,Y:54,Z:1,L:[4521,32,214,541,1,12,3],R:[13,12],Rs:[]
     4 X:32,Y:54,Z:4521,L:[32,214,541,1,12,3],R:[13,12],Rs:[],S:1
     5 X:54,Y:4521,Z:32,L:[214,541,1,12,3],R:[32,13,12],Rs:[],S:1
     6 X:1,Y:32,Z:214,L:[541,1,12,3],R:[],Rs:[[4521,54,32,13,12]]
     7 X:32,Y:214,Z:541,L:[1,12,3],R:[1],Rs:[[4521,54,32,13,12]]
     8 X:214,Y:541,Z:1,L:[12,3],R:[32,1],Rs:[[4521,54,32,13,12]]
     9 X:214,Y:541,Z:12,L:[3],R:[32,1],Rs:[[4521,54,32,13,12]],S:1
    10 X:1,Y:12,Z:3,L:[],R:[],Rs:[[541,214,32,1],[4521,54,32,13,12]]
    11 Rs:[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
    我们看个简单的例子执行过程,大概就能明白这个逻辑了。
    这里的List = [12,13,54,32,1,4521,32,214,541,1,12,3],这2个函数执行完成后的结果是[[12,3,1],[541,214,32,1],[4521,54,32,13,12]]
    可以看到这里经过了N次循环(N是List长度),生成了几个子list,每个子list都是有序的,这样肯定没有完成,剩下的就是mergel和rmergel函数的作用了

     篇幅太长,不好排版,下面的函数分析放在下一篇:

    erlang下lists模块sort(排序)方法源码解析(二)

    未完待续。。。

  • 相关阅读:
    AI ResNet V1
    Lua基础
    git命令小结
    定时器及时间轮
    expect
    vscode 常用快捷键
    动态链接库与共享内存:
    container_of机制
    ELF文件格式
    git学习补充
  • 原文地址:https://www.cnblogs.com/tudou008/p/9071361.html
Copyright © 2020-2023  润新知