• map结构初探


    map结构初探

    maps是erlang新出的一种数据结构,传说用来替代record。这里主要说下maps的具体实现,并分析各种操作的时间复杂度(c层面)。并对优缺点做一个总结。

    环境:版本为R17

    map结构

    /* In-heap header of a map; the value words follow it directly (see layout below). */
    typedef struct map_s {
        Eterm thing_word; /* header word (THING); the constructors store MAP_HEADER here */
        Uint  size;       /* number of key/value pairs */
        Eterm keys;      /* tuple {K1, ..., Kn} holding the keys, n = size */
    } map_t;
    /* map node
     *
     * -----------
     * Eterm   THING
     * Uint    size
     * Eterm   Keys -> {K1, K2, K3, ..., Kn} where n = size
     * ----
     * Eterm   V1
     * ...
     * Eterm   Vn, where n = size
     * -----------
     */
    

    这个结构定义在erl_map.h中,这里的注释已经对map的结构说的很清楚了。
    所以map的结构大概就是这样:头部保存size和指向keys元组的指针,后面紧跟着size个value。

    在erl_map.c的注释中,可以看到此模块主要实现了下面的一系列maps模块的函数

    • maps:find/2
    • maps:from_list/1
    • maps:get/2
    • maps:is_key/2
    • maps:keys/1
    • maps:merge/2
    • maps:new/0
    • maps:put/3
    • maps:remove/2
    • maps:to_list/1
    • maps:update/3
    • maps:values/1

    下面挑一些能够看清map结构的函数,做一个简单的分析。

    maps:find/2

    时间复杂度:O(N), 由下面代码可以看到在map中查找实际需要遍历整个keys,所以时间复杂度是O(N)

    /*
     * maps:find/2
     *
     * BIF_ARG_1 is the key, BIF_ARG_2 the map.
     * Returns {ok, Value} when the key is present and the atom 'error' when it
     * is not. Raises BADARG when the second argument is not a map.
     */
    BIF_RETTYPE maps_find_2(BIF_ALIST_2) {
        Eterm found;
        Eterm *tuple_words;

        if (!is_map(BIF_ARG_2)) {
            BIF_ERROR(BIF_P, BADARG);
        }

        if (!erts_maps_find(BIF_ARG_1, BIF_ARG_2, &found)) {
            BIF_RET(am_error);
        }

        /* Build the 2-tuple {ok, Value}: one arity word plus two elements. */
        tuple_words    = HAlloc(BIF_P, 3);
        tuple_words[0] = make_arityval(2);
        tuple_words[1] = am_ok;
        tuple_words[2] = found;
        BIF_RET(make_tuple(tuple_words));
    }
    
    /*
     * Look up `key` in `map` by scanning the key array from the front.
     *
     * On a hit the matching value is stored through `value` and 1 is returned;
     * otherwise 0 is returned and `*value` is left untouched.
     * The scan is linear in the number of keys.
     */
    int erts_maps_find(Eterm key, Eterm map, Eterm *value) {
        map_t *node  = (map_t*)map_val(map);
        Uint   count = map_get_size(node);
        Eterm *keys  = map_get_keys(node);
        Eterm *vals  = map_get_values(node);
        Uint   pos   = 0;

        while (pos < count) {
            if (EQ(keys[pos], key)) {
                *value = vals[pos];
                return 1;
            }
            pos++;
        }
        return 0;
    }
    

    maps:from_list/1

    可以看到,from_list的操作是有点费时的,最坏情况下能到O(N^2)。

    /*
     * maps:from_list/1
     *
     * Builds a map from a list of {Key, Value} 2-tuples.
     * Raises BADARG when the argument is not a proper list or when any element
     * is not a 2-tuple.
     *
     * The keys are kept sorted with an insertion sort while the list is walked.
     * When a key occurs more than once, the later pair overwrites the earlier
     * one and the words that were reserved for it are given back at the end.
     */
    BIF_RETTYPE maps_from_list_1(BIF_ALIST_1) {
        Eterm *kv, item = BIF_ARG_1;
        Eterm *hp, *thp,*vs, *ks, keys, res;
        map_t *mp;
        Uint  size = 0, unused_size = 0;
        Sint  c = 0;
        Sint  idx = 0;
    
        if (is_list(item) || is_nil(item)) {
    
    	/* Calculate size and check validity */
    	/* First pass: check that every element is a 2-tuple and count the elements. */
    	while(is_list(item)) {
    	    res = CAR(list_val(item));
    	    if (is_not_tuple(res))
    		goto error;
    
    	    kv = tuple_val(res);
    	    if (*kv != make_arityval(2))
    		goto error;
    
    	    size++;
    	    item = CDR(list_val(item));
    	}
    
    	if (is_not_nil(item))
    	    goto error;
    	/* Allocate the whole map in one go: 3 words for the map_t header,
    	 * 1 word for the arity of the key tuple and 2 * size words for the
    	 * keys and the values. Because the map is built from scratch, the
    	 * key tuple is placed directly in front of the map header.
    	 */
    	hp    = HAlloc(BIF_P, 3 + 1 + (2 * size));
    	thp   = hp;
    	keys  = make_tuple(hp);/* the key tuple starts here */
    	*hp++ = make_arityval(size);/* arity of the key tuple */
    	ks    = hp;/* first key slot */
    	hp   += size;
    	mp    = (map_t*)hp;/* map header follows the key slots */
    	res   = make_map(mp);
    	hp   += MAP_HEADER_SIZE;
    	vs    = hp;/* first value slot */
    
    	/* fill in the map header */
    	mp->thing_word = MAP_HEADER;
    	mp->size = size; /* set later, might shrink*/
    	mp->keys = keys;
    
    	if (size == 0)
    	    BIF_RET(res);
    
    	item  = BIF_ARG_1;
    
    	/* the first pair goes straight into slot 0 */
    	/* first entry */
    	kv    = tuple_val(CAR(list_val(item)));
    	ks[0] = kv[1];
    	vs[0] = kv[2];
    	size  = 1;
    	item  = CDR(list_val(item));
    
    	/* insert sort key/value pairs */
    	while(is_list(item)) {
    
    	    kv = tuple_val(CAR(list_val(item)));
    
    	    /* compare ks backwards
    	     * idx represent word index to be written (hole position).
    	     * We cannot copy the elements when searching since we might
    	     * have an equal key. So we search for just the index first =(
    	     *
    	     * It is perhaps faster to move the values in the first pass.
    	     * Check for uniqueness during insert phase and then have a
    	     * second phace compacting the map if duplicates are found
    	     * during insert. .. or do someother sort .. shell-sort perhaps.
    	     */
    
    	    idx = size;
    		/* effectively an insertion sort that keeps the keys ordered */
    	    while(idx > 0 && (c = CMP_TERM(kv[1],ks[idx-1])) < 0) { idx--; }
    
    	    if (c == 0) {
    		/* last compare was equal,
    		 * i.e. we have to release memory
    		 * and overwrite that key/value
    		 */
            /* the key already exists: overwrite the pair in place */
    		ks[idx-1] = kv[1];
    		vs[idx-1] = kv[2];
    		unused_size++;
    	    } else {
            /* new key: shift the pairs after idx up one slot to keep the order */
    		Uint i = size;
    		while(i > idx) {
    		    ks[i] = ks[i-1];
    		    vs[i] = vs[i-1];
    		    i--;
    		}
    		ks[idx] = kv[1];
    		vs[idx] = kv[2];
    		size++;
    	    }
    	    item = CDR(list_val(item));
    	}
    
    	if (unused_size) {
    	    /* the key tuple is embedded in the heap
    	     * write a bignum to clear it.
    	     */
    	    /* release values as normal since they are on the top of the heap */
    
    	    ks[size] = make_pos_bignum_header(unused_size - 1);
    	    HRelease(BIF_P, vs + size + unused_size, vs + size);
    	}
    
    	/* store the final number of pairs in the key tuple and the header */
    	*thp = make_arityval(size);
    	mp->size = size;
    	BIF_RET(res);
        }
    
    error:
    
        BIF_ERROR(BIF_P, BADARG);
    }
    

    maps:get/2

    由于erts_maps_get的时间复杂度是O(N),所以get的时间复杂度也是O(N)。

    /*
     * maps:get/2
     *
     * BIF_ARG_1 is the key, BIF_ARG_2 the map.
     * Returns the value stored under the key. When the key is missing, an
     * error exception with the term {bad_key, Key} is raised; when the second
     * argument is not a map, BADARG is raised.
     */
    BIF_RETTYPE maps_get_2(BIF_ALIST_2) {
        Eterm found, reason;
        Eterm *tuple_words;
        char *reason_name = "bad_key";

        if (!is_map(BIF_ARG_2)) {
            BIF_ERROR(BIF_P, BADARG);
        }

        if (erts_maps_get(BIF_ARG_1, BIF_ARG_2, &found)) {
            BIF_RET(found);
        }

        /* Key not found: raise {bad_key, Key}. */
        reason = am_atom_put(reason_name, sys_strlen(reason_name));

        tuple_words   = HAlloc(BIF_P, 3);
        BIF_P->fvalue = TUPLE2(tuple_words, reason, BIF_ARG_1);
        BIF_ERROR(BIF_P, EXC_ERROR_2);
    }
    
    /*
     * Look up `key` in `map`.
     *
     * On a hit the matching value is stored through `value` and 1 is returned;
     * otherwise 0 is returned and `*value` is left untouched.
     *
     * An immediate key is looked up with a plain word comparison. A miss in
     * that scan is final, which is the same rule erts_maps_put relies on, so
     * the function returns without a second pass that would run EQ over every
     * key again. All other keys are compared with EQ. Either way the lookup is
     * linear in the number of keys.
     */
    int erts_maps_get(Eterm key, Eterm map, Eterm *value) {
        Eterm *ks, *vs;
        map_t *mp;
        Uint n, i;

        mp = (map_t*)map_val(map);
        n  = map_get_size(mp);

        if (n == 0) {
            return 0;
        }

        ks = map_get_keys(mp);
        vs = map_get_values(mp);

        if (is_immed(key)) {
            for (i = 0; i < n; i++) {
                if (ks[i] == key) {
                    *value = vs[i];
                    return 1;
                }
            }
            /* no identical word among the keys: the immediate key is absent */
            return 0;
        }

        for (i = 0; i < n; i++) {
            if (EQ(ks[i], key)) {
                *value = vs[i];
                return 1;
            }
        }
        return 0;
    }
    

    maps:is_key/2

    由代码可以看到,这里还是需要遍历所有的key来判断是否有相等的,时间复杂度为O(N)。

    /*
     * maps:is_key/2
     *
     * BIF_ARG_1 is the key, BIF_ARG_2 the map.
     * Returns 'true' when the key is present and 'false' otherwise. Raises
     * BADARG when the second argument is not a map.
     *
     * An immediate key is matched with a plain word comparison and any other
     * key with EQ. The two scans are mutually exclusive, as in erts_maps_put,
     * so a missing immediate key no longer costs a second pass over the keys.
     */
    BIF_RETTYPE maps_is_key_2(BIF_ALIST_2) {
        if (is_map(BIF_ARG_2)) {
            Eterm *ks, key;
            map_t *mp;
            Uint n, i;

            mp  = (map_t*)map_val(BIF_ARG_2);
            key = BIF_ARG_1;
            n   = map_get_size(mp);
            ks  = map_get_keys(mp);

            if (n == 0) {
                BIF_RET(am_false);
            }

            if (is_immed(key)) {
                for (i = 0; i < n; i++) {
                    if (ks[i] == key) {
                        BIF_RET(am_true);
                    }
                }
            } else {
                for (i = 0; i < n; i++) {
                    if (EQ(ks[i], key)) {
                        BIF_RET(am_true);
                    }
                }
            }
            BIF_RET(am_false);
        }
        BIF_ERROR(BIF_P, BADARG);
    }
    

    maps:keys/1

    虽然得到keys只要O(1),但是构造list需要O(N)

    /*
     * maps:keys/1
     *
     * Returns the keys of the map as a list, in the order they are stored in
     * the key array. Raises BADARG when the argument is not a map.
     */
    BIF_RETTYPE maps_keys_1(BIF_ALIST_1) {
        Eterm *cell, *keys;
        Eterm list = NIL;
        map_t *node;
        Uint remaining;

        if (!is_map(BIF_ARG_1)) {
            BIF_ERROR(BIF_P, BADARG);
        }

        node      = (map_t*)map_val(BIF_ARG_1);
        remaining = map_get_size(node);

        if (remaining == 0) {
            BIF_RET(list);
        }

        /* two words per cons cell */
        cell = HAlloc(BIF_P, (2 * remaining));
        keys = map_get_keys(node);

        /* Cons from the last key backwards so the list ends up in key order. */
        for (; remaining > 0; remaining--, cell += 2) {
            list = CONS(cell, keys[remaining - 1], list);
        }

        BIF_RET(list);
    }
    

    maps:new/0

    /*
     * maps:new/0
     *
     * Returns an empty map: a zero-arity key tuple immediately followed by a
     * map header whose size is 0.
     */
    BIF_RETTYPE maps_new_0(BIF_ALIST_0) {
        Eterm *words;
        Eterm empty_keys;
        map_t *node;

        /* One extra word on top of the header: the arity word of the key tuple. */
        words      = HAlloc(BIF_P, (MAP_HEADER_SIZE + 1));
        empty_keys = make_tuple(words);
        words[0]   = make_arityval(0);

        node             = (map_t*)(words + 1);
        node->thing_word = MAP_HEADER;
        node->size       = 0;
        node->keys       = empty_keys;

        BIF_RET(make_map(node));
    }
    
    

    maps:put/3

    函数先假设map中已经存在这个key:只为value分配新空间并复用原来的keys元组,找到key对应的value后直接替换。如果key不在这个结构中,则重新构造keys元组,并按序复制相应的key、value对,把新的key插入到有序的位置上。

    /*
     * maps:put/3
     *
     * BIF_ARG_1 is the key, BIF_ARG_2 the value and BIF_ARG_3 the map.
     * Returns the map produced by erts_maps_put. Raises BADARG when the third
     * argument is not a map.
     */
    BIF_RETTYPE maps_put_3(BIF_ALIST_3) {
        if (!is_map(BIF_ARG_3)) {
            BIF_ERROR(BIF_P, BADARG);
        }
        BIF_RET(erts_maps_put(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3));
    }
    
    /*
     * Return a new map equal to `map` with `key` associated with `value`.
     * All new words are allocated on the heap of process `p`; the input map is
     * only read.
     *
     * Three cases:
     *  - empty map: build a one-element key tuple and a one-element map;
     *  - key already present (optimistic path): allocate only a header plus n
     *    values and share the key tuple of the old map;
     *  - key absent: reuse the words of the optimistic allocation as the new
     *    key tuple of arity n + 1, allocate a second header plus n + 1 values,
     *    and copy the pairs across with the new pair in its sorted position.
     */
    Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
        Sint n,i;
        Sint c = 0;
        Eterm* hp, *shp;
        Eterm *ks,*vs, res, tup;
        map_t *mp = (map_t*)map_val(map);
    
        n = map_get_size(mp);
    
        if (n == 0) {
    	/* key tuple (arity word + key), then map header, then the single value */
    	hp    = HAlloc(p, MAP_HEADER_SIZE + 1 + 2);
    	tup   = make_tuple(hp);
    	*hp++ = make_arityval(1);
    	*hp++ = key;
    	res   = make_map(hp);
    	*hp++ = MAP_HEADER;
    	*hp++ = 1;
    	*hp++ = tup;
    	*hp++ = value;
    
    	return res;
        }
    
        ks  = map_get_keys(mp);
        vs  = map_get_values(mp);
    
        /* only allocate for values,
         * assume key-tuple will be intact
         */
    
        hp  = HAlloc(p, MAP_HEADER_SIZE + n);
        shp = hp; /* save hp, used if optimistic update fails */
        res = make_map(hp);
        *hp++ = MAP_HEADER;
        *hp++ = n;
        *hp++ = mp->keys;
    
    	/* copy the values into the new map, substituting `value` at the matching key */
        if (is_immed(key)) {
    	for( i = 0; i < n; i ++) {
    	    if (ks[i] == key) {
    		*hp++ = value;
    		vs++;
    		c = 1;
    	    } else {
    		*hp++ = *vs++;
    	    }
    	}
        } else {
    	for( i = 0; i < n; i ++) {
    	    if (EQ(ks[i], key)) {
    		*hp++ = value;
    		vs++;
    		c = 1;
    	    } else {
    		*hp++ = *vs++;
    	    }
    	}
        }
    	/* the key was found: the optimistic update succeeded, return the new map */
        if (c)
    	return res;
    
        /* need to make a new tuple,
         * use old hp since it needs to be recreated anyway.
         */
        tup    = make_tuple(shp);
        *shp++ = make_arityval(n+1);
    
        /* header + n + 1 values; NOTE(review): the literal 3 presumably equals MAP_HEADER_SIZE - confirm */
        hp    = HAlloc(p, 3 + n + 1);
        res   = make_map(hp);
        *hp++ = MAP_HEADER;
        *hp++ = n + 1;
        *hp++ = tup;
    
        ks  = map_get_keys(mp);
        vs  = map_get_values(mp);
    
        ASSERT(n >= 0);
    
        /* copy map in order */
        while (n && ((c = CMP_TERM(*ks, key)) < 0)) {
    	*shp++ = *ks++;
    	*hp++  = *vs++;
    	n--;
        }
    
        /* the new pair goes in front of the first key that is not smaller */
        *shp++ = key;
        *hp++  = value;
    
        ASSERT(n >= 0);
    
        /* copy the remaining pairs */
        while(n--) {
    	*shp++ = *ks++;
    	*hp++  = *vs++;
        }
        /* we have one word remaining
         * this will work out fine once we get the size word
         * in the header.
         */
        *shp = make_pos_bignum_header(0);
        return res;
    }
    
    

    总结

    总的来说,R17中erlang的map结构实现得很简单,给我的感觉是比较随意:查找、插入等操作都需要线性扫描keys,from_list最坏情况下甚至是O(N^2),各种操作都比较耗时。

  • 相关阅读:
    服务器(Ubuntu 12.04 LTS)上编译基于OpenCV的项目遇到的问题及解决方案
    ubuntu 16.04 LTS 降级安装gcc 4.8
    C#程序中获取电脑硬件配置信息的一种方法
    C#程序将对象保存为json文件的方法
    C#中运用事件实现异步调用
    Redis实现分布式锁 php
    CI框架整合UEditor编辑器上传功能
    PHP给图片加水印具体实现
    检测网站是否被和谐!
    vue的双向绑定和依赖收集
  • 原文地址:https://www.cnblogs.com/quitboy/p/4571150.html
Copyright © 2020-2023  润新知