• 第五章:Python高级编程-深入Python的dict和set


    5.1 dict的abc继承关系

    和list(Sequence)相似,都继承于Collection,添加了一些方法

    from collections.abc import Mapping,MutableMapping
    # A built-in dict counts as a Mapping (more precisely, a MutableMapping).
    a = {}
    print(type(a)) # <class 'dict'>
    print(isinstance(a,MutableMapping)) # True: dict is recognised as a MutableMapping
    """
    <class 'dict'>
    True
    """
    # NOTE: dict does not get there by inheritance. It implements the methods
    # of that class and is registered through MutableMapping.register(dict).
    

    collections.abc模块

    class Mapping(Collection):
        """A Mapping is a generic container for associating key/value
        pairs.

        This class provides concrete generic implementations of all
        methods except for __getitem__, __iter__, and __len__.
        """

        # The docstring must be the first statement of the class body;
        # placed after __slots__ it would be a no-op string and
        # Mapping.__doc__ would be None.
        __slots__ = ()

        @abstractmethod
        def __getitem__(self, key):
            """Return the value stored under *key*; raise KeyError if absent."""
            raise KeyError

        def get(self, key, default=None):
            'D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.'
            try:
                return self[key]
            except KeyError:
                return default

        def __contains__(self, key):
            """Return True when ``self[key]`` succeeds, False on KeyError."""
            try:
                self[key]
            except KeyError:
                return False
            else:
                return True

        def keys(self):
            "D.keys() -> a set-like object providing a view on D's keys"
            return KeysView(self)

        def items(self):
            "D.items() -> a set-like object providing a view on D's items"
            return ItemsView(self)

        def values(self):
            "D.values() -> an object providing a view on D's values"
            return ValuesView(self)

        def __eq__(self, other):
            """Compare by content: equal when both hold the same key/value pairs."""
            if not isinstance(other, Mapping):
                # Let Python try the reflected comparison on the other operand.
                return NotImplemented
            return dict(self.items()) == dict(other.items())

        # Setting __reversed__ to None makes reversed(mapping) raise TypeError.
        __reversed__ = None
    
    
    class MutableMapping(Mapping):
        """A MutableMapping is a generic container for associating
        key/value pairs.

        This class provides concrete generic implementations of all
        methods except for __getitem__, __setitem__, __delitem__,
        __iter__, and __len__.
        """

        # The docstring must be the first statement of the class body;
        # placed after __slots__ it would be a no-op string and
        # MutableMapping.__doc__ would be None.
        __slots__ = ()

        @abstractmethod
        def __setitem__(self, key, value):
            """Store *value* under *key*."""
            raise KeyError

        @abstractmethod
        def __delitem__(self, key):
            """Remove *key*; raise KeyError if it is absent."""
            raise KeyError

        # Private sentinel so that pop() can tell "no default given" apart
        # from an explicit default of None.
        __marker = object()

        def pop(self, key, default=__marker):
            '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
              If key is not found, d is returned if given, otherwise KeyError is raised.
            '''
            try:
                value = self[key]
            except KeyError:
                if default is self.__marker:
                    raise
                return default
            else:
                del self[key]
                return value

        def popitem(self):
            '''D.popitem() -> (k, v), remove and return some (key, value) pair
               as a 2-tuple; but raise KeyError if D is empty.
            '''
            try:
                key = next(iter(self))
            except StopIteration:
                # An exhausted iterator means the mapping is empty; hide the
                # StopIteration and surface the documented KeyError.
                raise KeyError from None
            value = self[key]
            del self[key]
            return key, value

        def clear(self):
            'D.clear() -> None.  Remove all items from D.'
            try:
                while True:
                    self.popitem()
            except KeyError:
                # popitem() raises KeyError once the mapping is empty.
                pass

        def update(*args, **kwds):
            ''' D.update([E, ]**F) -> None.  Update D from mapping/iterable E and F.
                If E present and has a .keys() method, does:     for k in E: D[k] = E[k]
                If E present and lacks .keys() method, does:     for (k, v) in E: D[k] = v
                In either case, this is followed by: for k, v in F.items(): D[k] = v
            '''
            # self is taken from *args so that 'self' stays usable as a
            # keyword key in **kwds.
            if not args:
                raise TypeError("descriptor 'update' of 'MutableMapping' object "
                                "needs an argument")
            self, *args = args
            if len(args) > 1:
                raise TypeError('update expected at most 1 arguments, got %d' %
                                len(args))
            if args:
                other = args[0]
                if isinstance(other, Mapping):
                    for key in other:
                        self[key] = other[key]
                elif hasattr(other, "keys"):
                    for key in other.keys():
                        self[key] = other[key]
                else:
                    for key, value in other:
                        self[key] = value
            for key, value in kwds.items():
                self[key] = value

        def setdefault(self, key, default=None):
            'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D'
            try:
                return self[key]
            except KeyError:
                self[key] = default
            return default
    

    5.2 dict的常用方法

    浅拷贝

    a = {'LYQ1':{'SWPU':'软件工程'},
         'LYQ2':{'SWPU2':'软件工程2'}}
    # Shallow copy: b is a new dict, but its values are the very same nested
    # dict objects as in a. Mutating a nested dict through b is therefore
    # visible through a as well, which is why both prints below are identical.
    b=a.copy()
    b['LYQ1']['SWPU']='我是浅拷贝'
    print(b)
    print(a)
    
    """
    {'LYQ1': {'SWPU': '我是浅拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
    {'LYQ1': {'SWPU': '我是浅拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
    """
    
    # With immutable values the difference does not show: copy() duplicates the
    # top-level dict (first level), while second-level objects are not copied
    # and stay shared references. Rebinding a key in d leaves c unchanged.
    c = {"a": "b"}
    d = c.copy()
    d["a"] = "sx"
    print(c)
    print(d)
    
    """
    {'a': 'b'}
    {'a': 'sx'}
    """
    

    深拷贝

    a = {'LYQ1':{'SWPU':'软件工程'},
         'LYQ2':{'SWPU2':'软件工程2'}}
    import copy
    # Deep copy: the nested dicts are copied as well, so deep_b and a point to
    # different objects and the change below does not reach a.
    deep_b=copy.deepcopy(a)
    deep_b['LYQ1']['SWPU']='我是深拷贝'
    print(deep_b)
    print(a)
    
    """
    {'LYQ1': {'SWPU': '我是深拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
    {'LYQ1': {'SWPU': '软件工程'}, 'LYQ2': {'SWPU2': '软件工程2'}}
    """
    

    fromkeys():

    # Build a dict from an iterable of keys; {'SWPU':'软件工程'} is the value
    # given to every key.
    # NOTE: fromkeys() stores the same value object under every key, so with a
    # mutable value like this dict, changing it through one key changes it for
    # all keys.
    my_list=['Stu1','Stu2']
    my_dict=dict.fromkeys(my_list,{'SWPU':'软件工程'})
    print(my_dict)
    
    """
    {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
    """
    

    get(key, default):key不存在时返回default,而不是抛出KeyError

    # get() guards against KeyError: "stu6" is not a key of new_dict (keys are
    # case-sensitive), so the supplied default {"age": 18} is returned.
    new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
    aa = new_dict.get("stu6", {"age": 18})
    print(aa)
    

    items():循环,返回key,value

    # items() yields (key, value) pairs, unpacked here into k and v.
    new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
    for k, v in new_dict.items():
        print(k, v)
        
    """
    Stu1 {'SWPU': '软件工程'}
    Stu2 {'SWPU': '软件工程'}
    """
    

    setdefault(): 有值直接取值,没有值则将值设置进去,并获取该值返回

    new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
    # "Stu12" is missing: the default "kobe" is inserted and returned.
    default_value1 = new_dict.setdefault("Stu12", "kobe")
    # "Stu2" already exists: its current value is returned and nothing changes.
    default_value2 = new_dict.setdefault("Stu2", "kobe")
    print(default_value1)
    print(default_value2)
    print(new_dict)
    
    """
    kobe
    {'SWPU': '软件工程'}
    {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}, 'Stu12': 'kobe'}
    """
    

    update():添加键值对或更新键值对

    a = {'kobe':{'SWPU':'软件工程'},
         'james':{'SWPU2':'软件工程2'}}
    # Form 1: add new key/value pairs from another dict (i.e. merge two dicts).
    a.update({'LYQ3':'NEW'})
    # Form 2: keyword arguments.
    a.update(LYQ4='NEW2',LYQ5='NEW3')
    # Form 3: any iterable of (key, value) pairs, e.g. a list of tuples or a
    # tuple of tuples.
    a.update([('LYQ6','NEW6')])
    print(a)
    print("*"*60)
    # update() on an existing key overwrites its value.
    a.update({'kobe':'我修改了'})
    print(a)
    
    """
    {'kobe': {'SWPU': '软件工程'}, 'james': {'SWPU2': '软件工程2'}, 'LYQ3': 'NEW', 'LYQ4': 'NEW2', 'LYQ5': 'NEW3', 'LYQ6': 'NEW6'}
    ************************************************************
    {'kobe': '我修改了', 'james': {'SWPU2': '软件工程2'}, 'LYQ3': 'NEW', 'LYQ4': 'NEW2', 'LYQ5': 'NEW3', 'LYQ6': 'NEW6'}
    """
    

    dict源码:

    # NOTE(review): this looks like an IDE-generated stub of the built-in dict:
    # every method body is `pass`, so it documents the API surface only. If it
    # were executed it would shadow the built-in name `dict`.
    class dict(object):
        """
        dict() -> new empty dictionary
        dict(mapping) -> new dictionary initialized from a mapping object's
            (key, value) pairs
        dict(iterable) -> new dictionary initialized as if via:
            d = {}
            for k, v in iterable:
                d[k] = v
        dict(**kwargs) -> new dictionary initialized with the name=value pairs
            in the keyword argument list.  For example:  dict(one=1, two=2)
        """
        def clear(self): # real signature unknown; restored from __doc__
            """ D.clear() -> None.  Remove all items from D. """
            pass
    
        def copy(self): # real signature unknown; restored from __doc__
            """ D.copy() -> a shallow copy of D """
            pass
    
        @staticmethod # known case
        def fromkeys(*args, **kwargs): # real signature unknown
            """ Create a new dictionary with keys from iterable and values set to value. """
            pass
    
        def get(self, *args, **kwargs): # real signature unknown
            """ Return the value for key if key is in the dictionary, else default. """
            pass
    
        def items(self): # real signature unknown; restored from __doc__
            """ D.items() -> a set-like object providing a view on D's items """
            pass
    
        def keys(self): # real signature unknown; restored from __doc__
            """ D.keys() -> a set-like object providing a view on D's keys """
            pass
    
        # NOTE: the `d=None` default in this stub is only a placeholder; per the
        # docstring below, omitting d raises KeyError when the key is missing.
        def pop(self, k, d=None): # real signature unknown; restored from __doc__
            """
            D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
            If key is not found, d is returned if given, otherwise KeyError is raised
            """
            pass
    
        def popitem(self): # real signature unknown; restored from __doc__
            """
            D.popitem() -> (k, v), remove and return some (key, value) pair as a
            2-tuple; but raise KeyError if D is empty.
            """
            pass
    
        def setdefault(self, *args, **kwargs): # real signature unknown
            """
            Insert key with a value of default if key is not in the dictionary.

            Return the value for key if key is in the dictionary, else default.
            """
            pass
    
        def update(self, E=None, **F): # known special case of dict.update
            """
            D.update([E, ]**F) -> None.  Update D from dict/iterable E and F.
            If E is present and has a .keys() method, then does:  for k in E: D[k] = E[k]
            If E is present and lacks a .keys() method, then does:  for k, v in E: D[k] = v
            In either case, this is followed by: for k in F:  D[k] = F[k]
            """
            pass
    
        def values(self): # real signature unknown; restored from __doc__
            """ D.values() -> an object providing a view on D's values """
            pass
    
        # --- special (dunder) methods -------------------------------------

        def __contains__(self, *args, **kwargs): # real signature unknown
            """ True if the dictionary has the specified key, else False. """
            pass
    
        def __delitem__(self, *args, **kwargs): # real signature unknown
            """ Delete self[key]. """
            pass
    
        def __eq__(self, *args, **kwargs): # real signature unknown
            """ Return self==value. """
            pass
    
        def __getattribute__(self, *args, **kwargs): # real signature unknown
            """ Return getattr(self, name). """
            pass
    
        def __getitem__(self, y): # real signature unknown; restored from __doc__
            """ x.__getitem__(y) <==> x[y] """
            pass
    
        def __ge__(self, *args, **kwargs): # real signature unknown
            """ Return self>=value. """
            pass
    
        def __gt__(self, *args, **kwargs): # real signature unknown
            """ Return self>value. """
            pass
    
        def __init__(self, seq=None, **kwargs): # known special case of dict.__init__
            """
            dict() -> new empty dictionary
            dict(mapping) -> new dictionary initialized from a mapping object's
                (key, value) pairs
            dict(iterable) -> new dictionary initialized as if via:
                d = {}
                for k, v in iterable:
                    d[k] = v
            dict(**kwargs) -> new dictionary initialized with the name=value pairs
                in the keyword argument list.  For example:  dict(one=1, two=2)
            # (copied from class doc)
            """
            pass
    
        def __iter__(self, *args, **kwargs): # real signature unknown
            """ Implement iter(self). """
            pass
    
        def __len__(self, *args, **kwargs): # real signature unknown
            """ Return len(self). """
            pass
    
        def __le__(self, *args, **kwargs): # real signature unknown
            """ Return self<=value. """
            pass
    
        def __lt__(self, *args, **kwargs): # real signature unknown
            """ Return self<value. """
            pass
    
        @staticmethod # known case of __new__
        def __new__(*args, **kwargs): # real signature unknown
            """ Create and return a new object.  See help(type) for accurate signature. """
            pass
    
        def __ne__(self, *args, **kwargs): # real signature unknown
            """ Return self!=value. """
            pass
    
        def __repr__(self, *args, **kwargs): # real signature unknown
            """ Return repr(self). """
            pass
    
        def __setitem__(self, *args, **kwargs): # real signature unknown
            """ Set self[key] to value. """
            pass
    
        def __sizeof__(self): # real signature unknown; restored from __doc__
            """ D.__sizeof__() -> size of D in memory, in bytes """
            pass
    
        # Setting __hash__ to None marks instances as unhashable.
        __hash__ = None
    

    5.3 dict的子类

    当我们要自定义一个字典的时候,不要直接继承dict,因为被覆盖的方法在有些操作中不会被调用(不生效)

    """
    不建议直接继承dict,而是collections.UserDict
    """
    # Subclassing list or dict directly is not recommended.
    class MyDict(dict):
        """dict subclass whose item assignment stores ``value * 2``."""

        def __setitem__(self, key, value):
            super().__setitem__(key, value * 2)
    # The override is NOT used here: the C-implemented dict does not always
    # call overridden methods, so the constructor stores 1 unchanged.
    my_dict=MyDict(one=1)
    print(my_dict)
    
    print("*"*10)
    
    # Explicit item assignment does go through the overridden __setitem__.
    my_dict['one']=1
    print(my_dict)
    
    """
    {'one': 1}
    **********
    {'one': 2}
    """
    

    使用继承UserDict的方式来实现自定义的字典.

    from collections import UserDict
    class MyDict2(UserDict):
        """UserDict subclass whose item assignment stores ``value * 2``."""

        def __setitem__(self, key, value):
            super().__setitem__(key, value * 2)
    # NOTE(review): unlike the dict subclass, the constructor is expected to go
    # through the overridden __setitem__ here, printing {'one': 2}; confirm by
    # running.
    my_dict2=MyDict2(one=1)
    print(my_dict2)
    

    UserDict源码:当取不到某个key时,就会调用__missing__方法(如果定义了__missing__)获取默认值

    创建带有默认值的字典. collections中的defaultdict

    defaultdict之所以可以带有默认值,是因为它内部实现了__missing__方法:defaultdict继承自dict,当dict的__getitem__找不到key时,会调用子类的__missing__方法;UserDict类的__getitem__方法中同样会调用__missing__方法

    from collections import defaultdict
    # The default factory can be dict, int, str, list, tuple and so on.
    my_dict=defaultdict(dict)
    # The key is missing, so the lookup actually ends up in __missing__.
    print(my_dict['haha'])
    

    defaultdict源码: 之所以可以设置默认值就是因为实现了__missing__方法

    5.4 set和frozenset

    """
    set: a mutable collection of unordered, unique, hashable elements.
    frozenset: the immutable variant of set (also unordered and unique).
    """

    s = set("abcde")  # the constructor accepts any iterable: str, list, ...
    print(s)

    # Adding data to a set
    s.add("f")      # add() takes exactly one element
    s.update("gh")  # update() adds every element of an iterable

    # Operators and the magic methods that implement them:
    #   a - b  is a.difference(b)    -> __sub__ (a -= b uses __isub__)
    #   a & b  is a.intersection(b)  -> __and__
    #   a | b  is a.union(b)         -> __or__  (a |= b uses __ior__)

    # Other ways to build a set; duplicates collapse to one element.
    # s = set('abcdee')
    # s = set(['a','b','c','d','e'])
    s = {'a','b', 'c'}
    # s = frozenset("abcde")  # a frozenset is hashable, so it can be a dict key
    # print(s)

    # clear()  remove every element
    # copy()   shallow copy of the set
    # pop()    remove and return an arbitrary element (a set has no "last" one)
    # remove() delete one element; raises KeyError if it is missing

    # Set algebra
    another_set = set("cef")
    re_set = s.difference(another_set)  # difference, method form
    re_set = s - another_set            # difference, operator form
    re_set = s & another_set            # intersection
    re_set = s | another_set            # union

    # Sets are hash-based, so these operations and membership tests are fast.
    print(re_set)

    print(s.issubset(re_set))
    # Membership can also be tested with `in` (implemented by __contains__):
    # if "c" in re_set:
    #     print ("i am in set")
    

    5.5 dict和set的实现原理

    """
    测试list和dict的性能
    """
    from random import randint
    
    
    def load_list_data(total_nums, target_nums, file_name="D:/note/fbobject_idnew.txt"):
        """Read lines from a text file into a list and pick random lookup targets.

        :param total_nums: maximum number of lines to read from the file
        :param target_nums: number of random draws used to pick lookup targets
        :param file_name: path of the UTF-8 text file to read; defaults to the
            path the script was originally written for
        :return: ``(all_data, target_data)``. ``all_data`` is the list of lines
            read, line endings included. ``target_data`` is a list of distinct
            lines drawn from it; it holds at most ``target_nums`` entries and
            fewer when the random draws repeat.
        """
        all_data = []
        with open(file_name, encoding="utf8", mode="r") as f_open:
            for count, line in enumerate(f_open):
                if count >= total_nums:
                    break
                all_data.append(line)

        target_data = []
        if not all_data:
            # Nothing was read (empty file or total_nums <= 0): nothing to draw.
            return all_data, target_data

        # randint() includes both endpoints, so the upper bound has to be the
        # last valid index of what was actually read. The file may also hold
        # fewer than total_nums lines.
        last_index = len(all_data) - 1
        seen = set()  # O(1) duplicate check instead of scanning target_data
        for _ in range(target_nums):
            candidate = all_data[randint(0, last_index)]
            if candidate not in seen:
                seen.add(candidate)
                target_data.append(candidate)

        return all_data, target_data
    
    def load_dict_data(total_nums, target_nums, file_name="D:/note/fbobject_idnew.txt"):
        """Read lines from a text file into a dict and pick random lookup targets.

        :param total_nums: maximum number of lines to read from the file
        :param target_nums: number of random draws used to pick lookup targets
        :param file_name: path of the UTF-8 text file to read (the original
            script used a file with millions of strings); defaults to the path
            the script was originally written for
        :return: ``(all_data, target_data)``. ``all_data`` maps each distinct
            line (line ending included) to 0. ``target_data`` is a list of
            distinct keys drawn from it; it holds at most ``target_nums``
            entries and fewer when the random draws repeat.
        """
        all_data = {}
        with open(file_name, encoding="utf8", mode="r") as f_open:
            for count, line in enumerate(f_open):
                if count >= total_nums:
                    break
                all_data[line] = 0

        target_data = []
        if not all_data:
            # Nothing was read (empty file or total_nums <= 0): nothing to draw.
            return all_data, target_data

        # Duplicate lines collapse into one key and the file may be shorter
        # than total_nums, so index by the real number of keys rather than by
        # total_nums.
        all_data_list = list(all_data)
        last_index = len(all_data_list) - 1
        seen = set()  # O(1) duplicate check instead of scanning target_data
        for _ in range(target_nums):
            candidate = all_data_list[randint(0, last_index)]
            if candidate not in seen:
                seen.add(candidate)
                target_data.append(candidate)

        return all_data, target_data
    
    
    def find_test(all_data, target_data, test_times=100):
        """Measure the average time needed to look up every target in a container.

        :param all_data: container searched with the ``in`` operator
            (e.g. a list or a dict)
        :param target_data: iterable of values to look up in ``all_data``
        :param test_times: how many times the whole lookup pass is repeated;
            must be positive
        :return: average duration of one lookup pass, in seconds
        :raises ValueError: if ``test_times`` is not positive
        """
        import time

        if test_times <= 0:
            raise ValueError("test_times must be a positive integer")

        total_times = 0
        for _ in range(test_times):
            # perf_counter() is a monotonic, high-resolution clock meant for
            # timing short intervals; time.time() can jump with clock changes.
            start_time = time.perf_counter()
            find = 0  # hit counter; only gives the membership test an effect
            for data in target_data:
                if data in all_data:
                    find += 1
            total_times += time.perf_counter() - start_time
        return total_times / test_times
    
    
    if __name__ == "__main__":
        # Alternative runs, kept for comparison: the same lookup test against a
        # list of growing size ...
        # all_data, target_data = load_list_data(10000, 1000)
        # all_data, target_data = load_list_data(100000, 1000)
        # all_data, target_data = load_list_data(1000000, 1000)
    
    
        # ... and against a dict of growing size.
        # all_data, target_data = load_dict_data(10000, 1000)
        # all_data, target_data = load_dict_data(100000, 1000)
        # all_data, target_data = load_dict_data(1000000, 1000)
        all_data, target_data = load_dict_data(2000000, 1000)
        last_time = find_test(all_data, target_data)
    
        # Author's conclusions from these runs:
        # - dict lookup performs far better than list lookup;
        # - lookup time in a list grows as the list grows;
        # - lookup time in a dict does not grow as the dict grows.
        print(last_time)
    """
    1.dict的key或者set的值,都必须是可以hash的(不可变对象都是可以hash的,如str,frozenset,tuple,自己实现的类【实现__hash__魔法函数】);
    2.dict内存花销大,但是查询速度快,自定义的对象或者python内置的对象都是用dict包装的;
    3.dict的存储顺序与元素添加顺序有关(Python 3.7起,dict保证按插入顺序迭代);
    4.在Python 3.6之前的dict以及set中,添加数据有可能改变已有数据的顺序(扩容时会重新散列);
    5.取数据的时间复杂度为O(1)
    """
    

    通过hash函数计算key的哈希值(有很多种算法),这里是将哈希值与7进行按位与运算,得到存储位置。计算过程中有可能发生冲突,即不同的key得到同样的位置(有很多种解决方法),如'abc'先取最后一位'c'加一位随机数计算,如果冲突,就向前多取一位再计算……(另外,dict会先声明一块很小的内存空间,其中可能存在一些空白;当空白位置少于1/3时,就声明一块更大的空间,把数据拷贝过去,以减少冲突)

    查找数据时,先计算key的hash值定位到对应位置,查看该位置是否为空:为空就抛出错误(KeyError);不为空则比较其中的key是否相等,相等就返回对应的值;如果该位置被其他key占用(不相等),就按冲突解决的方法继续查找

  • 相关阅读:
    Java路径问题终于解决方式—可定位全部资源的相对路径寻址
    易学设计模式看书笔记(2)
    js算法:分治法-棋盘覆盖
    [NIO]dawn之Task具体解释
    C#高级编程五十八天----并行集合
    [Swift通天遁地]九、拔剑吧-(12)创建Preview-Transition图像预览界面
    [Swift通天遁地]九、拔剑吧-(11)创建强大的Pinterest风格的瀑布流界面
    [Swift通天遁地]九、拔剑吧-(10)快速创建美观的聊天界面:可发送文字、表情、图片
    [Swift通天遁地]九、拔剑吧-(9)创建支持缩放、移动、裁切的相机视图控制器
    [Swift通天遁地]九、拔剑吧-(8)创建气泡式页面切换效果
  • 原文地址:https://www.cnblogs.com/lianhaifeng/p/12913610.html
Copyright © 2020-2023  润新知