• 萌新向Python数据分析及数据挖掘 第二章 pandas 第三节 Built-in Data Structures, Functions, Data Structures and Sequences


    Built-in Data Structures, Functions,

     

    Data Structures and Sequences

     
     
     
     
     
     
    ### 元组
     
    In [1]:
     
     
     
     
     
    tup = 4, 5, 6
    tup
     
     
    Out[1]:
    (4, 5, 6)
    In [2]:
     
     
     
     
     
    nested_tup = (4, 5, 6), (7, 8)
    nested_tup
     
     
    Out[2]:
    ((4, 5, 6), (7, 8))
    In [5]:
     
     
     
     
     
    tuple([4, 0, 2])
     
     
    Out[5]:
    (4, 0, 2)
    In [66]:
     
     
     
     
     
    tup = tuple('string')
    tup
     
     
    Out[66]:
    ('s', 't', 'r', 'i', 'n', 'g')
     
     
     
     
     
     
    Init signature: tuple(self, /, *args, **kwargs)
    Docstring:    
    tuple() -> empty tuple
    tuple(iterable) -> tuple initialized from iterable's items
    If the argument is a tuple, the return value is the same object.
    Type:           type
     
    In [7]:
     
     
     
     
     
    tup[0]
     
     
    Out[7]:
    's'
    In [8]:
     
     
     
     
     
    tup = tuple(['foo', [1, 2], True])
    tup[2] = False
     
     
     
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-8-11b694945ab9> in <module>()
          1 tup = tuple(['foo', [1, 2], True])
    ----> 2 tup[2] = False
    
    TypeError: 'tuple' object does not support item assignment
    
    
    In [39]:
     
     
     
     
     
    tup[1].append(3)
    tup
     
     
     
    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    <ipython-input-39-8df41c71d9f3> in <module>()
    ----> 1 tup[1].append(3)
          2 tup
    
    AttributeError: 'str' object has no attribute 'append'
    
    
    In [10]:
     
     
     
     
     
    (4, None, 'foo') + (6, 0) + ('bar',)
     
     
    Out[10]:
    (4, None, 'foo', 6, 0, 'bar')
    In [11]:
     
     
     
     
     
    ('foo', 'bar') * 4
     
     
    Out[11]:
    ('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')
     

    Unpacking tuples

    In [12]:
     
     
     
     
     
    tup = (4, 5, 6)
    a, b, c = tup
    b
     
     
    Out[12]:
    5
    In [13]:
     
     
     
     
     
    tup = 4, 5, (6, 7)
    a, b, (c, d) = tup
    d
     
     
    Out[13]:
    7
     

    tmp = a
    a = b
    b = tmp

    In [16]:
     
     
     
     
     
    a, b = 1, 2
    a
     
     
    Out[16]:
    1
    In [17]:
     
     
     
     
     
    b
     
     
    Out[17]:
    2
    In [18]:
     
     
     
     
     
    b, a = a, b
    a
     
     
    Out[18]:
    2
    In [19]:
     
     
     
     
     
    b
     
     
    Out[19]:
    1
    In [ ]:
     
     
     
     
     
     
     
    In [ ]:
     
     
     
     
     
     
     
    In [ ]:
     
     
     
     
     
     
     
    In [20]:
     
     
     
     
     
    seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
    for a, b, c in seq:
        print('a={0}, b={1}, c={2}'.format(a, b, c))
     
     
     
    a=1, b=2, c=3
    a=4, b=5, c=6
    a=7, b=8, c=9
    
    In [24]:
     
     
     
     
     
    values = 1, 2, 3, 4, 5
    a, b, *rest = values
    a, b
     
     
    Out[24]:
    (1, 2)
    In [23]:
     
     
     
     
     
    rest
     
     
    Out[23]:
    [3, 4, 5]
    In [25]:
     
     
     
     
     
    a, b, *_ = values
     
     
     

    Tuple methods

    In [26]:
     
     
     
     
     
    a = (1, 2, 2, 2, 3, 4, 2)
    a.count(2)
     
     
    Out[26]:
    4
     

    List

    In [27]:
     
     
     
     
     
    a_list = [2, 3, 7, None]
    tup = ('foo', 'bar', 'baz')
    b_list = list(tup)
    b_list
    b_list[1] = 'peekaboo'
    b_list
     
     
    Out[27]:
    ['foo', 'peekaboo', 'baz']
    In [28]:
     
     
     
     
     
    gen = range(10)
    gen
    list(gen)
     
     
    Out[28]:
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
     

    Adding and removing elements

    In [40]:
     
     
     
     
     
    b_list.append('dwarf')
    b_list
     
     
    Out[40]:
    ['red', 'baz', 'dwarf', 'foo', 'dwarf']
     
     
     
     
     
     
    Docstring: L.append(object) -> None -- append object to end
    Type:      builtin_function_or_method
     
    In [41]:
     
     
     
     
     
    b_list.insert(1, 'red')
    b_list
     
     
    Out[41]:
    ['red', 'red', 'baz', 'dwarf', 'foo', 'dwarf']
     
     
     
     
     
     
    Docstring: L.insert(index, object) -- insert object before index
    Type:      builtin_function_or_method
     
    In [42]:
     
     
     
     
     
    b_list.pop(2)
    b_list
     
     
    Out[42]:
    ['red', 'red', 'dwarf', 'foo', 'dwarf']
     
     
     
     
     
     
    Docstring:
    L.pop([index]) -> item -- remove and return item at index (default last).
    Raises IndexError if list is empty or index is out of range.
    Type:      builtin_function_or_method
     
    In [43]:
     
     
     
     
     
    b_list.append('foo')
    b_list
    b_list.remove('foo')
    b_list
     
     
    Out[43]:
    ['red', 'red', 'dwarf', 'dwarf', 'foo']
     
     
     
     
     
     
    Docstring:
    L.remove(value) -> None -- remove first occurrence of value.
    Raises ValueError if the value is not present.
    Type:      builtin_function_or_method
     
    In [33]:
     
     
     
     
     
    'dwarf' in b_list
     
     
    Out[33]:
    True
    In [34]:
     
     
     
     
     
    'dwarf' not in b_list
     
     
    Out[34]:
    False
     

    Concatenating and combining lists

    In [35]:
     
     
     
     
     
    [4, None, 'foo'] + [7, 8, (2, 3)]
     
     
    Out[35]:
    [4, None, 'foo', 7, 8, (2, 3)]
    In [44]:
     
     
     
     
     
    x = [4, None, 'foo']
    x.extend([7, 8, (2, 3)])
    x
     
     
    Out[44]:
    [4, None, 'foo', 7, 8, (2, 3)]
     
     
     
     
     
     
    Docstring: L.extend(iterable) -> None -- extend list by appending elements from the iterable
    Type:      builtin_function_or_method
     
     

    everything = []
    for chunk in list_of_lists:
        everything.extend(chunk)

     

    everything = []
    for chunk in list_of_lists:
        everything = everything + chunk

     

    Sorting

    In [45]:
     
     
     
     
     
    a = [7, 2, 5, 1, 3]
    a.sort()
    a
     
     
    Out[45]:
    [1, 2, 3, 5, 7]
     
     
     
     
     
     
    Docstring: L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*
    Type:      builtin_function_or_method
     
    In [46]:
     
     
     
     
     
    b = ['saw', 'small', 'He', 'foxes', 'six']
    b.sort(key=len)
    b
     
     
    Out[46]:
    ['He', 'saw', 'six', 'small', 'foxes']
     

    Binary search and maintaining a sorted list

    In [55]:
     
     
     
     
     
    import bisect ###输出一个元素在已排序的列表中的序号,而不改变列表
    c = [1, 2, 2, 2, 3, 4, 7]
    bisect.bisect(c, 2)
     
     
    Out[55]:
    4
     
     
     
     
     
     
    Docstring: Alias for bisect_right().
    Type:      builtin_function_or_method
     
    In [52]:
     
     
     
     
     
    bisect.bisect(c, 5)
     
     
    Out[52]:
    6
    In [53]:
     
     
     
     
     
    bisect.insort(c, 6)
     
     
    In [54]:
     
     
     
     
     
    c
     
     
    Out[54]:
    [1, 2, 2, 2, 3, 4, 6, 7]
     

    Slicing

    In [56]:
     
     
     
     
     
    seq = [7, 2, 3, 7, 5, 6, 0, 1]
    seq[1:5]
     
     
    Out[56]:
    [2, 3, 7, 5]
    In [57]:
     
     
     
     
     
    seq[3:4] = [6, 3]
    seq
     
     
    Out[57]:
    [7, 2, 3, 6, 3, 5, 6, 0, 1]
    In [58]:
     
     
     
     
     
    seq[:5]
    seq[3:]
     
     
    Out[58]:
    [6, 3, 5, 6, 0, 1]
    In [59]:
     
     
     
     
     
    seq[-4:]
    seq[-6:-2]
     
     
    Out[59]:
    [6, 3, 5, 6]
    In [60]:
     
     
     
     
     
    seq[::2]
     
     
    Out[60]:
    [7, 3, 3, 6, 1]
    In [61]:
     
     
     
     
     
    seq[::-1]
     
     
    Out[61]:
    [1, 0, 6, 5, 3, 6, 3, 2, 7]
     

    Built-in Sequence Functions

     

    enumerate

     

    i = 0
    for value in collection:
        # do something with value
        i += 1

     

    for i, value in enumerate(collection):
        # do something with value

    In [64]:
     
     
     
     
     
    some_list = ['foo', 'bar', 'baz']
    mapping = {}
    for i, v in enumerate(some_list):
        mapping[v] = i
    mapping
     
     
    Out[64]:
    {'bar': 1, 'baz': 2, 'foo': 0}
     
     
     
     
     
     
    Init signature: enumerate(self, /, *args, **kwargs)
    Docstring:    
    enumerate(iterable[, start]) -> iterator for index, value of iterable
    Return an enumerate object.  iterable must be another object that supports
    iteration.  The enumerate object yields pairs containing a count (from
    start, which defaults to zero) and a value yielded by the iterable argument.
    enumerate is useful for obtaining an indexed list:
        (0, seq[0]), (1, seq[1]), (2, seq[2]), ...
    Type:           type
     
     

    sorted

    In [65]:
     
     
     
     
     
    sorted([7, 1, 2, 6, 0, 3, 2])
    sorted('horse race')
     
     
    Out[65]:
    [' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']
     
     
     
     
     
     
    Signature: sorted(iterable, /, *, key=None, reverse=False)
    Docstring:
    Return a new list containing all items from the iterable in ascending order.
    A custom key function can be supplied to customize the sort order, and the
    reverse flag can be set to request the result in descending order.
    Type:      builtin_function_or_method
     
     

    zip

    In [67]:
     
     
     
     
     
    seq1 = ['foo', 'bar', 'baz']
    seq2 = ['one', 'two', 'three']
    zipped = zip(seq1, seq2)
    list(zipped)
     
     
    Out[67]:
    [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]
    In [ ]:
     
     
     
     
     
    seq3 = [False, True]
    list(zip(seq1, seq2, seq3))###转置大法
     
     
    In [68]:
     
     
     
     
     
    for i, (a, b) in enumerate(zip(seq1, seq2)):
        print('{0}: {1}, {2}'.format(i, a, b))
     
     
     
    0: foo, one
    1: bar, two
    2: baz, three
    
    In [ ]:
     
     
     
     
     
     
     
    In [69]:
     
     
     
     
     
    pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
                ('Schilling', 'Curt')]
    first_names, last_names = zip(*pitchers)  # 反向zip
    first_names
    last_names
     
     
    Out[69]:
    ('Ryan', 'Clemens', 'Curt')
     

    reversed

    In [71]:
     
     
     
     
     
    list(reversed(range(10)))
     
     
    Out[71]:
    [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
     
     
     
     
     
     
    Init signature: reversed(self, /, *args, **kwargs)
    Docstring:    
    reversed(sequence) -> reverse iterator over values of the sequence
    Return a reverse iterator
    Type:           type
     
     

    dict

    In [72]:
     
     
     
     
     
    empty_dict = {}
    d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
    d1
     
     
    Out[72]:
    {'a': 'some value', 'b': [1, 2, 3, 4]}
    In [74]:
     
     
     
     
     
    d1[7] = 'an integer'
    d1
     
     
    Out[74]:
    {7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}
    In [ ]:
     
     
     
     
     
    d1['b']
     
     
    In [ ]:
     
     
     
     
     
    'b' in d1
     
     
    In [75]:
     
     
     
     
     
    d1[5] = 'some value'
    d1
     
     
    Out[75]:
    {5: 'some value', 7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}
    In [76]:
     
     
     
     
     
    d1['dummy'] = 'another value'
    d1
     
     
    Out[76]:
    {5: 'some value',
     7: 'an integer',
     'a': 'some value',
     'b': [1, 2, 3, 4],
     'dummy': 'another value'}
    In [77]:
     
     
     
     
     
    del d1[5]
    d1
     
     
    Out[77]:
    {7: 'an integer',
     'a': 'some value',
     'b': [1, 2, 3, 4],
     'dummy': 'another value'}
    In [78]:
     
     
     
     
     
    ret = d1.pop('dummy')
    ret
     
     
    Out[78]:
    'another value'
    In [79]:
     
     
     
     
     
    d1
     
     
    Out[79]:
    {7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}
    In [83]:
     
     
     
     
     
    list(d1.keys())
     
     
    Out[83]:
    ['a', 'b', 7, 'c']
    In [84]:
     
     
     
     
     
    list(d1.values())
     
     
    Out[84]:
    ['some value', 'foo', 'an integer', 12]
    In [85]:
     
     
     
     
     
    d1.update({'b' : 'foo', 'c' : 12})
    d1
     
     
    Out[85]:
    {7: 'an integer', 'a': 'some value', 'b': 'foo', 'c': 12}
    In [ ]:
     
     
     
     
     
     
     
     

    Creating dicts from sequences

     

    mapping = {}
    for key, value in zip(key_list, value_list):
        mapping[key] = value

    In [86]:
     
     
     
     
     
    mapping = dict(zip(range(5), reversed(range(5))))
    mapping
     
     
    Out[86]:
    {0: 4, 1: 3, 2: 2, 3: 1, 4: 0}
     

    Init signature: zip(self, /, *args, **kwargs)
    Docstring:
    zip(iter1 [,iter2 [...]]) --> zip object

    Return a zip object whose .__next__() method returns a tuple where the i-th element comes from the i-th iterable argument. The .__next__() method continues until the shortest iterable in the argument sequence is exhausted and then it raises StopIteration.
    Type: type

     

    Default values

     

    if key in some_dict:
        value = some_dict[key]
    else:
        value = default_value

     

    value = some_dict.get(key, default_value)

    In [87]:
     
     
     
     
     
    words = ['apple', 'bat', 'bar', 'atom', 'book']
    by_letter = {}
    for word in words:
        letter = word[0]
        if letter not in by_letter:
            by_letter[letter] = [word]
        else:
            by_letter[letter].append(word)
    by_letter
     
     
    Out[87]:
    {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}
     

    for word in words:
        letter = word[0]
        by_letter.setdefault(letter, []).append(word)

     

    from collections import defaultdict
    by_letter = defaultdict(list)
    for word in words:
        by_letter[word[0]].append(word)

     

    Valid dict key types

    In [90]:
     
     
     
     
     
    hash('string')
    hash((1, 2, (2, 3)))
     
     
    Out[90]:
    1097636502276347782
    In [91]:
     
     
     
     
     
    hash((1, 2, [2, 3])) # fails because lists are mutable
     
     
     
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-91-473c35a62c0b> in <module>()
    ----> 1 hash((1, 2, [2, 3])) # fails because lists are mutable
    
    TypeError: unhashable type: 'list'
    
    
    In [92]:
     
     
     
     
     
    d = {}
    d[tuple([1, 2, 3])] = 5#元组作为key
    d
     
     
    Out[92]:
    {(1, 2, 3): 5}
     

    set

    In [95]:
     
     
     
     
     
    set([2, 2, 2, 1, 3, 3])
    {2, 2, 2, 1, 3, 3}
     
     
    Out[95]:
    {1, 2, 3}
    In [103]:
     
     
     
     
     
    a = {1, 2, 3, 4, 5}
    b = {3, 4, 5, 6, 7, 8}
     
     
    In [104]:
     
     
     
     
     
    a.union(b)#并集
    a | b
     
     
    Out[104]:
    {1, 2, 3, 4, 5, 6, 7, 8}
    In [105]:
     
     
     
     
     
    a.intersection(b)#交集
    a & b
     
     
    Out[105]:
    {3, 4, 5}
    In [106]:
     
     
     
     
     
    c = a.copy()
    c |= b
    c
     
     
    Out[106]:
    {1, 2, 3, 4, 5, 6, 7, 8}
    In [107]:
     
     
     
     
     
    d = a.copy()
    d &= b
    d
     
     
    Out[107]:
    {3, 4, 5}
    In [108]:
     
     
     
     
     
    my_data = [1, 2, 3, 4]
    my_set = {tuple(my_data)}
    my_set
     
     
    Out[108]:
    {(1, 2, 3, 4)}
    In [101]:
     
     
     
     
     
    a_set = {1, 2, 3, 4, 5}
    {1, 2, 3}.issubset(a_set)
    a_set.issuperset({1, 2, 3})
     
     
    Out[101]:
    True
    In [102]:
     
     
     
     
     
    {1, 2, 3} == {3, 2, 1}#无排序
     
     
    Out[102]:
    True
     
     
     
     
     
     
    ### List, Set, and Dict Comprehensions
     
     

    [expr for val in collection if condition]

     

    result = []
    for val in collection:
        if condition:
            result.append(expr)

    In [110]:
     
     
     
     
     
    strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
    [x.upper() for x in strings if len(x) > 2]
     
     
    Out[110]:
    ['BAT', 'CAR', 'DOVE', 'PYTHON']
     

    dict_comp = {key-expr : value-expr for value in collection if condition}

     

    set_comp = {expr for value in collection if condition}

    In [111]:
     
     
     
     
     
    unique_lengths = {len(x) for x in strings}
    unique_lengths
     
     
    Out[111]:
    {1, 2, 3, 4, 6}
    In [112]:
     
     
     
     
     
    set(map(len, strings))
     
     
    Out[112]:
    {1, 2, 3, 4, 6}
    In [113]:
     
     
     
     
     
    loc_mapping = {val : index for index, val in enumerate(strings)}
    loc_mapping
     
     
    Out[113]:
    {'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}
     

    Nested list comprehensions

    In [114]:
     
     
     
     
     
    all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
                ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]
     
     
     

    names_of_interest = []
    for names in all_data:
        enough_es = [name for name in names if name.count('e') >= 2]
        names_of_interest.extend(enough_es)

    In [115]:
     
     
     
     
     
    result = [name for names in all_data for name in names #列表筛选
              if name.count('e') >= 2]
    result
     
     
    Out[115]:
    ['Steven']
    In [116]:
     
     
     
     
     
    some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
    flattened = [x for tup in some_tuples for x in tup]
    flattened
     
     
    Out[116]:
    [1, 2, 3, 4, 5, 6, 7, 8, 9]
     

    flattened = []

    for tup in some_tuples:
        for x in tup:
            flattened.append(x)

    In [ ]:
     
     
     
     
     
    [[x for x in tup] for tup in some_tuples]
     
     
     

    Functions

     

    def my_function(x, y, z=1.5):
        if z > 1:
            return z * (x + y)
        else:
            return z / (x + y)

     

    my_function(5, 6, z=0.7)
    my_function(3.14, 7, 3.5)
    my_function(10, 20)

     

    Namespaces, Scope, and Local Functions

     

    def func():
        a = []
        for i in range(5):
            a.append(i)

     

    a = []
    def func():
        for i in range(5):
            a.append(i)

    In [117]:
     
     
     
     
     
    a = None
    def bind_a_variable():
        """Rebind the module-level name ``a`` to a new empty list."""
        # ``global`` makes the assignment below target the module namespace
        # instead of creating a function-local variable.
        global a
        a = []
    bind_a_variable()
    print(a)
     
     
     
    []
    
     

    Returning Multiple Values

     

    def f():
        a = 5
        b = 6
        c = 7
        return a, b, c

    a, b, c = f()

     

    return_value = f()

     

    def f():
        a = 5
        b = 6
        c = 7
        return {'a' : a, 'b' : b, 'c' : c}

     

    Functions Are Objects

    In [118]:
     
     
     
     
     
    states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
              'south   carolina##', 'West virginia?']
     
     
    In [119]:
     
     
     
     
     
    import re #正则匹配
    def clean_strings(strings):
        """Return a cleaned copy of each string in ``strings``.

        Each string is processed in three steps, in this order:
        surrounding whitespace is stripped, every '!', '#' and '?' character
        is removed, and the result is title-cased.
        """
        return [
            re.sub('[!#?]', '', raw.strip()).title()
            for raw in strings
        ]
     
     
    In [120]:
     
     
     
     
     
    clean_strings(states)
     
     
    Out[120]:
    ['Alabama',
     'Georgia',
     'Georgia',
     'Georgia',
     'Florida',
     'South   Carolina',
     'West Virginia']
    In [122]:
     
     
     
     
     
    def remove_punctuation(value):
        """Return ``value`` with every '!', '#' and '?' character removed."""
        stripped = re.compile('[!#?]').sub('', value)
        return stripped
    clean_ops = [str.strip, remove_punctuation, str.title]
    def clean_strings(strings, ops):
        """Apply every callable in ``ops`` to each string and collect the results.

        For each item of ``strings`` the callables in ``ops`` are applied in
        iteration order, each one receiving the previous one's return value.
        The final values are returned as a new list.
        """
        def _apply_all(text):
            # Thread the value through the callables one after another.
            for transform in ops:
                text = transform(text)
            return text

        return [_apply_all(item) for item in strings]
     
     
    In [123]:
     
     
     
     
     
    clean_strings(states, clean_ops)
     
     
    Out[123]:
    ['Alabama',
     'Georgia',
     'Georgia',
     'Georgia',
     'Florida',
     'South   Carolina',
     'West Virginia']
    In [124]:
     
     
     
     
     
    for x in map(remove_punctuation, states):
        print(x)
     
     
     
       Alabama 
    Georgia
    Georgia
    georgia
    FlOrIda
    south   carolina
    West virginia
    
     

    Anonymous (Lambda) Functions

     

    def short_function(x):
        return x * 2

    equiv_anon = lambda x: x * 2

     

    def apply_to_list(some_list, f):
        return [f(x) for x in some_list]

    ints = [4, 0, 1, 5, 6]
    apply_to_list(ints, lambda x: x * 2)

    In [125]:
     
     
     
     
     
    strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
     
     
    In [127]:
     
     
     
     
     
    strings.sort(key=lambda x: len(set(list(x))))#按字符串所含不同字母个数排序
    strings
     
     
    Out[127]:
    ['aaaa', 'foo', 'abab', 'bar', 'card']
     

    Currying: Partial Argument Application

     

    def add_numbers(x, y):
        return x + y

     

    add_five = lambda y: add_numbers(5, y)

     

    from functools import partial
    add_five = partial(add_numbers, 5)

     

    Generators

    In [130]:
     
     
     
     
     
    some_dict = {'a': 1, 'b': 2, 'c': 3}
    for key in some_dict:
        print(key)
     
     
     
    a
    b
    c
    
    In [131]:
     
     
     
     
     
    dict_iterator = iter(some_dict)
    dict_iterator
     
     
    Out[131]:
    <dict_keyiterator at 0x2297b85bea8>
    In [132]:
     
     
     
     
     
    list(dict_iterator)
     
     
    Out[132]:
    ['a', 'b', 'c']
    In [133]:
     
     
     
     
     
    def squares(n=10):
        """Generate the squares of the integers 1 through ``n``.

        Nothing runs until the generator is first advanced; at that point a
        one-line banner is printed, then the squares are yielded in
        ascending order.
        """
        banner = 'Generating squares from 1 to {0}'.format(n ** 2)
        print(banner)
        upper = n + 1
        for base in range(1, upper):
            yield base ** 2
     
     
    In [134]:
     
     
     
     
     
    gen = squares()
    gen
     
     
    Out[134]:
    <generator object squares at 0x000002297B842E08>
    In [135]:
     
     
     
     
     
    for x in gen:
        print(x, end=' ')
     
     
     
    Generating squares from 1 to 100
    1 4 9 16 25 36 49 64 81 100 
     

    Generator expressions

    In [136]:
     
     
     
     
     
    gen = (x ** 2 for x in range(100))
    gen
     
     
    Out[136]:
    <generator object <genexpr> at 0x000002297B85FAF0>
     

    def makegen():
        for x in range(100):
            yield x ** 2
    gen = makegen()

    In [138]:
     
     
     
     
     
    sum(x ** 2 for x in range(100))
    dict((i, i **2) for i in range(5))
     
     
    Out[138]:
    {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
     

    itertools module

    In [140]:
     
     
     
     
     
    import itertools
    first_letter = lambda x: x[0]
    names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
    for letter, names in itertools.groupby(names, first_letter):
        print(letter, list(names)) # names is a generator
     
     
     
    A ['Alan', 'Adam']
    W ['Wes', 'Will']
    A ['Albert']
    S ['Steven']
    
     
     
     
     
     
     
    Init signature: itertools.groupby(self, /, *args, **kwargs)
    Docstring:    
    groupby(iterable, key=None) -> make an iterator that returns consecutive
    keys and groups from the iterable.  If the key function is not specified or
    is None, the element itself is used for grouping.
    Type:           type
     
     

    Errors and Exception Handling

    In [141]:
     
     
     
     
     
    float('1.2345')
    float('something')
     
     
     
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-141-6d335c618d25> in <module>()
          1 float('1.2345')
    ----> 2 float('something')
    
    ValueError: could not convert string to float: 'something'
    
    
    In [172]:
     
     
     
     
     
    def attempt_float(x):
        """Return ``float(x)``, or ``x`` unchanged if the conversion raises."""
        try:
            return float(x)
        except:
            # Bare except: every exception raised by float(x) is swallowed
            # here, not only ValueError.
            return x
     
     
    In [173]:
     
     
     
     
     
    attempt_float('1.2345')
    attempt_float('something')
     
     
    Out[173]:
    'something'
    In [171]:
     
     
     
     
     
    float((1, 2))
     
     
     
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-171-82f777b0e564> in <module>()
    ----> 1 float((1, 2))
    
    TypeError: float() argument must be a string or a number, not 'tuple'
    
    
    In [144]:
     
     
     
     
     
    def attempt_float(x):
        """Return ``float(x)``, or ``x`` unchanged if float raises ValueError.

        Any other exception raised by ``float(x)`` propagates to the caller.
        """
        try:
            return float(x)
        except ValueError:
            return x
     
     
    In [174]:
     
     
     
     
     
    attempt_float((1, 2))
     
     
    Out[174]:
    (1, 2)
    In [145]:
     
     
     
     
     
    def attempt_float(x):
        """Return ``float(x)``, or ``x`` unchanged on TypeError or ValueError.

        Any other exception raised by ``float(x)`` propagates to the caller.
        """
        try:
            return float(x)
        except (TypeError, ValueError):
            return x
     
     
     

    f = open(path, 'w')

    try:
        write_to_file(f)
    finally:
        f.close()

     

    f = open(path, 'w')

    try:
        write_to_file(f)
    except:
        print('Failed')
    else:
        print('Succeeded')
    finally:
        f.close()

     

    Exceptions in IPython

     

    In [10]: %run examples/ipython_bug.py

    AssertionError                            Traceback (most recent call last)
    /home/wesm/code/pydata-book/examples/ipython_bug.py in <module>()
         13     throws_an_exception()
         14
    ---> 15 calling_things()

    /home/wesm/code/pydata-book/examples/ipython_bug.py in calling_things()
         11 def calling_things():
         12     works_fine()
    ---> 13     throws_an_exception()
         14
         15 calling_things()

    /home/wesm/code/pydata-book/examples/ipython_bug.py in throws_an_exception()
          7     a = 5
          8     b = 6
    ----> 9     assert(a + b == 10)
         10
         11 def calling_things():

    AssertionError:

     

    Files and the Operating System

    In [175]:
     
     
     
     
     
    %pushd book-materials
     
     
     
    [WinError 2] 系统找不到指定的文件。: 'book-materials'
    C:\Users\qq123\Documents\GitHub\pydata-book
    
    Out[175]:
    ['~\\Documents\\GitHub\\pydata-book']
    In [176]:
     
     
     
     
     
    path = 'examples/segismundo.txt'
    f = open(path)
     
     
     

    for line in f: pass

    In [178]:
     
     
     
     
     
    lines = [x.rstrip() for x in open(path)]
    lines
     
     
    Out[178]:
    ['Sue帽a el rico en su riqueza,',
     'que m谩s cuidados le ofrece;',
     '',
     'sue帽a el pobre que padece',
     'su miseria y su pobreza;',
     '',
     'sue帽a el que a medrar empieza,',
     'sue帽a el que afana y pretende,',
     'sue帽a el que agravia y ofende,',
     '',
     'y en el mundo, en conclusi贸n,',
     'todos sue帽an lo que son,',
     'aunque ninguno lo entiende.',
     '']
    In [179]:
     
     
     
     
     
    f.close()
     
     
    In [180]:
     
     
     
     
     
    with open(path) as f:
        lines = [x.rstrip() for x in f]
     
     
    In [181]:
     
     
     
     
     
    f = open(path)
    f.read(10)
    f2 = open(path, 'rb')  # Binary mode
    f2.read(10)
     
     
    Out[181]:
    b'Sue\xc3\xb1a el '
    In [182]:
     
     
     
     
     
    f.tell()
    f2.tell()
     
     
    Out[182]:
    10
    In [183]:
     
     
     
     
     
    import sys
    sys.getdefaultencoding()#获得默认编码
     
     
    Out[183]:
    'utf-8'
    In [184]:
     
     
     
     
     
    f.seek(3)
    f.read(1)
     
     
    Out[184]:
    '帽'
    In [185]:
     
     
     
     
     
    f.close()
    f2.close()
     
     
    In [186]:
     
     
     
     
     
    with open('tmp.txt', 'w') as handle:
        handle.writelines(x for x in open(path) if len(x) > 1)
    with open('tmp.txt') as f:
        lines = f.readlines()
    lines
     
     
    Out[186]:
    ['Sue帽a el rico en su riqueza,\n',
     'que m谩s cuidados le ofrece;\n',
     'sue帽a el pobre que padece\n',
     'su miseria y su pobreza;\n',
     'sue帽a el que a medrar empieza,\n',
     'sue帽a el que afana y pretende,\n',
     'sue帽a el que agravia y ofende,\n',
     'y en el mundo, en conclusi贸n,\n',
     'todos sue帽an lo que son,\n',
     'aunque ninguno lo entiende.\n']
    In [187]:
     
     
     
     
     
    import os
    os.remove('tmp.txt')
     
     
     

    Bytes and Unicode with Files

    In [188]:
     
     
     
     
     
    with open(path) as f:
        chars = f.read(10)
    chars
     
     
    Out[188]:
    'Sue帽a el r'
    In [189]:
     
     
     
     
     
    with open(path, 'rb') as f:
        data = f.read(10)
    data
     
     
    Out[189]:
    b'Sue\xc3\xb1a el '
    In [192]:
     
     
     
     
     
    data.decode('utf8')
     
     
    Out[192]:
    'Sueña el '
    In [193]:
     
     
     
     
     
    data[:4].decode('utf8')
     
     
     
    ---------------------------------------------------------------------------
    UnicodeDecodeError                        Traceback (most recent call last)
    <ipython-input-193-0ad9ad6a11bd> in <module>()
    ----> 1 data[:4].decode('utf8')
    
    UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 3: unexpected end of data
    
    
    In [195]:
     
     
     
     
     
    sink_path = 'sink.txt'
    with open(path) as source:
        with open(sink_path, 'xt', encoding='iso-8859-1') as sink:
            sink.write(source.read())
    with open(sink_path, encoding='iso-8859-1') as f:
        print(f.read(10))
     
     
     
    ---------------------------------------------------------------------------
    FileExistsError                           Traceback (most recent call last)
    <ipython-input-195-83e4bc815eaa> in <module>()
          1 sink_path = 'sink.txt'
          2 with open(path) as source:
    ----> 3     with open(sink_path, 'xt', encoding='iso-8859-1') as sink:
          4         sink.write(source.read())
          5 with open(sink_path, encoding='iso-8859-1') as f:
    
    FileExistsError: [Errno 17] File exists: 'sink.txt'
    
    
    In [196]:
     
     
     
     
     
    os.remove(sink_path)
     
     
    In [197]:
     
     
     
     
     
    f = open(path)
    f.read(5)
    f.seek(4)
    f.read(1)
    f.close()
     
     
    In [198]:
     
     
     
     
     
    %popd
     
     
     
    C:\Users\qq123\Documents\GitHub\pydata-book
    popd -> ~\Documents\GitHub\pydata-book
  • 相关阅读:
    线性回归(Linear Regression)的理解及原理
    3个模型搞清楚用户留存分析
    机器学习简单介绍
    数据分析经典方法:5W2H分析法
    使用guava RateLimiter限流
    Maven之assembly自定义打包
    IDE自动编译
    神奇的$scope
    二分法查找
    深入理解CSS选择器优先级
  • 原文地址:https://www.cnblogs.com/romannista/p/10683980.html
Copyright © 2020-2023  润新知