• CS190.1x-ML_lab1_review_student



    Part 1 Math review


    Part 2 NumPy


    Scalar multiplication


    # It is convention to import NumPy with the alias np
    import numpy as np

    # Create a NumPy array with the values 1, 2, 3
    simpleArray = np.array([1, 2, 3])
    # Multiplying by a scalar scales every element of the array
    timesFive = 5 * simpleArray
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print(simpleArray)
    print(timesFive)

    Element-wise multiplication and dot product


    # Create ndarrays from a start value, an exclusive stop value, and a step size
    u = np.arange(0, 5, .5)
    v = np.arange(5, 10, .5)
    # `*` multiplies matching positions and yields an array of the same length
    elementWise = u * v
    # np.dot sums those element-wise products into a single scalar
    dotProduct = np.dot(u, v)
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('u: {0}'.format(u))
    print('v: {0}'.format(v))
    print('\nelementWise\n{0}'.format(elementWise))
    print('\ndotProduct\n{0}'.format(dotProduct))

    Matrix math


    from numpy.linalg import inv

    # A is a 2 x 4 matrix; with np.matrix, `*` means matrix multiplication
    A = np.matrix([[1, 2, 3, 4], [5, 6, 7, 8]])
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('A:\n{0}'.format(A))
    # Print A transpose
    print('\nA transpose:\n{0}'.format(A.T))
    # Multiply A by A transpose (2 x 4 times 4 x 2 gives a 2 x 2 matrix)
    AAt = A.dot(A.T)
    print('\nAAt:\n{0}'.format(AAt))
    # Invert AAt with np.linalg.inv()
    AAtInv = inv(AAt)
    print('\nAAtInv:\n{0}'.format(AAtInv))
    # Show inverse times matrix equals identity
    # We round due to numerical precision
    print('\nAAtInv * AAt:\n{0}'.format((AAtInv * AAt).round(4)))

    Part 3 Additional NumPy and Spark linear algebra



    features = np.array([1, 2, 3, 4])
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('features:\n{0}'.format(features))
    # The last three elements of features (a negative start counts from the end)
    lastThree = features[-3:]
    print('\nlastThree:\n{0}'.format(lastThree))

    Combining ndarray objects

    这里介绍np.hstack():按照列来合并; np.vstack():按照行来合并。

    zeros = np.zeros(8)
    ones = np.ones(8)
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('zeros:\n{0}'.format(zeros))
    print('\nones:\n{0}'.format(ones))
    # hstack joins the two 1-D arrays end to end; vstack stacks them as rows
    zerosThenOnes = np.hstack((zeros, ones))   # A 1-D array of 16 values, shape (16,)
    zerosAboveOnes = np.vstack((zeros, ones))  # A 2 by 8 array
    print('\nzerosThenOnes:\n{0}'.format(zerosThenOnes))
    print('\nzerosAboveOnes:\n{0}'.format(zerosAboveOnes))

    PySpark's DenseVector


    from pyspark.mllib.linalg import DenseVector

    numpyVector = np.array([-3, -4, 5])
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('\nnumpyVector:\n{0}'.format(numpyVector))
    # Create a DenseVector consisting of the values [3.0, 4.0, 5.0]
    myDenseVector = DenseVector([3.0, 4.0, 5.0])
    # Calculate the dot product between the DenseVector and the NumPy array
    denseDotProduct = myDenseVector.dot(numpyVector)
    print('myDenseVector:\n{0}'.format(myDenseVector))
    print('\ndenseDotProduct:\n{0}'.format(denseDotProduct))

    Part 4 Python lambda expressions


    # Example function
    def addS(x):
        """Return `x` with the string 's' appended."""
        return x + 's'
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print(type(addS))
    print(addS)
    print(addS('cat'))
    # As a lambda
    addSLambda = lambda x: x + 's'
    print(type(addSLambda))
    print(addSLambda)
    print(addSLambda('cat'))
    # Recall that: "lambda x, y: x + y" creates a function that adds together two numbers
    multiplyByTen = lambda x: x * 10
    print(multiplyByTen(5))
    # Note that the function still shows its name as <lambda>
    print('\n{0}'.format(multiplyByTen))

    lambda fewer steps than def


    # Code using def that we will recreate with lambdas
    def plus(x, y):
        """Return the sum of `x` and `y`."""
        return x + y
    def minus(x, y):
        """Return `x` minus `y`."""
        return x - y
    functions = [plus, minus]
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print(functions[0](4, 5))
    print(functions[1](4, 5))
    # The first function should add two values, while the second function should subtract the second
    # value from the first value.
    lambdaFunctions = [lambda x, y: x + y, lambda x, y: x - y]
    print(lambdaFunctions[0](4, 5))
    print(lambdaFunctions[1](4, 5))

    Lambda expression arguments


    # Examples.  Note that the spacing has been modified to distinguish parameters from tuples.
    # One-parameter function
    a1 = lambda x: x[0] + x[1]
    a2 = lambda (x0, x1): x0 + x1
    print 'a1( (3,4) ) = {0}'.format( a1( (3,4) ) )
    print 'a2( (3,4) ) = {0}'.format( a2( (3,4) ) )
    # Two-parameter function
    b1 = lambda x, y: (x[0] + y[0], x[1] + y[1])
    b2 = lambda (x0, x1), (y0, y1): (x0 + y0, x1 + y1)
    print '
    b1( (1,2), (3,4) ) = {0}'.format( b1( (1,2), (3,4) ) )
    print 'b2( (1,2), (3,4) ) = {0}'.format( b2( (1,2), (3,4) ) )
    # TODO: Replace <FILL IN> with appropriate code
    # Use both syntaxes to create a function that takes in a tuple of two values and swaps their order
    # E.g. (1, 2) => (2, 1)
    swap1 = lambda x: (x[1],x[0])
    swap2 = lambda (x0, x1): (x1,x0)
    print 'swap1((1, 2)) = {0}'.format(swap1((1, 2)))
    print 'swap2((1, 2)) = {0}'.format(swap2((1, 2)))
    # Using either syntax, create a function that takes in a tuple with three values and returns a tuple
    # of (2nd value, 3rd value, 1st value).  E.g. (1, 2, 3) => (2, 3, 1)
    swapOrder = lambda x:(x[1],x[2],x[0])
    print 'swapOrder((1, 2, 3)) = {0}'.format(swapOrder((1, 2, 3)))
    # Using either syntax, create a function that takes in three tuples each with two values.  The
    # function should return a tuple with the values in the first position summed and the values in the
    # second position summed. E.g. (1, 2), (3, 4), (5, 6) => (1 + 3 + 5, 2 + 4 + 6) => (9, 12)
    sumThree = lambda x,y,z :(x[0]+y[0]+z[0],x[1]+y[1]+z[1])
    print 'sumThree((1, 2), (3, 4), (5, 6)) = {0}'.format(sumThree((1, 2), (3, 4), (5, 6)))

    Functional programming

    # Create a class to give our examples the same syntax as PySpark
    class FunctionalWrapper(object):
        """Wrap a collection and expose chainable `map`, `reduce` and `filter` methods.

        Attribute lookups that are not defined on the wrapper (for example
        `append`) are forwarded to the wrapped `data` object, and indexing is
        delegated to it as well.
        """

        def __init__(self, data):
            """Store `data`, the collection that every method operates on."""
            self.data = data

        @staticmethod
        def _materialize(result):
            """Return `result` as-is when it is indexable, otherwise copy it into a list.

            On Python 3 the builtins `map` and `filter` return one-shot iterators;
            copying them into a list keeps indexing, printing and repeated use
            working.  Indexable results (what Python 2 returns) pass through
            untouched.
            """
            if hasattr(result, '__getitem__'):
                return result
            return list(result)

        def map(self, function):
            """Call `map` on the items in `data` using the provided `function`"""
            return FunctionalWrapper(self._materialize(map(function, self.data)))

        def reduce(self, function):
            """Call `reduce` on the items in `data` using the provided `function`"""
            # `reduce` is not a builtin on Python 3; functools provides it on
            # both Python 2.6+ and Python 3.
            import functools
            return functools.reduce(function, self.data)

        def filter(self, function):
            """Call `filter` on the items in `data` using the provided `function`"""
            return FunctionalWrapper(self._materialize(filter(function, self.data)))

        def __eq__(self, other):
            """Return True when `other` is a wrapper of this class with equal attributes."""
            return (isinstance(other, self.__class__)
                and self.__dict__ == other.__dict__)

        def __ne__(self, other):
            """Return the negation of `__eq__`.

            Python 2 does not derive `!=` from `__eq__`, so without this method
            two equal wrappers would still compare as "not equal".
            """
            return not self.__eq__(other)

        def __getattr__(self, name):
            """Forward attribute lookups that fail on the wrapper to `data`."""
            # This hook only runs when normal lookup fails.  If `data` itself is
            # missing (e.g. an instance created without __init__), forwarding
            # would look up `self.data` again and recurse without bound.
            if name == 'data':
                raise AttributeError(name)
            return getattr(self.data, name)

        def __getitem__(self, k):
            """Return item `k` of the wrapped `data`."""
            return self.data.__getitem__(k)

        def __repr__(self):
            return 'FunctionalWrapper({0})'.format(repr(self.data))

        def __str__(self):
            return 'FunctionalWrapper({0})'.format(str(self.data))
    # Map example
    # Create some data (materialized as a list so it looks the same on Python 2 and Python 3)
    mapData = FunctionalWrapper(list(range(5)))
    # Define a function to be applied to each element
    f = lambda x: x + 3
    # Imperative programming: loop through and create a new object by applying f
    mapResult = FunctionalWrapper([])  # Initialize the result
    for element in mapData:
        mapResult.append(f(element))  # Apply f and save the new value
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('Result from for loop: {0}'.format(mapResult))
    # Functional programming: use map rather than a for loop
    print('Result from map call: {0}'.format(mapData.map(f)))
    # Note that the results are the same but that the map function abstracts away the implementation
    # and requires less code
    dataset = FunctionalWrapper(list(range(10)))
    # Multiply each element by 5
    mapResult = dataset.map(lambda x: x * 5)
    # Keep the even elements
    # Note that "x % 2" evaluates to the remainder of x divided by 2
    filterResult = dataset.filter(lambda x: x % 2 == 0)
    # Sum the elements
    reduceResult = dataset.reduce(lambda x, y: x + y)
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print('mapResult: {0}'.format(mapResult))
    print('\nfilterResult: {0}'.format(filterResult))
    print('\nreduceResult: {0}'.format(reduceResult))


    # Example of a multi-line expression statement
    # Note that placing parentheses around the expression allows it to exist on multiple lines without
    # causing a syntax error.
    (dataset
     .map(lambda x: x + 2)
     .reduce(lambda x, y: x * y))
    # Multiply the elements in dataset by five, keep just the even values, and sum those values
    finalSum = (dataset
                .map(lambda x: x * 5)
                .filter(lambda x: x % 2 == 0)
                .reduce(lambda x, y: x + y))
    # A single parenthesized argument prints identically on Python 2 and Python 3
    print(finalSum)
