• Weekly Machine Learning Mini-Project 005: Automatic Differentiation

    Implementing automatic differentiation for deep learning essentially amounts to implementing a graph algorithm. Let us work through a simple example:

    \[ y = f(x) + g(x) \]

    The relation above can be drawn as a computation graph, and every computation in it can be broken down into binary operations:

    \[ f = x \cdot x, \qquad g = x \cdot x, \qquad y = f + g \]

    At each step we only need the partial derivatives with respect to the inputs that step directly depends on (for y these are f and g); the derivatives of the earlier layers are then obtained by recursing toward the inputs. The class we implement therefore has to record, for every computed value, which inputs it depends on. Taking the addition at the top of this graph as the example, we define the formula as:

    \[ y = x \cdot x + x \cdot x \]
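
    Concretely, applying the chain rule to this graph gives

    \[ \frac{\partial y}{\partial x}
         = \frac{\partial y}{\partial f}\,\frac{\partial f}{\partial x}
         + \frac{\partial y}{\partial g}\,\frac{\partial g}{\partial x}
         = 1 \cdot 2x + 1 \cdot 2x = 4x \]

    Each grad_fn in the class below implements one of these local factors: the one for addition passes the incoming gradient through unchanged, and the one for multiplication multiplies it by the other operand.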

    After defining a Tensor class, we then have to implement the gradient computation.

    class Tensor:
        def __init__(self, data, depend=None):
            """Store the value and the inputs this value depends on."""
            self.data = data
            self.depend = depend if depend is not None else []
            self.grad = 0
        def __mul__(self, other):
            """Multiplication: d(a*b)/da = b and d(a*b)/db = a."""
            if not isinstance(other, Tensor):
                other = Tensor(other)
            def grad_fn1(grad):
                return grad * other.data
            def grad_fn2(grad):
                return grad * self.data
            depend = [(self, grad_fn1), (other, grad_fn2)]
            return Tensor(self.data * other.data, depend)
        __rmul__ = __mul__
        def __add__(self, other):
            """Addition: the gradient passes through unchanged."""
            if not isinstance(other, Tensor):
                other = Tensor(other)
            def grad_fn(grad):
                return grad
            depend = [(self, grad_fn), (other, grad_fn)]
            return Tensor(self.data + other.data, depend)
        __radd__ = __add__
        def __repr__(self):
            return f"Tensor:{self.data}"
        def backward(self, grad=None):
            """
            Backward pass: recursively push gradients to the inputs.
            """
            if grad is None:
                grad = 1  # seed the output node with dy/dy = 1
            # Accumulate, since a node can be reached through several branches.
            self.grad += grad
            # Recurse into the inputs this node depends on.
            for tensor, grad_fn in self.depend:
                tensor.backward(grad_fn(grad))

    The implementation is quite simple: we only need to visit every node of the computation graph recursively. Following the formula above, we now build the computation graph:

    x = Tensor(2) 
    f = x * x 
    g = x * x 
    y = f + g
    y.backward()
    
    print(y, g.grad, x.grad)
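
    With x = 2 this should print Tensor:8 1 8, since y = 2x^2 = 8, dy/dx = 4x = 8, and g.grad is the partial derivative of y with respect to g, which is 1. A finite-difference comparison is a convenient way to double-check an autodiff engine; the sketch below is not from the original post, and check_grad is a hypothetical helper written against the minimal Tensor class above:

    def check_grad(build, x0, eps=1e-6):
        """Compare the autodiff gradient of build(x) at x0 with a central difference."""
        x = Tensor(x0)
        y = build(x)              # e.g. build = lambda t: t * t + t * t
        y.backward()
        numeric = (build(Tensor(x0 + eps)).data
                   - build(Tensor(x0 - eps)).data) / (2 * eps)
        return x.grad, numeric

    print(check_grad(lambda t: t * t + t * t, 2.0))  # roughly (8.0, 8.0)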

    We can now extend this to a somewhat more complex version, for example by adding matrix operations. The code is as follows:

    """
    自动求导功能的实现
    参考github:autograd
    """
    import numpy as np
    
    def data_trans(data):
        """
        Convert the input to an np.ndarray.
        """
        if isinstance(data, np.ndarray):
            return data
        else:
            return np.array(data)
    def tensor_trans(data):
        """Wrap the input in a Tensor if it is not one already."""
        if isinstance(data, Tensor):
            return data
        else:
            return Tensor(data)
    
    class Tensor:
        def __init__(self, data, training=False, depends_on=None, name="input"):
            self._data = data_trans(data)
            self.training = training            # only training tensors receive gradients
            self.shape = self._data.shape
            self.grad = None
            self.depends_on = depends_on if depends_on is not None else []
            self.step = -1                      # node id assigned by export_graph
            self.name = name
            if self.training:
                self.zero_grad()
    
        def zero_grad(self) -> None:
            self.grad = Tensor(np.zeros_like(self.data, dtype=np.float64))
    
        @property
        def data(self) -> np.ndarray:
            return self._data
    
        @data.setter
        def data(self, new_data: np.ndarray) -> None:
            self._data = new_data
            # Setting the data manually means we invalidate the gradient.
            self.grad = None
    
        def __repr__(self) -> str:
            return f"Tensor({self._data}, training={self.training})"
    
        def __add__(self, other):
            """加法"""
            return _add(self, tensor_trans(other))
    
        def __radd__(self, other):
            """右加"""
            return _add(tensor_trans(other), self)
    
        def __mul__(self, other):
            return _mul(self, tensor_trans(other))
    
        def __rmul__(self, other):
            return _mul(tensor_trans(other), self)
    
        def __matmul__(self, other):
            return _matmul(self, tensor_trans(other))
    
        def __sub__(self, other):
            return _sub(self, tensor_trans(other))
    
        def __rsub__(self, other):
            return _sub(tensor_trans(other), self)
    
        def __neg__(self) -> 'Tensor':
            return _neg(self)
    
        def __getitem__(self, idxs):
            return _slice(self, idxs)
    
        def backward(self, grad=None):
            if grad is None:
                if self.shape == ():
                    grad = Tensor(1.0)
                else:
                    raise RuntimeError("grad must be provided for a non-scalar tensor")
            if self.grad is None:
                self.zero_grad()
            # Accumulate the incoming gradient, then push it to the parent tensors.
            self.grad.data = self.grad.data + grad.data
            for tensor, grad_fn in self.depends_on:
                backward_grad = grad_fn(grad.data)
                tensor.backward(Tensor(backward_grad))
        def export_graph(self, prev=0, point=None, edge=None, prevname="out"):
            """Collect (node, consumer) edges for drawing the computation graph."""
            if prev == 0:
                # Root call: reset the node-id list and the edge list.
                point = [0]
                edge = []
            if self.step not in point:
                # First visit to this tensor: give it a fresh id.
                self.step = np.max(point) + 1
                point.append(self.step)
            edge.append((f"{self.name}:{self.step}", f"{prevname}:{prev}"))
            for tensor, grad_fn in self.depends_on:
                tensor.export_graph(self.step, point, edge, self.name)
            return point, edge
        def sum(self):
            return tensor_sum(self)
    
    
    def tensor_sum(t: Tensor) -> Tensor:
        """
        Takes a tensor and returns the 0-tensor
        that's the sum of all its elements.
        """
        data = t.data.sum()
        training = t.training
    
        if training:
            def grad_fn(grad): 
                """
                grad is necessarily a 0-tensor, so each input element
                contributes that much
                """
                return grad * np.ones_like(t.data)
    
            depends_on = [(t, grad_fn)]
    
        else:
            depends_on = []
    
        return Tensor(data,
                      training,
                      depends_on, "sum")
    
    def _add(t1: Tensor, t2: Tensor) -> Tensor:
        data = t1.data + t2.data
        training = t1.training or t2.training
    
        depends_on = []
    
        if t1.training:
            def grad_fn1(grad) :
                # Sum out added dims
                ndims_added = grad.ndim - t1.data.ndim
                for _ in range(ndims_added):
                    grad = grad.sum(axis=0)
    
                # Sum across broadcasted (but non-added dims)
                for i, dim in enumerate(t1.shape):
                    if dim == 1:
                        grad = grad.sum(axis=i, keepdims=True)
                return grad
    
            depends_on.append((t1, grad_fn1))
    
        if t2.training:
            def grad_fn2(grad: np.ndarray) -> np.ndarray:
                # Sum out added dims
                ndims_added = grad.ndim - t2.data.ndim
                for _ in range(ndims_added):
                    grad = grad.sum(axis=0)
    
                # Sum across broadcasted (but non-added dims)
                for i, dim in enumerate(t2.shape):
                    if dim == 1:
                        grad = grad.sum(axis=i, keepdims=True)
    
                return grad
    
            depends_on.append((t2, grad_fn2))
    
        return Tensor(data,
                      training,
                      depends_on, "add")
    
    def _mul(t1: Tensor, t2: Tensor) -> Tensor:
        data = t1.data * t2.data
        training = t1.training or t2.training
    
        depends_on = []
    
        if t1.training:
            def grad_fn1(grad: np.ndarray) -> np.ndarray:
                grad = grad * t2.data
    
                # Sum out added dims
                ndims_added = grad.ndim - t1.data.ndim
                for _ in range(ndims_added):
                    grad = grad.sum(axis=0)
    
                # Sum across broadcasted (but non-added dims)
                for i, dim in enumerate(t1.shape):
                    if dim == 1:
                        grad = grad.sum(axis=i, keepdims=True)
    
                return grad
    
            depends_on.append((t1, grad_fn1))
    
        if t2.training:
            def grad_fn2(grad: np.ndarray) -> np.ndarray:
                grad = grad * t1.data
    
                # Sum out added dims
                ndims_added = grad.ndim - t2.data.ndim
                for _ in range(ndims_added):
                    grad = grad.sum(axis=0)
    
                # Sum across broadcasted (but non-added dims)
                for i, dim in enumerate(t2.shape):
                    if dim == 1:
                        grad = grad.sum(axis=i, keepdims=True)
    
                return grad
    
            depends_on.append((t2, grad_fn2))
    
        return Tensor(data,               
                      training,      
                      depends_on, "mul")         
    
    def _neg(t: Tensor) -> Tensor:
        data = -t.data
        training = t.training
        if training:
            depends_on = [(t, lambda x: -x)]
        else:
            depends_on = []
    
        return Tensor(data, training, depends_on, "neg")
    
    def _sub(t1: Tensor, t2: Tensor) -> Tensor:
        return t1 + -t2
    
    def _matmul(t1: Tensor, t2: Tensor) -> Tensor:
        """
        if t1 is (n1, m1) and t2 is (m1, m2), then t1 @ t2 is (n1, m2)
        so grad3 is (n1, m2)
        if t3 = t1 @ t2, and grad3 is the gradient of some function wrt t3, then
            grad1 = grad3 @ t2.T
            grad2 = t1.T @ grad3
        """
        data = t1.data @ t2.data
        training = t1.training or t2.training
    
        depends_on = []
    
        if t1.training:
            def grad_fn1(grad: np.ndarray) -> np.ndarray:
                return grad @ t2.data.T
    
            depends_on.append((t1, grad_fn1))
    
        if t2.training:
            def grad_fn2(grad: np.ndarray) -> np.ndarray:
                return t1.data.T @ grad
            depends_on.append((t2, grad_fn2))
    
        return Tensor(data,
                      training,
                      depends_on, "matmul")
    
    def _slice(t: Tensor, idxs) -> Tensor:
        data = t.data[idxs]
        training = t.training
    
        if training:
            def grad_fn(grad: np.ndarray) -> np.ndarray:
                bigger_grad = np.zeros_like(t.data)  # full input shape, not the sliced shape
                bigger_grad[idxs] = grad
                return bigger_grad
    
            depends_on = [(t, grad_fn)]
        else:
            depends_on = []
    
        return Tensor(data, training, depends_on, "slice")
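
    As a quick usage check (not in the original post; the shapes and names below are just an illustration, and it assumes the Tensor class and helpers above are in scope), the broadcasting handled in _add and the matmul gradients can be exercised like this:

    W = Tensor(np.random.randn(3, 2), training=True)
    bias = Tensor(np.zeros(2), training=True)
    X = Tensor(np.random.randn(5, 3))           # plain input, no gradient tracking

    out = (X @ W + bias).sum()                  # bias is broadcast over the batch dim
    out.backward()

    print(W.grad.data.shape)     # (3, 2) -- same shape as W
    print(bias.grad.data.shape)  # (2,)   -- broadcast dims summed out by grad_fn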

    Now let us run a simple optimization (a gradient-descent fit):

    import AutoGrad as ad 
    import numpy as np 
    import matplotlib.pyplot as plt 
    
    x = ad.Tensor(np.random.normal(-1, 1, [100, 1]))
    y = x * x + np.random.normal(0, 0.3, [100, 1])
    
    w = ad.Tensor(np.zeros([1, 1]), True) 
    b = ad.Tensor(np.zeros([1]), True)
    
    for itr in range(100):
        h = x @ w + b
        l = (h - y) * (h - y)
        loss = l.sum()

        loss.backward()
        # Update parameter values in place; the data setter drops the stale
        # gradient and zero_grad() re-creates it for the next iteration.
        w.data = w.data - 0.001 * w.grad.data
        b.data = b.data - 0.001 * b.grad.data
        w.zero_grad()
        b.zero_grad()
     
    
    plt.scatter(x.data, y.data)
    x = ad.Tensor(np.linspace(-1, 3, 100).reshape([100, 1])) 
    h = x @ w + b
    plt.plot(x.data, h.data)
    plt.show()
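
    Note that the updates write to w.data and b.data directly rather than rebinding w and b to new Tensors: the data setter clears the stale gradient, zero_grad() re-creates it, and every iteration rebuilds a fresh graph around the same parameter objects, so old graphs do not pile up across iterations.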

    Drawing the computation graph:

    import AutoGrad as ad 
    import numpy as np 
    import matplotlib.pyplot as plt 
    import networkx as nx 
    
    # Use a directed graph so that the arrows follow the flow of the computation
    G = nx.DiGraph()
    
    x = ad.Tensor(1, True) 
    b = x * x 
    c = x * x
    h = b + c 
    
    points, edges = h.export_graph()
    print(edges)
    G.add_edges_from(edges)
    nx.draw(G, with_labels=True, arrows=True) 
    plt.show()

    Running this prints the edge list and displays the resulting graph.

  • Original article: https://www.cnblogs.com/sea520/p/13212633.html