The company needs idw done, so I've been thrown in at the deep end.
First I'll look at how other people use PyTorch, then start writing code myself.
I'll fill in the fundamentals later.
Part One: Getting Started
1. Basic tensor creation
import torch

## Basic operations

# Creation
x = torch.empty(5, 3)
print(x)
x = torch.rand(5, 3)
print(x)

# dtype
x = torch.zeros(5, 3, dtype=torch.long)
print(x)
print(x.dtype)

# Build a tensor directly from data
x = torch.tensor([5.55, 6])
print(x)

# Reuse the previous tensor's dtype/device
x = x.new_ones(5, 3)
print(x)
x = torch.randn_like(x)
print(x)
print(x.shape)
print(x.size())

# Addition
x = torch.rand(5, 3)
y = torch.rand(5, 3)
print(x + y)
print(x.add(y))
print(torch.add(x, y))

# In-place: store the sum in y
y.add_(x)
print(y)

# Slicing
z = x[:, 1:]
print(z)

# Reshaping
x = torch.randn(4, 4)
y = x.view(16)
print(y)
y = x.view(2, 8)
print(y)
y = x.view(2, -1)
print(y)
x = x.data
print(x)

# Get a Python number out of a one-element tensor
z = torch.randn(1)
z = z.item()
print(z)
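One point worth noting about view(): it does not copy data. It returns a new shape over the same underlying storage, so in-place changes to one tensor are visible through the other. A minimal sketch to confirm this:

import torch

x = torch.zeros(2, 3)
y = x.view(6)      # same storage, different shape
y[0] = 7.0
print(x[0, 0])     # tensor(7.) -- the change shows up through x as well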
2. Converting between NumPy and torch, using the GPU
import torch
import numpy as np

## NumPy conversion + GPU

# Tensor to NumPy array
# (they share the same underlying memory)
a = torch.ones(5)
b = a.numpy()
print(b)

# NumPy array to tensor
a = np.ones(5)
b = torch.from_numpy(a)
print(b)

# Is a GPU available?
c = torch.cuda.is_available()
print(c)

# Operations on the GPU
x = torch.ones(5)
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))
    print(z.cpu().data.numpy())
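Since .numpy() and torch.from_numpy() share memory on the CPU, an in-place change on one side is visible on the other. A small sketch to make that concrete:

import torch
import numpy as np

a = torch.ones(5)
b = a.numpy()
a.add_(1)            # in-place add on the tensor...
print(b)             # ...shows up in the numpy array: [2. 2. 2. 2. 2.]

c = np.ones(5)
d = torch.from_numpy(c)
np.add(c, 1, out=c)  # in-place add on the array...
print(d)             # ...shows up in the tensor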
3. A two-layer neural network in NumPy
# A two-layer neural network written with NumPy
import numpy as np

# h = w1 * X
# a = max(0, h)
# y = w2 * a
# forward pass -> loss -> backward pass

N, D_in, H, D_out = 64, 1000, 100, 10

# Randomly generate some training data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# 1000 dims -> 100 dims
w1 = np.random.randn(D_in, H)
# 100 dims -> 10 dims
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.dot(w1)               # N * H
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)     # N * D_out

    # loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # backward pass: gradients
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # update w1, w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
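For reference, the backward-pass lines above are just the chain rule applied to the squared-error loss. With $h = x w_1$, $h_{relu} = \max(h, 0)$, $\hat{y} = h_{relu} w_2$ and $L = \sum (\hat{y} - y)^2$:

\frac{\partial L}{\partial \hat{y}} = 2(\hat{y} - y), \quad
\frac{\partial L}{\partial w_2} = h_{relu}^\top \frac{\partial L}{\partial \hat{y}}, \quad
\frac{\partial L}{\partial h_{relu}} = \frac{\partial L}{\partial \hat{y}}\, w_2^\top, \quad
\frac{\partial L}{\partial h} = \frac{\partial L}{\partial h_{relu}} \odot \mathbf{1}[h > 0], \quad
\frac{\partial L}{\partial w_1} = x^\top \frac{\partial L}{\partial h}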
4. The same network rewritten with torch
# The same two-layer network rewritten with torch tensors
import torch

# h = w1 * X
# a = max(0, h)
# y = w2 * a
# forward pass -> loss -> backward pass

N, D_in, H, D_out = 64, 1000, 100, 10

# Randomly generate some training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# 1000 dims -> 100 dims
w1 = torch.randn(D_in, H)
# 100 dims -> 10 dims
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.mm(w1)                # N * H
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)      # N * D_out

    # loss
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)

    # backward pass: gradients
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # update w1, w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
5. Autograd
import torch

# Automatic differentiation (autograd)
x = torch.tensor(1., requires_grad=True)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

y = w * x + b
y.backward()

print(w.grad)  # tensor(1.)
print(x.grad)  # tensor(2.)
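One detail that matters for the training loops below: gradients accumulate across backward() calls, so they have to be cleared explicitly. A minimal sketch (not part of the original notes) showing why the later code calls grad.zero_() / zero_grad():

import torch

w = torch.tensor(2., requires_grad=True)
for _ in range(2):
    y = 3 * w       # rebuild the graph each iteration
    y.backward()
print(w.grad)       # tensor(6.) -- the two gradients of 3 were summed
w.grad.zero_()      # clear before the next step
print(w.grad)       # tensor(0.)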
6. A two-layer network trained with autograd
# Two-layer network trained with autograd
import torch

# h = w1 * X
# a = max(0, h)
# y = w2 * a
# forward pass -> loss -> backward pass

N, D_in, H, D_out = 64, 1000, 100, 10

# Randomly generate some training data
x = torch.randn(N, D_in, requires_grad=True)
y = torch.randn(N, D_out, requires_grad=True)

# 1000 dims -> 100 dims
w1 = torch.randn(D_in, H, requires_grad=True)
# 100 dims -> 10 dims
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)   # N * D_out

    # loss
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # backward pass: compute all gradients
    loss.backward()

    # update w1, w2 (no graph tracking while updating)
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
7. A two-layer network using an nn model
# Two-layer network using an nn model
import torch
import torch.nn as nn

# h = w1 * X
# a = max(0, h)
# y = w2 * a
# forward pass -> loss -> backward pass

N, D_in, H, D_out = 64, 1000, 100, 10
learning_rate = 1e-6
loss_fn = nn.MSELoss(reduction='sum')

# Randomly generate some training data
x = torch.randn(N, D_in, requires_grad=True)
y = torch.randn(N, D_out, requires_grad=True)

model = nn.Sequential(
    nn.Linear(D_in, H),   # w1 * x + b1
    nn.ReLU(),
    nn.Linear(H, D_out)
)
nn.init.normal_(model[0].weight)
nn.init.normal_(model[2].weight)

for t in range(500):
    # forward pass
    y_pred = model(x)

    # loss
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # backward pass: compute all gradients
    loss.backward()

    # manually update the parameters
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    model.zero_grad()
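To sanity-check what nn.Sequential built (and what the manual update loop iterates over), it can help to list the parameters. A small sketch, assuming the model defined above:

# Print each parameter's name, shape, and whether it requires gradients.
for name, param in model.named_parameters():
    print(name, tuple(param.shape), param.requires_grad)
# Expected: 0.weight (100, 1000), 0.bias (100,), 2.weight (10, 100), 2.bias (10,)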
8. A two-layer network with an optimizer
# Two-layer network trained with an optimizer
import torch
import torch.nn as nn

# h = w1 * X
# a = max(0, h)
# y = w2 * a
# forward pass -> loss -> backward pass

N, D_in, H, D_out = 64, 1000, 100, 10
learning_rate = 1e-4
loss_fn = nn.MSELoss(reduction='sum')

# Randomly generate some training data
x = torch.randn(N, D_in, requires_grad=True)
y = torch.randn(N, D_out, requires_grad=True)

model = nn.Sequential(
    nn.Linear(D_in, H),   # w1 * x + b1
    nn.ReLU(),
    nn.Linear(H, D_out)
)

# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    # forward pass
    y_pred = model(x)

    # loss
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
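A common variation (not in the original loop) is to decay the learning rate during training. A rough sketch, assuming the same model, loss_fn, and optimizer as above:

# Halve the learning rate every 100 steps with a scheduler.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()   # advance the schedule once per step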
9. Final version
# Final version: the two-layer network as a custom nn.Module
import torch
import torch.nn as nn

# h = w1 * X
# a = max(0, h)
# y = w2 * a

N, D_in, H, D_out = 64, 1000, 100, 10
learning_rate = 1e-4

# Randomly generate some training data
x = torch.randn(N, D_in, requires_grad=True)
y = torch.randn(N, D_out, requires_grad=True)

# Define the model
class TwoLayNet(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayNet, self).__init__()
        self.linear1 = nn.Linear(D_in, H, bias=False)
        self.linear2 = nn.Linear(H, D_out, bias=False)

    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

model = TwoLayNet(D_in, H, D_out)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define the loss function
loss_fn = nn.MSELoss(reduction='sum')

# Training
for t in range(500):
    # forward pass
    y_pred = model(x)

    # loss
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
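Once training works, the usual next step is to save the weights and reload them elsewhere. A short sketch (not in the original notes; the filename is just an example):

# Save only the parameters (state_dict), then restore them into a fresh model.
torch.save(model.state_dict(), "two_layer_net.pt")

model2 = TwoLayNet(D_in, H, D_out)
model2.load_state_dict(torch.load("two_layer_net.pt"))
model2.eval()   # switch to evaluation mode before inference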
Part Two: Word Vectors
1. A small game
FizzBuzz.
Rules: count up from 1; replace every multiple of 3 with "fizz", every multiple of 5 with "buzz", every multiple of 15 with "fizzbuzz", and say any other number as-is.
Plain Python:
## A plain-Python implementation
def fizz_buzz_encode(i):
    if i % 15 == 0:
        return 3
    elif i % 5 == 0:
        return 2
    elif i % 3 == 0:
        return 1
    else:
        return 0

def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i, fizz_buzz_encode(i)))

for i in range(1, 16):
    helper(i)
Now use machine learning so the model learns the game on its own:
import numpy as np
import torch
import torch.nn as nn
from wordvector.fizzbuzz import fizz_buzz_encode, fizz_buzz_decode

# Number of binary digits used to encode each integer
NUM_DIGITS = 10

# Encode a number as its binary digits
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)][::-1])

if __name__ == '__main__':
    # Training data: numbers 101 .. 2**NUM_DIGITS - 1
    trX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DIGITS)])   # torch.Size([923, 10])
    trY = torch.LongTensor([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DIGITS)])        # torch.Size([923])

    # Define the model
    NUM_HIDDEN = 100
    model = nn.Sequential(
        nn.Linear(NUM_DIGITS, NUM_HIDDEN),   # 10 -> 100
        nn.ReLU(),
        nn.Linear(NUM_HIDDEN, 4)             # 4-class classification
    )

    # loss
    loss_fn = nn.CrossEntropyLoss()

    # optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

    # training
    BATCH_SIZE = 128
    for epoch in range(10000):
        # mini-batches
        for start in range(0, len(trX), BATCH_SIZE):
            end = start + BATCH_SIZE
            batchX = trX[start:end]
            batchY = trY[start:end]

            y_pred = model(batchX)
            loss = loss_fn(y_pred, batchY)
            print("epoch", epoch, loss.item())

            # optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Test on 1 .. 99, which the model never saw during training
    testX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(1, 100)])
    with torch.no_grad():
        testY = model(testX)

    # max over dimension 1 gives the predicted class for each number
    predicts = zip(range(1, 100), testY.max(1)[1].data.numpy())
    print([fizz_buzz_decode(i, x) for i, x in predicts])
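To quantify how well the model learned the rule (the decoded list below shows it still makes some mistakes), a quick accuracy check against the true labels can be appended after the test block. A sketch, assuming testY and binary_encode from the script above:

# Compare predicted classes with the ground-truth fizz_buzz_encode labels on 1..99.
true_labels = np.array([fizz_buzz_encode(i) for i in range(1, 100)])
pred_labels = testY.max(1)[1].data.numpy()
print("accuracy on 1..99:", (pred_labels == true_labels).mean())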
Result:
['1', '2', 'fizz', 'buzz', 'buzz', 'fizz', '7', '8', 'fizz', 'buzz', '11', 'fizz', '13', '14', 'fizzbuzz', '16', '17', 'fizz', '19', 'buzz', 'fizz', '22', '23', 'fizz', 'buzz', '26', 'fizz', '28', '29', 'fizzbuzz', '31', 'buzz', 'fizz', '34', 'buzz', 'fizz', '37', '38', 'fizz', 'buzz', '41', '42', '43', '44', 'fizzbuzz', '46', '47', 'fizz', '49', 'buzz', 'fizz', '52', '53', 'fizz', 'buzz', '56', 'fizz', '58', '59', 'fizzbuzz', '61', '62', 'fizz', 'buzz', 'buzz', 'fizz', '67', 'buzz', 'fizz', 'buzz', '71', 'fizz', '73', '74', 'fizzbuzz', '76', '77', 'fizz', '79', 'buzz', 'fizz', '82', '83', 'fizz', 'buzz', '86', 'fizz', '88', '89', 'fizzbuzz', '91', '92', 'fizz', '94', 'buzz', 'fizz', '97', '98', 'fizz']