Cross-entropy loss computation
First approach: nn.CrossEntropyLoss
import torch as t
import torch.nn as nn
# batch_size=3; scores for each of the two classes
score = t.randn(3, 2)
# the three samples belong to classes 1, 0, 1; label must be a LongTensor
label = t.Tensor([1, 0, 1]).long()
# the loss criterion is used just like an ordinary layer
criterion = nn.CrossEntropyLoss()
loss = criterion(score, label)
loss
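As a sanity check (reusing score, label, and loss from above): nn.CrossEntropyLoss is log_softmax followed by NLLLoss, so the following should print True.
import torch.nn.functional as F
manual = F.nll_loss(F.log_softmax(score, dim=1), label)
t.allclose(loss, manual)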
Second approach: masking out PAD positions
import torch.nn.functional as F

def cal_loss(logits, targets, tag2id):
    """Compute the cross-entropy loss, ignoring PAD positions.
    Args:
        logits: [B, L, out_size], where out_size is the number of classes
        targets: [B, L]
    First mask the targets and flatten them into a 1-D tensor, then apply
    the same mask to the logits and view them as (-1, out_size) before
    calling cross_entropy. Per-class weights could be added here as well.
    """
    PAD = tag2id.get('<pad>')
    assert PAD is not None
    mask = (targets != PAD)  # [B, L]
    targets = targets[mask]  # flattened to 1-D
    # size of the last dimension of logits
    out_size = logits.size(2)
    # compute the loss only at positions where the target is not PAD
    logits = logits.masked_select(
        mask.unsqueeze(2).expand(-1, -1, out_size)
    ).contiguous().view(-1, out_size)
    assert logits.size(0) == targets.size(0)
    loss = F.cross_entropy(logits, targets)
    return loss
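A quick smoke test for cal_loss with a toy batch. The tag2id mapping below is made up for illustration; any dict containing a '<pad>' entry works.
tag2id = {'<pad>': 0, 'B': 1, 'I': 2, 'O': 3}
logits = t.randn(2, 5, len(tag2id))    # [B=2, L=5, out_size=4]
targets = t.tensor([[1, 2, 3, 0, 0],
                    [3, 3, 0, 0, 0]])  # 0 is the PAD id
cal_loss(logits, targets, tag2id)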
How the mask works
import torch as t
tensor = t.randn(3,4)
tensor
tensor([[ 0.5354, 1.2540, 0.4883, 0.1540],
[-0.1744, 0.4162, -0.8495, 0.3767],
[ 0.7846, -1.0294, 0.5631, 0.0470]])
mask = tensor>0
mask
tensor([[ True, True, True, True],
[False, True, False, True],
[ True, False, True, True]])
new_tensor = tensor[mask]
new_tensor
tensor([0.5354, 1.2540, 0.4883, 0.1540, 0.4162, 0.3767, 0.7846, 0.5631, 0.0470])
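cal_loss uses masked_select rather than plain boolean indexing; for a boolean mask of the same shape the two are equivalent, both flattening in row-major order:
tensor.masked_select(mask)
tensor([0.5354, 1.2540, 0.4883, 0.1540, 0.4162, 0.3767, 0.7846, 0.5631, 0.0470])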
.size() with no argument returns all dimensions; with an argument it returns the size of that dimension
tensor.size(1)
4
new_tensor.size(0)
9
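Called with no argument, .size() returns the full shape:
tensor.size()
torch.Size([3, 4])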
.contiguous().view(-1, out_size) is equivalent to reshape()
tensor.contiguous().view(-1, 3)
tensor([[ 0.5354, 1.2540, 0.4883],
[ 0.1540, -0.1744, 0.4162],
[-0.8495, 0.3767, 0.7846],
[-1.0294, 0.5631, 0.0470]])
tensor.reshape(-1,3)
tensor([[ 0.5354, 1.2540, 0.4883],
[ 0.1540, -0.1744, 0.4162],
[-0.8495, 0.3767, 0.7846],
[-1.0294, 0.5631, 0.0470]])
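The .contiguous() call matters once the memory layout is no longer contiguous, e.g. after a transpose: view() refuses to run on non-contiguous tensors, while reshape() silently copies when needed.
tensor.t().view(-1, 3)               # raises a RuntimeError (non-contiguous)
tensor.t().contiguous().view(-1, 3)  # works
tensor.t().reshape(-1, 3)            # also works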