• torch交叉熵计算


    交叉熵计算函数

    第一种代码

    import torch as t
    import torch.nn  as nn
    # Scores for a batch of 1 sample over 4 classes: shape (batch_size, num_classes).
    score = t.randn(1, 4)
    # The single sample belongs to class 1. The target must be an integer
    # (int64 / LongTensor) tensor of class indices, one entry per sample.
    label = t.tensor([1], dtype=t.long)

    # The loss is used like any ordinary layer: build it, then call it.
    criterion = nn.CrossEntropyLoss()
    loss = criterion(score, label)
    loss
    

    第二种代码

    def cal_loss(logits, targets, tag2id):
        """Compute the cross-entropy loss over non-padding positions.

        Args:
            logits: Tensor of shape [B, L, out_size]; the last dimension holds
                the score estimated for each class.
            targets: Tensor of shape [B, L] holding the class index of each
                position.
            tag2id: Mapping from tag string to class index; it must contain
                a '<pad>' entry.

        Returns:
            The result of ``F.cross_entropy`` (default arguments) computed on
            the positions whose target differs from the '<pad>' index.

        Raises:
            ValueError: If ``tag2id`` has no '<pad>' entry.

        The targets are masked and flattened to one dimension, the logits are
        masked the same way into shape [N, out_size], and cross-entropy is
        computed on the result. Per-class weights could be added if needed.
        """
        # Imported locally so the function does not depend on a module-level
        # ``F`` alias being defined by the surrounding file.
        import torch.nn.functional as F

        PAD = tag2id.get('<pad>')
        if PAD is None:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            raise ValueError("tag2id must contain a '<pad>' entry")

        mask = targets != PAD  # [B, L], True where the target is not padding

        # Boolean indexing with a [B, L] mask selects over the first two
        # dimensions: targets becomes [N] and logits becomes [N, out_size],
        # with rows kept in the same order for both.
        targets = targets[mask]
        logits = logits[mask]

        return F.cross_entropy(logits, targets)
    
    

    mask作用

    import torch as t
    tensor = t.randn(3,4)
    tensor
    
    tensor([[ 0.5354,  1.2540,  0.4883,  0.1540],
            [-0.1744,  0.4162, -0.8495,  0.3767],
            [ 0.7846, -1.0294,  0.5631,  0.0470]])
    
    mask = tensor>0
    mask
    
    tensor([[ True,  True,  True,  True],
            [False,  True, False,  True],
            [ True, False,  True,  True]])
    
    new_tensor = tensor[mask]
    new_tensor
    
    tensor([0.5354, 1.2540, 0.4883, 0.1540, 0.4162, 0.3767, 0.7846, 0.5631, 0.0470])
    

    .size() 无参数时返回所有维度的大小(torch.Size),有参数时返回指定维度的大小

    tensor.size(1)
    
    4
    
    new_tensor.size(0)
    
    9
    

    .contiguous().view(-1, out_size) 相当于reshape():view 要求张量的内存布局与新形状兼容(通常需要连续),所以先调用 contiguous()

    tensor.contiguous().view(-1, 3)
    
    tensor([[ 0.5354,  1.2540,  0.4883],
            [ 0.1540, -0.1744,  0.4162],
            [-0.8495,  0.3767,  0.7846],
            [-1.0294,  0.5631,  0.0470]])
    
    tensor.reshape(-1,3)
    
    tensor([[ 0.5354,  1.2540,  0.4883],
            [ 0.1540, -0.1744,  0.4162],
            [-0.8495,  0.3767,  0.7846],
            [-1.0294,  0.5631,  0.0470]])
  • 相关阅读:
    Linux内核info leak漏洞
    ELK Stack部署
    centos下安装opencv
    windows10 进入BIOS
    Dockerfile语法简介
    JAVA 容器配置 JVM 监控
    docker registry
    squid
    正反向代理
    安装plsql
  • 原文地址:https://www.cnblogs.com/rise0111/p/11533885.html
Copyright © 2020-2023  润新知