• 【AI】PytorchSegmentCode


    From: https://liudongdong1.github.io/

    0. 基础配置

    0.1. 设置随机种子

    def set_seeds(seed, cuda):
        """Seed the NumPy and PyTorch random number generators.

        Args:
            seed: Seed value handed to both libraries.
            cuda: When truthy, the same seed is also applied to all CUDA devices.
        """
        torch.manual_seed(seed)
        np.random.seed(seed)
        if cuda:
            torch.cuda.manual_seed_all(seed)
        print("==> Set NumPy and PyTorch seeds.")
    

    0.2. 张量处理与转化

    tensor.type()   # Data type
    tensor.size()   # Shape of the tensor. It is a subclass of Python tuple
    tensor.dim()    # Number of dimensions.

    # Type conversions.
    tensor = tensor.cuda()
    tensor = tensor.cpu()
    tensor = tensor.float()
    tensor = tensor.long()

    # Tensor <-> plain Python values
    # Tensor -> single Python number: data.item(); data must hold exactly one element.
    # Tensor -> Python list: data.tolist(); returns a (possibly nested) list of the same shape.

    # CPU / GPU placement
    # CPU tensor -> GPU tensor: data.cuda()
    # GPU tensor -> CPU tensor: data.cpu()

    # torch.Tensor -> np.ndarray.
    ndarray = tensor.cpu().numpy()
    ndarray = tensor.numpy()  # Only valid for a CPU tensor that does not require grad
    tensor.cpu().detach().numpy().tolist()[0]  # First element as plain Python data
    # np.ndarray -> torch.Tensor.
    tensor = torch.from_numpy(ndarray).float()
    tensor = torch.from_numpy(ndarray.copy()).float()  # If ndarray has negative stride
    # torch.Tensor -> PIL.Image.
    image = PIL.Image.fromarray(torch.clamp(tensor * 255, min=0, max=255
        ).byte().permute(1, 2, 0).cpu().numpy())
    image = torchvision.transforms.functional.to_pil_image(tensor)  # Equivalent way
    # PIL.Image -> torch.Tensor.
    tensor = torch.from_numpy(np.asarray(PIL.Image.open(path))
        ).permute(2, 0, 1).float() / 255
    tensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path))  # Equivalent way
    # np.ndarray -> PIL.Image.
    # Fixed: the method is `astype`; the original `astypde` raised AttributeError.
    image = PIL.Image.fromarray(ndarray.astype(np.uint8))
    # PIL.Image -> np.ndarray.
    ndarray = np.asarray(PIL.Image.open(path))
    
    # Copying a tensor
    # Operation                 |  New/Shared memory | Still in computation graph |
    tensor.clone()            # |        New         |          Yes               |
    tensor.detach()           # |      Shared        |          No                |
    # Fixed: `detach` must be called before `clone`; the original
    # `tensor.detach.clone()()` looked up `.clone` on the bound method and failed.
    tensor.detach().clone()   # |        New         |          No                |
    # Reshape
    tensor = torch.reshape(tensor, shape)
    # Expand tensor of shape 64*512 to shape 64*512*7*7.
    torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
    
    # Concatenation. torch.cat joins along an existing dimension, while torch.stack
    # adds a new one. For three 10x5 tensors, torch.cat (dim=0) gives a 30x5 tensor
    # and torch.stack (dim=0) gives a 3x10x5 tensor.
    tensor = torch.cat(list_of_tensors, dim=0)
    tensor = torch.stack(list_of_tensors, dim=0)
    
    # Locating zero / non-zero elements
    torch.nonzero(tensor)               # Index of non-zero elements
    torch.nonzero(tensor == 0)          # Index of zero elements
    torch.nonzero(tensor).size(0)       # Number of non-zero elements
    torch.nonzero(tensor == 0).size(0)  # Number of zero elements
    
    # Multiplication
    # Matrix multiplication: (m*n) * (n*p) -> (m*p).
    result = torch.mm(tensor1, tensor2)
    # Batch matrix multiplication: (b*m*n) * (b*n*p) -> (b*m*p).
    result = torch.bmm(tensor1, tensor2)
    # Element-wise multiplication.
    result = tensor1 * tensor2
    
    # Pairwise Euclidean distances between two sets of vectors
    # X1 is of shape m*d.
    X1 = torch.unsqueeze(X1, dim=1).expand(m, n, d)
    # X2 is of shape n*d.
    X2 = torch.unsqueeze(X2, dim=0).expand(m, n, d)
    # dist is of shape m*n, where dist[i][j] = sqrt(|X1[i, :] - X2[j, :]|^2)
    dist = torch.sqrt(torch.sum((X1 - X2) ** 2, dim=2))
    
    # Convolution layer
    conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)
    

    0.3. pytorch 版本

    # Query the installed PyTorch build and the GPU it sees.
    torch.__version__               # PyTorch version
    torch.version.cuda              # Corresponding CUDA version
    torch.backends.cudnn.version()  # Corresponding cuDNN version
    torch.cuda.get_device_name(0)   # GPU type
    

    0.4. GPU指定

    # Check whether CUDA can be used at all.
    torch.cuda.is_available()
    # Restrict this process to GPUs 0 and 1.
    # NOTE(review): this variable is normally set before CUDA is first initialised — confirm ordering.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    

    1. 数据加载分割

    1.0. Transform 变化

    其中ToTensor操作会将PIL.Image或形状为H×W×D,数值范围为[0, 255]的np.ndarray转换为形状为D×H×W,数值范围为[0.0, 1.0]的torch.Tensor。 Normalize 需要注意数据的维度,否则容易报错。

    # Training pipeline: random crop + flip for augmentation, then tensor
    # conversion and per-channel normalisation.
    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(size=224,
                                                 scale=(0.08, 1.0)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225)),
    ])
    # Validation pipeline: deterministic resize + centre crop, same normalisation.
    # Fixed: the original closing bracket and this assignment were indented one
    # column deeper than the rest of the snippet, which is an IndentationError.
    val_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225)),
    ])
    

    1.1. 自定义dataset类

    class CharDataset(Dataset):
        """Keypoint dataset backed by a CSV file and a directory of images.

        Each CSV row holds an image file name in column 0 followed by the
        keypoint coordinates, which are reshaped into (x, y) pairs.
        """

        def __init__(self, csv_file, root_dir, transform=None):
            """Load the keypoint table.

            Args:
                csv_file: Path to the CSV file with keypoint data.
                root_dir: Directory containing the images named in the CSV.
                transform: Optional callable applied to each sample dict.
            """
            self.key_pts_frame = pd.read_csv(csv_file)
            self.root_dir = root_dir
            self.transform = transform

        def __len__(self):
            """Return the number of samples (rows of the CSV)."""
            # Fixed: len(frame.shape) is always 2; the row count is len(frame).
            return len(self.key_pts_frame)

        def __getitem__(self, idx):
            """Return ``{'image': ndarray, 'keypoints': ndarray of shape (-1, 2)}``."""
            image_name = os.path.join(self.root_dir, self.key_pts_frame.iloc[idx, 0])
            image = mpimg.imread(image_name)
            # Drop the alpha channel if present. Grayscale images are 2-D, so the
            # channel axis must be checked for before it is indexed.
            if image.ndim == 3 and image.shape[2] == 4:
                image = image[:, :, 0:3]
            # Fixed: `.values` is an attribute, not a method.
            key_pts = self.key_pts_frame.iloc[idx, 1:].values
            key_pts = key_pts.astype('float').reshape(-1, 2)
            sample = {'image': image, 'keypoints': key_pts}
            # Apply the optional transform to the whole sample dict.
            if self.transform:
                sample = self.transform(sample)
            return sample
    if __name__ == "__main__":
        # Raw string without a trailing backslash: the original literal ended in
        # `\"`, which escaped the closing quote and was a SyntaxError.
        data_dir = r"D:\Model\CharPointDetection\data\test"
        # CharDataset needs both a CSV path and an image directory.
        # NOTE(review): the CSV file name below is a placeholder — confirm the real one.
        chardata = CharDataset(os.path.join(data_dir, "keypoints.csv"), data_dir)
        print(len(chardata))    # 1198
        print(chardata[0].get("image").shape)  # (96, 96); max value 1, min value 0
    
    • dataset
    import json
    import matplotlib.pyplot as plt
    import numpy as np
    from torch.utils.data import Dataset,DataLoader
    import matplotlib.pyplot as plt
    from torchvision import transforms, utils
    import cv2
    from util.imageUtil import *
    from util.config import *
    class DatasetCustom(Dataset):
        """Image/keypoint dataset read from a CSV label file.

        The CSV rows are split by position into a training part (first
        ``ratio`` of the rows) and a test part (the remainder).
        """

        def __init__(self, rootcsv, imgroot, train=True, transform=None, ratio=0.7):
            """Read the label file and keep the requested split.

            Args:
                rootcsv: Path to the CSV label file.
                imgroot: Directory containing the images.
                train: Keep the training split when True, else the test split.
                transform: Optional callable applied to the loaded image.
                ratio: Fraction of rows assigned to the training split.
            """
            self.train = train
            self.transform = transform
            self.allItem = self.readcsv(rootcsv)
            self.imgroot = imgroot
            # TODO: shuffle the rows before splitting into train and test.
            split = int(len(self.allItem) * ratio)
            if self.train:
                self.labelItem = self.allItem[:split]
            else:
                # Fixed: the original started at split + 1, so the row at the
                # split index belonged to neither the train nor the test set.
                self.labelItem = self.allItem[split:]

        def readcsv(self, filename):
            """Read the CSV (skipping the header) and keep every second row."""
            with open(filename, encoding='utf-8') as f:
                # ndmin=2 keeps a single-row file two-dimensional so the row
                # slicing below still works.
                data = np.loadtxt(f, str, delimiter=",", skiprows=1, ndmin=2)
            return data[::2, :]

        def __getitem__(self, index):
            """Return ``{'image': img, 'keypoints': float32 ndarray}`` for one row."""
            # Wrap out-of-range indices back into the split.
            index = index % self.__len__()
            img_name = self.labelItem[index][0].split('_')  # parts of the image path
            imgpath="{}/camera{}_{}_{}_{}.jpg".format(self.imgroot,img_name[0],img_name[1],0-int(img_name[1]),img_name[2])
            # The image is resized on load, so the labels are scaled by the same ratios.
            ratioW, ratioH, img = imageloadCV(imgpath, RESIZE)
            keypoints = self.labelCoordinateHandle(self.labelItem[index][10:], ratioW, ratioH)
            if self.transform is not None:
                img = self.transform(img)
            # A dict is returned, so iterate the loader as
            #     for i, data in enumerate(dataloader):
            #         image, keypoints = data['image'], data['keypoints']
            # Returning a tuple `(img, keypoints)` instead would allow
            #     for step, (b_x, b_y) in enumerate(train_loader):
            return {
                'image': img,
                'keypoints': keypoints,
            }

        def labelCoordinateHandle(self, data, ratioW, ratioH):
            """Scale label entries 0 and 3 by ``ratioW`` and 1 and 4 by ``ratioH``.

            All other entries are converted to float and passed through unchanged.
            NOTE(review): presumably entries 0/1 and 3/4 are (x, y) pairs — confirm.
            """
            data = [float(i) for i in data]
            data[0] = data[0] * ratioW
            data[1] = data[1] * ratioH
            data[3] = data[3] * ratioW
            data[4] = data[4] * ratioH
            return np.array(data, dtype='float32')

        def __len__(self):
            """Return the number of rows in the selected split."""
            return len(self.labelItem)
    
     
    if __name__ == '__main__':
        train_dataset = DatasetCustom(rootcsv=ROOT_CSV, imgroot=IMG_ROOT, train=True, transform=transforms.ToTensor(), ratio=0.7)
        test_dataset = DatasetCustom(rootcsv=ROOT_CSV, imgroot=IMG_ROOT, train=False, transform=transforms.ToTensor(), ratio=0.7)

        # Single record. ToTensor has already moved channels first
        # (img = torch.from_numpy(pic.transpose((2, 0, 1)))), so move them back for plotting.
        data = train_dataset[1]
        img, label = data['image'], data['keypoints']
        img = np.transpose(img.numpy(), (1, 2, 0))
        plt.imshow(img)
        plt.show()
        print("label", label)

        # Inspect the DataLoaders.
        train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=6, shuffle=False)

        def imshow(img):
            """Plot a channels-first image tensor."""
            npimg = img.numpy()
            plt.imshow(np.transpose(npimg, (1, 2, 0)))

        # len() of a DataLoader is the number of batches.
        print('num_of_trainData:', len(train_loader))
        print('num_of_testData:', len(test_loader))
        # Show the images of the first batch only.
        for batch in train_loader:
            # Fixed: each batch is a dict, so unpacking it as (b_x, b_y) would
            # bind the key strings instead of the tensors.
            b_x = batch['image']
            imgs = utils.make_grid(b_x)
            print(imgs.shape)
            # Convert to ndarray before transposing to height x width x channel.
            imgs = np.transpose(imgs.numpy(), (1, 2, 0))
            print(imgs.shape)
            plt.imshow(imgs)
            plt.show()
            break
    

    1.2. 数据分割获取

    # Custom dataset instance. Named `full_dataset` so it does not shadow the
    # `torch.utils.data.Dataset` class that the dataset classes inherit from.
    # NOTE(review): CharDataset.__init__ takes (csv_file, root_dir, ...) — confirm the arguments.
    full_dataset = CharDataset(rootdir)
    test_percent = 5
    num_samples = len(full_dataset)
    # Size of the held-out part, rounded up; computed once so both slices agree.
    num_test = int(np.ceil(num_samples * test_percent / 100))
    split = num_samples - num_test
    # Fixed seed so the random split is reproducible.
    torch.manual_seed(1)
    indices = torch.randperm(num_samples).tolist()
    dataset = torch.utils.data.Subset(full_dataset, indices[:split])
    dataset_test = torch.utils.data.Subset(full_dataset, indices[split:])
    # define training and validation data loaders
    # NOTE(review): this project-local `utils` rebinds the name `utils`; any earlier
    # `from torchvision import utils` is no longer reachable under that name.
    import utils
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=(1), shuffle=False,
        collate_fn=utils.collate_fn)
    for batch_i, data in enumerate(data_loader):
        # NOTE(review): assumes utils.collate_fn returns a dict-like batch — confirm.
        images = data['image']
        key_pts = data['keypoints']
    

    1.3. 视频图像数据

    import cv2
    # Read basic metadata (frame size, frame count, fps) from a video file.
    video = cv2.VideoCapture(mp4_path)
    # Fail loudly if the file could not be opened; otherwise the property
    # queries below would not describe a real video.
    if not video.isOpened():
        raise IOError("Cannot open video: %s" % mp4_path)
    try:
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        # int() truncates fractional frame rates.
        fps = int(video.get(cv2.CAP_PROP_FPS))
    finally:
        # Always release the capture handle, even if a query raises.
        video.release()
    

    1.4. ImageFolder等类

    import torchvision.datasets as dset
    dataset = dset.ImageFolder('./data/dogcat_2') # No transform yet: look at the raw image data first
    print(dataset.classes)  # Class names, taken from the sub-folder names
    print(dataset.class_to_idx) # Index assigned to each class, in order: 0, 1, ...
    print(dataset.imgs) # Image paths found in all folders, with their class index
    
    
    # Load the images
    # Size of the smaller of the two class folders (kept for reference only).
    datalength=min(len(os.listdir(os.path.join(imageFolder,'protectivesuit'))),len(os.listdir(os.path.join(imageFolder,'whitecoat'))))
    # Fixed: use the `dset` alias under which torchvision.datasets is imported.
    all_dataset = dset.ImageFolder(root=DATA_PATH_TRAIN, transform=trans)
    # random_split takes absolute lengths, not ratios, and they must add up to
    # len(all_dataset). The sizes are therefore derived from the dataset itself,
    # with the last split taking the rounding remainder. (The original derived
    # them from the smallest class count, which cannot match the total.)
    total = len(all_dataset)
    num_train = int(total * 0.7)
    num_test = int(total * 0.2)
    num_valid = total - num_train - num_test
    print("数据划分:",[num_train, num_test, num_valid])
    train_set, test_set, valid_set = torch.utils.data.random_split(
        dataset=all_dataset, lengths=[num_train, num_test, num_valid])
    # Wrap each split in a DataLoader as usual.
    train = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
    test  = DataLoader(test_set,  batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
    valid = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
    # Fixed: class metadata lives on the ImageFolder, not on the DataLoader.
    print(all_dataset.classes)  # Class names, taken from the sub-folder names
    print(all_dataset.class_to_idx) # Index assigned to each class, in order: 0, 1, ...
    print(all_dataset.imgs) # Image paths found in all folders, with their class index
    

    1.5. OneHot 编码

    # Class labels in PyTorch start at 0.
    tensor = torch.tensor([0, 2, 1, 3])
    N = tensor.size(0)
    num_classes = 4
    # Start from an all-zero integer matrix with one row per label ...
    one_hot = torch.zeros(N, num_classes, dtype=torch.long)
    # ... and write a 1 into the column named by each label.
    one_hot.scatter_(dim=1, index=tensor.unsqueeze(dim=1), value=1)
    

    2. 训练基本框架

    for epoch in range(2):  # two passes over the training data
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            # Forward pass, loss, backward pass, parameter update.
            # (The original note says the criterion already averages over the batch.)
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss once every 2000 mini-batches.
            running_loss += loss.item()
            if (i + 1) % 2000 == 0:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')
    
    # Train for 80 epochs with a per-epoch progress bar.
    # Fixed: iterate over range(80); `epoch` is not a callable.
    for t in range(80):
        for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
            images, labels = images.cuda(), labels.cuda()
            scores = model(images)
            loss = loss_function(scores, labels)
            # Clear old gradients before back-propagating the new loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    # Accuracy of the class scores for one batch.
    # No gradients are needed for evaluation.
    with torch.no_grad():
        score = model(images)
    # Index of the largest score in each row; without `dim` the tensor would be
    # treated as flattened.
    prediction = torch.argmax(score, dim=1)
    num_correct = torch.sum(prediction == labels).item()
    # Fixed spelling: the original bound the result to `accuruacy`.
    accuracy = num_correct / labels.size(0)
    
    • 标签平滑(Label smoothing):先将 one-hot 标签平滑,再计算损失
    for images, labels in train_loader:
        images = images.cuda()
        labels = labels.cuda()
        N = labels.size(0)
        # C is the number of classes.
        # Every wrong class gets an equal share of 0.1; the true class gets 0.9.
        off_value = 0.1 / (C - 1)
        smoothed_labels = torch.full(size=(N, C), fill_value=off_value).cuda()
        smoothed_labels.scatter_(dim=1, index=labels.unsqueeze(dim=1), value=0.9)

        # Cross-entropy against the smoothed targets, averaged over the batch.
        score = model(images)
        log_prob = torch.nn.functional.log_softmax(score, dim=1)
        loss = -(log_prob * smoothed_labels).sum() / N
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    

    3. 模型保存与加载

    注意:torch.load 加载时需要通过 map_location 指定张量要加载到的设备,例如 map_location='cpu'。

    torch.save 有两种方式:

    • 保存权重和模型,但是文件结构不能改变,否则报错
    • 保存权重,加载时,先初始化类,然后加载权重信息。
    # Save the entire network.
    torch.save(net, PATH)
    # Save only the network's parameters: faster and smaller on disk.
    torch.save(net.state_dict(),PATH)
    #--------------------------------------------------
    # The matching ways to load what was saved above:
    # 1) Whole network: torch.load returns the model object itself.
    # NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
    # in which case loading a whole module needs weights_only=False — confirm
    # against the installed version.
    model = torch.load(PATH)
    # 2) Parameters only: load them into an already constructed model.
    # Fixed: load_state_dict updates `model` in place and its return value is
    # not a model or a state dict, so it is not bound to a name.
    model.load_state_dict(torch.load(PATH))
    # map_location='cpu' loads the weights onto the CPU.
    mlp_mixer.load_state_dict(torch.load(Config.MLPMIXER_WEIGHT,map_location='cpu'))
    
    # save model
    def save_models(tempmodel,save_path):
        """Save the model's state_dict under the ``./model/`` directory.

        Args:
            tempmodel: Model whose ``state_dict()`` is written.
            save_path: File name relative to ``./model/``. An absolute path is
                used as given.
        """
        import os

        # Fixed: the original concatenated "./model/" with the state dict
        # (str + dict raises TypeError); the prefix belongs on the path.
        target = os.path.join("./model/", save_path)
        # Create the target directory on first use.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        torch.save(tempmodel.state_dict(), target)
        print("Checkpoint saved")
    # load model
    model = Net()  # the model architecture
    saved_weights = torch.load(Path("./model/95.model"))
    model.load_state_dict(saved_weights)
    # Call eval() before inference to put dropout and batch-normalisation layers
    # into evaluation mode; skipping it gives inconsistent inference results.
    model.eval()
    
    # Checkpointing
    # Save checkpoint.
    is_best = current_acc > best_acc
    best_acc = max(best_acc, current_acc)
    # Everything needed to resume: best accuracy so far, the next epoch number,
    # and the model and optimizer states.
    checkpoint = {
        'best_acc': best_acc,
        'epoch': t + 1,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    model_path = os.path.join('model', 'checkpoint.pth.tar')
    torch.save(checkpoint, model_path)
    if is_best:
        # Fixed: keep a separate copy of the best checkpoint. The original copied
        # a non-existent 'checkpoint.pth.tar' in the working directory onto the
        # checkpoint that had just been written.
        shutil.copy(model_path, os.path.join('model', 'model_best.pth.tar'))
     
    # Load checkpoint.
    # When resuming, restore the best accuracy, the epoch to continue from, and
    # the model and optimizer states from the saved checkpoint dict.
    if resume:
        model_path = os.path.join('model', 'checkpoint.pth.tar')
        # Stop early if there is no checkpoint file to resume from.
        assert os.path.isfile(model_path)
        checkpoint = torch.load(model_path)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Load checkpoint at epoch %d.' % start_epoch)
    

    4. 计算准确率,查准率,查全率

    # data['label'] and data['prediction'] are groundtruth label and prediction
    # for each image, respectively.
    accuracy = np.mean(data['label'] == data['prediction']) * 100

    # Compute precision and recall for each class (in percent).
    # (The original repeated this whole snippet twice verbatim; one copy is kept.)
    precision_per_class = []
    recall_per_class = []
    # Fixed: num_classes is an int, so iterate range(num_classes), not range(len(...)).
    for c in range(num_classes):
        # True positives: labelled c and predicted c.
        tp = np.dot((data['label'] == c).astype(int),
                    (data['prediction'] == c).astype(int))
        tp_fp = np.sum(data['prediction'] == c)  # everything predicted as c
        tp_fn = np.sum(data['label'] == c)       # everything labelled c
        # Report 0 instead of dividing by zero for a class that was never
        # predicted / never appears in the labels.
        precision = tp / tp_fp * 100 if tp_fp else 0.0
        recall = tp / tp_fn * 100 if tp_fn else 0.0
        precision_per_class.append(precision)
        recall_per_class.append(recall)
    

    建议有参数的层和汇合(pooling)层使用torch.nn模块定义,激活函数直接使用torch.nn.functional。torch.nn模块和torch.nn.functional的区别在于,torch.nn模块在计算时底层调用了torch.nn.functional,但torch.nn模块包括该层参数,还可以应对训练和测试两种网络状态。model(x)前用model.train()和model.eval()切换网络状态。loss.backward()前用optimizer.zero_grad()清除累积梯度。当optimizer管理模型的全部参数时,optimizer.zero_grad()和model.zero_grad()效果一样。

    5. 可视化部分

    可视化有两个选择:Facebook 开发的 Visdom,以及 TensorBoard(在 PyTorch 中可通过 tensorboardX 使用)。
    https://github.com/facebookresearch/visdom
    https://github.com/lanpa/tensorboardX

    # Example using Visdom.
    vis = visdom.Visdom(env='Learning curve', use_incoming_socket=False)
    # Fixed: this snippet is not inside a class, so the client is `vis`,
    # not `self._visdom`.
    assert vis.check_connection()
    # NOTE(review): presumably clears windows left over from a previous run — confirm.
    vis.close()
    # Axis/legend options for the three plots, grouped under one name.
    options = collections.namedtuple('Options', ['loss', 'acc', 'lr'])(
        loss={'xlabel': 'Epoch', 'ylabel': 'Loss', 'showlegend': True},
        acc={'xlabel': 'Epoch', 'ylabel': 'Accuracy', 'showlegend': True},
        lr={'xlabel': 'Epoch', 'ylabel': 'Learning rate', 'showlegend': True})

    # Fixed: iterate over range(80) (`epoch` is not a callable) and call
    # `train`, which the original misspelled as `tran`.
    for t in range(80):
        train(...)  # placeholder: run one training epoch
        val(...)    # placeholder: run validation
        # Append this epoch's point to each curve.
        vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_loss]),
                 name='train', win='Loss', update='append', opts=options.loss)
        vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_loss]),
                 name='val', win='Loss', update='append', opts=options.loss)
        vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_acc]),
                 name='train', win='Accuracy', update='append', opts=options.acc)
        vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_acc]),
                 name='val', win='Accuracy', update='append', opts=options.acc)
        vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([lr]),
                 win='Learning rate', update='append', opts=options.lr)
    
    
    • pytorch graphviz

    pip install torchviz

    # Build a small two-layer network: Linear(8 -> 16), Tanh, Linear(16 -> 1).
    # NOTE(review): `nn` is presumably `torch.nn`; it is not imported in this snippet.
    model = nn.Sequential()
    model.add_module('W0', nn.Linear(8, 16))
    model.add_module('tanh', nn.Tanh())
    model.add_module('W1', nn.Linear(16, 1))
    
    # One random input row with 8 features, passed through the network.
    x = torch.randn(1, 8)
    y = model(x)
    
    # Render the graph of the scalar y.mean(), labelling nodes with the model's parameter names.
    # NOTE(review): `make_dot` presumably comes from the torchviz package — confirm the import.
    make_dot(y.mean(), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)
    

    • 显示图片中的关键点
    def show_landmarks(image, landmarks):
        """Draw an image and overlay its landmark points in red.

        Args:
            image: Image accepted by ``plt.imshow``.
            landmarks: 2-D array; column 0 is plotted as x and column 1 as y.
        """
        xs = landmarks[:, 0]
        ys = landmarks[:, 1]
        plt.imshow(image)
        plt.scatter(xs, ys, s=10, marker='.', c='r')
        # Short pause so that the plot gets a chance to update.
        plt.pause(0.001)
    
    # Open a new figure, read the image from disk and show it with its landmarks.
    plt.figure()
    face_image = io.imread(os.path.join('data/faces/', img_name))
    show_landmarks(face_image, landmarks)
    plt.show()
    
  • 相关阅读:
    tensorflow实践学习一
    计算CPU的MIPS
    计算机原理一
    SecureCRT连接虚拟机失败及虚拟机ping不通外网
    03.os
    02.random
    01.time
    01.面试过程中其他问题
    06.秒杀系统架构
    05.项目并发分析
  • 原文地址:https://www.cnblogs.com/liu-dongdong/p/15189632.html
Copyright © 2020-2023  润新知