

    Semantic Segmentation: Building Footprint Detection

    Competition and data download: 零基础入门语义分割-地表建筑物识别 - 天池大赛 - 阿里云天池 (aliyun.com)

    Experiment Log

    1. Problem Understanding and Baseline

    1) Backbone code

    main.py

    import numpy as np
    import pandas as pd
    import os
    import numba, cv2, time
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    import warnings
    warnings.filterwarnings('ignore')
    import albumentations as A
    import torch
    import torch.nn as nn
    import torch.utils.data as D
    import torchvision
    from rle import rle_encode,rle_decode
    from Tianchidataset import TianChiDataset
    from loss import loss_fn
    import argparse
    from torchvision import transforms as T
    
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    def get_model():
        model = torchvision.models.segmentation.fcn_resnet101(True)  # FCN-ResNet101 with pretrained weights
    
        #     pth = torch.load("../input/pretrain-coco-weights-pytorch/fcn_resnet50_coco-1167a1af.pth")
        #     for key in ["aux_classifier.0.weight", "aux_classifier.1.weight", "aux_classifier.1.bias", "aux_classifier.1.running_mean", "aux_classifier.1.running_var", "aux_classifier.1.num_batches_tracked", "aux_classifier.4.weight", "aux_classifier.4.bias"]:
        #         del pth[key]
    
        # replace the final classifier layer with a single-channel head (building vs. background)
        model.classifier[4] = nn.Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))
        return model
    
    
    @torch.no_grad()
    def validation(model, loader, loss_fn):
        losses = []
        model.eval()
        for image, target in loader:
            image, target = image.to(DEVICE), target.float().to(DEVICE)
            output = model(image)['out']
            loss = loss_fn(output, target)
            losses.append(loss.item())
    
        return np.array(losses).mean()
    
    def parse_args():
        parser = argparse.ArgumentParser(description='Train semantic segmentation network')
        parser.add_argument('--modelDir',
                            help='saved model path name',
                            default="./checkpoints/model_best.pth",
                            type=str)
        parser.add_argument('--data_path',
                            help='dataset path',
                            default='/home/dzh/Desktop/data/dataset/segmentation/tianchi',
                            type=str)
        parser.add_argument('--epoch',
                            help='total train epoch num',
                            default=30,
                            type=int)
        parser.add_argument('--batch_size',
                            help='training batch size',
                            default=160,
                            type=int)
        parser.add_argument('--image_size',
                            help='input size; images are resized to image_size x image_size',
                            default=256,
                            type=int)
        parser.add_argument('--gpu_ids',
                            help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU',
                            default='0,1,2,3',
                            type=str)
        args = parser.parse_args()
        # parse the comma-separated id string into a list of ints for DataParallel
        args.gpu_ids = [int(i) for i in args.gpu_ids.split(',')]
        return args
    
    
    def main():
        args = parse_args()
        #-------------------------- Load data and augmentations ----------------------------
        train_mask = pd.read_csv(os.path.join(args.data_path, 'train_mask.csv'), sep='\t', names=['name', 'mask'])
        train_mask['name'] = train_mask['name'].apply(lambda x: os.path.join(args.data_path,'train/') + x)
        # sanity check: decoding then re-encoding the first mask should reproduce the original RLE string
        mask = rle_decode(train_mask['mask'].iloc[0])
        print(rle_encode(mask) == train_mask['mask'].iloc[0])
    
        trfm = A.Compose([
            A.Resize(args.image_size, args.image_size),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(),
        ])
        dataset = TianChiDataset(
            train_mask['name'].values,
            train_mask['mask'].fillna('').values,
            trfm, False
        )
        valid_idx, train_idx = [], []
        for i in range(len(dataset)):
            if i % 7 == 0:
                valid_idx.append(i)
            # NOTE: `elif i % 7 == 1` keeps only 1/7 of the data for training (quick experiments);
            # change it back to `else:` to train on the full remaining 6/7
            elif i % 7 == 1:
                train_idx.append(i)
    
        train_ds = D.Subset(dataset, train_idx)
        valid_ds = D.Subset(dataset, valid_idx)
        # define training and validation data loaders
        loader = D.DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=0)
        vloader = D.DataLoader(valid_ds, batch_size=args.batch_size, shuffle=False, num_workers=0)
    
        #---------------------------- Load model and optimizer ------------------------------------
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
    
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)
        train_loss = []
        start_epoch = 0  # default when no usable checkpoint is found
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            if 'epoch' in checkpoint:
                start_epoch = checkpoint['epoch']
            if 'optimizer' in checkpoint:
                optimizer.load_state_dict(checkpoint['optimizer'])
            if 'train_loss' in checkpoint:
                train_loss = checkpoint['train_loss']
            print("load model from {}".format(args.modelDir))
        else:
            print("==> no checkpoint found at '{}'".format(args.modelDir))
    
        # ---------------------------- Training -----------------------------------
        header = r'''
                Train | Valid
        Epoch |  Loss |  Loss | Time, m
        '''
        #          Epoch         metrics            time
        raw_line = '{:6d}' + '\u2502{:7.3f}' * 2 + '\u2502{:6.2f}'
        print(header)
        best_loss = 10  # initial value; any reasonable validation loss will beat it
    
        for epoch in range(start_epoch, args.epoch):
            losses = []
            start_time = time.time()
            model.train()
            for image, target in tqdm(loader):
                image, target = image.to(DEVICE), target.float().to(DEVICE)
                optimizer.zero_grad()
                output = model(image)['out']
                loss = loss_fn(output, target)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
                # print(loss.item())
    
            # per-epoch validation, logging and checkpointing (inside the epoch loop)
            vloss = validation(model, vloader, loss_fn)
            print(raw_line.format(epoch, np.array(losses).mean(), vloss, (time.time() - start_time) / 60))
            train_loss.append(np.array(losses).mean())
            if vloss < best_loss:
                best_loss = vloss
                state = {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'train_loss': train_loss
                }
                torch.save(state, args.modelDir)
    
        plt.figure(figsize=(10, 5))
        plt.title("Loss During Training")
        plt.plot(train_loss, label="train loss")
        plt.xlabel("epoch")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig('./loss.png')  # save before show(); show() clears the current figure
        plt.show()
        #-------------------------------- Inference on the test set -----------------------------------
    def valid():
        args = parse_args()
        trfm = T.Compose([
            T.ToPILImage(),
            T.Resize(args.image_size),
            T.ToTensor(),
            T.Normalize([0.625, 0.448, 0.688],
                        [0.131, 0.177, 0.101]),
        ])
        subm = []
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            print("load model from {}".format(args.modelDir))
        model.eval()
        test_mask = pd.read_csv(os.path.join(args.data_path, 'test_a_samplesubmit.csv'), sep='\t', names=['name', 'mask'])
        test_mask['name'] = test_mask['name'].apply(lambda x: os.path.join(args.data_path,'test_a/') + x)
    
        for idx, name in enumerate(tqdm(test_mask['name'].iloc[:])):
            image = cv2.imread(name)
            image = trfm(image)
            with torch.no_grad():
                image = image.to(DEVICE)[None]
                score = model(image)['out'][0][0]
                score_sigmoid = score.sigmoid().cpu().numpy()
                score_sigmoid = (score_sigmoid > 0.5).astype(np.uint8)
                # nearest-neighbour keeps the mask binary when resizing back to the 512x512 submission size
                score_sigmoid = cv2.resize(score_sigmoid, (512, 512), interpolation=cv2.INTER_NEAREST)
                # break
            subm.append([name.split('/')[-1], rle_encode(score_sigmoid)])
        subm = pd.DataFrame(subm)
        subm.to_csv('./tmp.csv', index=None, header=None, sep='\t')
        # plt.imsave('./output.png',rle_decode(subm[1].fillna('').iloc[0]), cmap='gray')
    
    if __name__ == '__main__':
        main()   # train
        valid()  # run inference on test_a and write the submission file ./tmp.csv
    
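
    Note: validation() above tracks only the loss, while the leaderboard scores submissions with the Dice coefficient. A small helper for tracking it during validation (a sketch; `dice_coef` is not part of the original code, and it assumes predictions are thresholded at 0.5 into binary masks):

    import numpy as np

    def dice_coef(pred, target, eps=1e-6):
        # Dice coefficient between two binary masks of the same shape;
        # eps avoids division by zero on empty masks
        pred = (pred > 0.5).astype(np.uint8)
        target = (target > 0.5).astype(np.uint8)
        inter = (pred * target).sum()
        return (2.0 * inter + eps) / (pred.sum() + target.sum() + eps)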

    Tianchidataset.py

    import torch.utils.data as D
    import cv2
    from torchvision import transforms as T
    from rle import rle_decode
    IMAGE_SIZE = 256  # must match the --image_size flag used in main.py
    class TianChiDataset(D.Dataset):
        def __init__(self, paths, rles, transform, test_mode=False):
            self.paths = paths
            self.rles = rles
            self.transform = transform
            self.test_mode = test_mode
    
            self.len = len(paths)
            self.as_tensor = T.Compose([
                T.ToPILImage(),
                T.Resize(IMAGE_SIZE),
                T.ToTensor(),
                T.Normalize([0.625, 0.448, 0.688],
                            [0.131, 0.177, 0.101]),
            ])
    
        # get data operation
        def __getitem__(self, index):
            img = cv2.imread(self.paths[index])
            if not self.test_mode:
                mask = rle_decode(self.rles[index])
                augments = self.transform(image=img, mask=mask)
                return self.as_tensor(augments['image']), augments['mask'][None]
            else:
                return self.as_tensor(img), ''
    
        def __len__(self):
            """
            Total number of samples in the dataset
            """
            return self.len
    
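    A quick shape check for the dataset (a self-contained sketch: it writes a dummy 512x512 image to a temporary file and uses a made-up RLE string, so no competition data is needed):

    import os, tempfile
    import cv2
    import numpy as np
    import albumentations as A
    from Tianchidataset import TianChiDataset

    # dummy 512x512 RGB image on disk, plus a made-up RLE run of 100 pixels starting at pixel 1
    tmp_path = os.path.join(tempfile.gettempdir(), 'dummy.png')
    cv2.imwrite(tmp_path, np.zeros((512, 512, 3), dtype=np.uint8))

    ds = TianChiDataset([tmp_path], ['1 100'], A.Compose([A.Resize(256, 256)]), False)
    img, mask = ds[0]
    print(img.shape, mask.shape)  # expected: torch.Size([3, 256, 256]) and (1, 256, 256)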

    loss.py

    import torch.nn as nn
    class SoftDiceLoss(nn.Module):
        def __init__(self, smooth=1., dims=(-2, -1)):
            super(SoftDiceLoss, self).__init__()
            self.smooth = smooth
            self.dims = dims
    
        def forward(self, x, y):
            tp = (x * y).sum(self.dims)
            fp = (x * (1 - y)).sum(self.dims)
            fn = ((1 - x) * y).sum(self.dims)
    
            dc = (2 * tp + self.smooth) / (2 * tp + fp + fn + self.smooth)
            dc = dc.mean()
            return 1 - dc
    def loss_fn(y_pred, y_true):
        bce_fn = nn.BCEWithLogitsLoss()
        dice_fn = SoftDiceLoss()
        bce = bce_fn(y_pred, y_true)
        dice = dice_fn(y_pred.sigmoid(), y_true)
        # weighted sum: BCE for stable per-pixel gradients, Dice for region overlap
        return 0.8 * bce + 0.2 * dice
    
    
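    A quick sanity check of the combined loss on random tensors (a minimal sketch; near-perfect logits should drive the loss toward 0):

    import torch
    from loss import loss_fn

    logits = torch.randn(4, 1, 256, 256)                     # raw model outputs (pre-sigmoid)
    target = torch.randint(0, 2, (4, 1, 256, 256)).float()   # binary ground-truth masks

    print(loss_fn(logits, target).item())            # loss for random predictions
    print(loss_fn(target * 20 - 10, target).item())  # confident correct logits -> loss near 0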

    rle.py

    import numpy as np
    def rle_encode(im):
        '''
        im: numpy array, 1 - mask, 0 - background
        Returns run-length encoding as a formatted string
        '''
        pixels = im.flatten(order = 'F')
        pixels = np.concatenate([[0], pixels, [0]])
        runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
        runs[1::2] -= runs[::2]
        return ' '.join(str(x) for x in runs)
    
    def rle_decode(mask_rle, shape=(512, 512)):
        '''
        mask_rle: run-length encoding as a formatted string (start length)
        shape: (height, width) of array to return
        Returns numpy array, 1 - mask, 0 - background
        '''
        s = mask_rle.split()
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
        starts -= 1
        ends = starts + lengths
        img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
        return img.reshape(shape, order='F')
    
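    The encoding flattens the mask column-major (order='F') and uses 1-based start indices, matching the competition's submission format. A minimal round-trip check:

    import numpy as np
    from rle import rle_encode, rle_decode

    mask = np.zeros((512, 512), dtype=np.uint8)
    mask[100:200, 100:200] = 1                # a 100x100 square of foreground

    rle = rle_encode(mask)                    # '51301 100 51813 100 ...' (start length pairs)
    assert (rle_decode(rle) == mask).all()    # decoding recovers the original mask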

    2. Submitting Test Results

    1) Data augmentation methods

    A.Rotate(),
    A.ShiftScaleRotate(),
    A.Cutout(),
    # A.RandomScale(),
    

    This iteration builds on the original program by adding the data augmentation methods above and switching the model to deeplabv3_resnet101; because the server was heavily occupied, only this version was trained to produce the submitted result. The model swap is sketched below:
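
    The swap only touches get_model(); a minimal sketch (assuming the same single-channel output head as the FCN baseline; note that DeepLabV3's final classifier layer takes 256 input channels rather than the FCN head's 512):

    import torch.nn as nn
    import torchvision

    def get_model():
        # DeepLabV3 with a ResNet-101 backbone and pretrained weights
        model = torchvision.models.segmentation.deeplabv3_resnet101(True)
        # the last layer of DeepLabHead is Conv2d(256, num_classes, 1),
        # so the single-channel replacement takes 256 input channels
        model.classifier[4] = nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
        return model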

    A little progress every day.