    Semantic Segmentation: Surface Building Recognition

    Competition description and data download: 零基础入门语义分割-地表建筑物识别 - 天池大赛 - 阿里云天池 (aliyun.com)

    Experiment Log

    1. Understanding the task and the baseline

    1) Backbone code

    main.py

    import numpy as np
    import pandas as pd
    import os
    import numba, cv2, time
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    import warnings
    warnings.filterwarnings('ignore')
    import albumentations as A
    import torch
    import torch.nn as nn
    import torch.utils.data as D
    import torchvision
    from rle import rle_encode, rle_decode
    from Tianchidataset import TianChiDataset
    from loss import loss_fn
    import argparse
    from torchvision import transforms as T
    
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    def get_model():
        # FCN with a ResNet-101 backbone, pretrained on COCO
        model = torchvision.models.segmentation.fcn_resnet101(pretrained=True)
    
        #     pth = torch.load("../input/pretrain-coco-weights-pytorch/fcn_resnet50_coco-1167a1af.pth")
        #     for key in ["aux_classifier.0.weight", "aux_classifier.1.weight", "aux_classifier.1.bias", "aux_classifier.1.running_mean", "aux_classifier.1.running_var", "aux_classifier.1.num_batches_tracked", "aux_classifier.4.weight", "aux_classifier.4.bias"]:
        #         del pth[key]
    
        # replace the final classifier layer so the network outputs a single channel (building mask)
        model.classifier[4] = nn.Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))
        return model
    
    
    @torch.no_grad()
    def validation(model, loader, loss_fn):
        losses = []
        model.eval()
        for image, target in loader:
            image, target = image.to(DEVICE), target.float().to(DEVICE)
            output = model(image)['out']
            loss = loss_fn(output, target)
            losses.append(loss.item())
    
        return np.array(losses).mean()
    
    def parse_args():
        parser = argparse.ArgumentParser(description='Train semantic segmentation network')
        parser.add_argument('--modelDir',
                            help='saved model path name',
                            default="./checkpoints/model_best.pth",
                            type=str)
        parser.add_argument('--data_path',
                            help='dataset path',
                            default='/home/dzh/Desktop/data/dataset/segmentation/tianchi',
                            type=str)
        parser.add_argument('--epoch',
                            help='total train epoch num',
                            default=30,
                            type=int)
        parser.add_argument('--batch_size',
                            help='training batch size',
                            default=160,
                            type=int)
        parser.add_argument('--image_size',
                            help='input image size (resized to image_size x image_size)',
                            default=256,
                            type=int)
        parser.add_argument('--gpu_ids',
                            help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU',
                            default='0,1,2,3',
                            type=str)
        args = parser.parse_args()
        # parse the comma-separated gpu id string into a list of ints for DataParallel
        args.gpu_ids = [int(i) for i in args.gpu_ids.split(',')]
        return args
    
    
    def main():
        args = parse_args()
        # -------------------- Load data and set up augmentation --------------------
        train_mask = pd.read_csv(os.path.join(args.data_path, 'train_mask.csv'), sep='\t', names=['name', 'mask'])
        train_mask['name'] = train_mask['name'].apply(lambda x: os.path.join(args.data_path, 'train/') + x)
        # sanity check: re-encoding the decoded first mask should reproduce the original RLE string
        mask = rle_decode(train_mask['mask'].iloc[0])
        print(rle_encode(mask) == train_mask['mask'].iloc[0])
    
        trfm = A.Compose([
            A.Resize(args.image_size, args.image_size),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(),
        ])
        dataset = TianChiDataset(
            train_mask['name'].values,
            train_mask['mask'].fillna('').values,
            trfm, False
        )
        # split by index: every i % 7 == 0 sample goes to validation, and only
        # i % 7 == 1 samples go to training (a 1/7 subsample, presumably to speed up
        # experiments; restore the commented-out else branch to train on all the rest)
        valid_idx, train_idx = [], []
        for i in range(len(dataset)):
            if i % 7 == 0:
                valid_idx.append(i)
            # else:
            elif i % 7 == 1:
                train_idx.append(i)
    
        train_ds = D.Subset(dataset, train_idx)
        valid_ds = D.Subset(dataset, valid_idx)
        # define training and validation data loaders
        loader = D.DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=0)
        vloader = D.DataLoader(valid_ds, batch_size=args.batch_size, shuffle=False, num_workers=0)
    
        # -------------------- Load model and optimizer --------------------
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
    
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)
        train_loss = []
        start_epoch = 0  # always defined, even when the checkpoint lacks an 'epoch' entry
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            if 'epoch' in checkpoint:
                start_epoch = checkpoint['epoch']
            if 'optimizer' in checkpoint:
                optimizer.load_state_dict(checkpoint['optimizer'])
            if 'train_loss' in checkpoint:
                train_loss = checkpoint['train_loss']
            print("load model from {}".format(args.modelDir))
        else:
            print("==> no checkpoint found at '{}'".format(args.modelDir))
    
        # -------------------- Training --------------------
        header = r'''
                Train | Valid
        Epoch |  Loss |  Loss | Time, m
        '''
        #          Epoch         metrics            time
        raw_line = '{:6d}' + '\u2502{:7.3f}' * 2 + '\u2502{:6.2f}'
        print(header)
        best_loss = 10
    
        for epoch in range(start_epoch, args.epoch):
            losses = []
            start_time = time.time()
            model.train()
            for image, target in tqdm(loader):
                image, target = image.to(DEVICE), target.float().to(DEVICE)
                optimizer.zero_grad()
                output = model(image)['out']
                loss = loss_fn(output, target)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
                # print(loss.item())
    
            vloss = validation(model, vloader, loss_fn)
            print(raw_line.format(epoch, np.array(losses).mean(), vloss, (time.time() - start_time) / 60))
            train_loss.append(np.array(losses).mean())
            if vloss < best_loss:
                best_loss = vloss
                state = {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'train_loss': train_loss,
                }
                torch.save(state, args.modelDir)
    
        plt.figure(figsize=(10, 5))
        plt.title("Loss During Training")
        plt.plot(train_loss, label="loss")
        plt.xlabel("epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig('./loss.png')  # save before show(), since show() clears the current figure
        plt.show()

    # -------------------- Inference on test set A and submission --------------------
    def valid():
        args = parse_args()
        trfm = T.Compose([
            T.ToPILImage(),
            T.Resize(args.image_size),
            T.ToTensor(),
            T.Normalize([0.625, 0.448, 0.688],
                        [0.131, 0.177, 0.101]),
        ])
        subm = []
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            print("load model from {}".format(args.modelDir))
        model.eval()
        test_mask = pd.read_csv(os.path.join(args.data_path, 'test_a_samplesubmit.csv'), sep='\t', names=['name', 'mask'])
        test_mask['name'] = test_mask['name'].apply(lambda x: os.path.join(args.data_path, 'test_a/') + x)
    
        for idx, name in enumerate(tqdm(test_mask['name'].iloc[:])):
            image = cv2.imread(name)
            image = trfm(image)
            with torch.no_grad():
                image = image.to(DEVICE)[None]
                score = model(image)['out'][0][0]
                score_sigmoid = score.sigmoid().cpu().numpy()
                score_sigmoid = (score_sigmoid > 0.5).astype(np.uint8)
                # nearest-neighbor keeps the resized mask strictly binary
                score_sigmoid = cv2.resize(score_sigmoid, (512, 512), interpolation=cv2.INTER_NEAREST)
                # break
            subm.append([name.split('/')[-1], rle_encode(score_sigmoid)])
        subm = pd.DataFrame(subm)
        subm.to_csv('./tmp.csv', index=None, header=None, sep='\t')
        # plt.imsave('./output.png',rle_decode(subm[1].fillna('').iloc[0]), cmap='gray')
    
    if __name__ == '__main__':
        main()
        valid()
    

    Tianchidataset.py

    import torch.utils.data as D
    import cv2
    from torchvision import transforms as T
    from rle import rle_decode
    IMAGE_SIZE = 256
    class TianChiDataset(D.Dataset):
        def __init__(self, paths, rles, transform, test_mode=False):
            self.paths = paths
            self.rles = rles
            self.transform = transform
            self.test_mode = test_mode
    
            self.len = len(paths)
            self.as_tensor = T.Compose([
                T.ToPILImage(),
                T.Resize(IMAGE_SIZE),
                T.ToTensor(),
                T.Normalize([0.625, 0.448, 0.688],
                            [0.131, 0.177, 0.101]),
            ])
    
        # fetch one sample; in test mode the mask is an empty placeholder
        def __getitem__(self, index):
            img = cv2.imread(self.paths[index])
            if not self.test_mode:
                mask = rle_decode(self.rles[index])
                augments = self.transform(image=img, mask=mask)
                # [None] adds a channel dimension to the mask: (H, W) -> (1, H, W)
                return self.as_tensor(augments['image']), augments['mask'][None]
            else:
                return self.as_tensor(img), ''
    
        def __len__(self):
            """
            Total number of samples in the dataset
            """
            return self.len
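
    As a quick sanity check of the dataset class, one can pull a single sample and inspect the shapes. The sketch below is illustrative rather than part of the original program: data_path is a placeholder, and it assumes the same train_mask.csv layout used in main.py.

    import os
    import pandas as pd
    import albumentations as A
    from Tianchidataset import TianChiDataset

    data_path = '/path/to/tianchi'  # placeholder path, adjust to your setup
    train_mask = pd.read_csv(os.path.join(data_path, 'train_mask.csv'), sep='\t', names=['name', 'mask'])
    train_mask['name'] = train_mask['name'].apply(lambda x: os.path.join(data_path, 'train/') + x)

    ds = TianChiDataset(train_mask['name'].values,
                        train_mask['mask'].fillna('').values,
                        A.Compose([A.Resize(256, 256)]), False)

    image, mask = ds[0]
    print(image.shape)  # torch.Size([3, 256, 256]) after as_tensor
    print(mask.shape)   # (1, 256, 256) -- numpy mask with a leading channel dim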
    

    loss.py

    import torch.nn as nn
    class SoftDiceLoss(nn.Module):
        def __init__(self, smooth=1., dims=(-2, -1)):
            super(SoftDiceLoss, self).__init__()
            self.smooth = smooth
            self.dims = dims
    
        def forward(self, x, y):
            tp = (x * y).sum(self.dims)
            fp = (x * (1 - y)).sum(self.dims)
            fn = ((1 - x) * y).sum(self.dims)
    
            dc = (2 * tp + self.smooth) / (2 * tp + fp + fn + self.smooth)
            dc = dc.mean()
            return 1 - dc
    def loss_fn(y_pred, y_true):
        # weighted combination of BCE on raw logits and soft Dice on probabilities
        bce_fn = nn.BCEWithLogitsLoss()
        dice_fn = SoftDiceLoss()
        bce = bce_fn(y_pred, y_true)
        dice = dice_fn(y_pred.sigmoid(), y_true)
        return 0.8 * bce + 0.2 * dice
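
    As a quick behavioral check of the combined loss, a near-perfect prediction should drive both the BCE and Dice terms toward zero, while random logits should not. A minimal sketch with synthetic tensors (shapes chosen for illustration):

    import torch
    from loss import loss_fn

    target = (torch.rand(2, 1, 256, 256) > 0.5).float()  # random binary masks
    perfect_logits = (target * 2 - 1) * 20               # large +/- logits agreeing with the target
    random_logits = torch.randn(2, 1, 256, 256)

    print(loss_fn(perfect_logits, target).item())  # close to 0
    print(loss_fn(random_logits, target).item())   # noticeably larger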
    
    

    rle.py

    import numpy as np
    def rle_encode(im):
        '''
        im: numpy array, 1 - mask, 0 - background
        Returns the run-length encoding as a formatted string
        '''
        pixels = im.flatten(order = 'F')
        pixels = np.concatenate([[0], pixels, [0]])
        runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
        runs[1::2] -= runs[::2]
        return ' '.join(str(x) for x in runs)
    
    def rle_decode(mask_rle, shape=(512, 512)):
        '''
        mask_rle: run-length as a formatted string (start length)
        shape: (height, width) of array to return
        Returns numpy array, 1 - mask, 0 - background
        '''
        s = mask_rle.split()
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
        starts -= 1
        ends = starts + lengths
        img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
        return img.reshape(shape, order='F')
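
    A round-trip check confirms that rle_encode and rle_decode are inverses of each other (the same check main.py prints for the first training mask). A minimal sketch with a synthetic mask:

    import numpy as np
    from rle import rle_encode, rle_decode

    mask = np.zeros((512, 512), dtype=np.uint8)
    mask[100:200, 150:300] = 1  # a synthetic rectangular "building"

    rle = rle_encode(mask)
    restored = rle_decode(rle, shape=(512, 512))
    print((restored == mask).all())  # True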
    

    2) Submitting test results

    1. Data augmentation methods

    A.Rotate(),
    A.ShiftScaleRotate(),
    A.Cutout(),
    # A.RandomScale(),
    

    This round of improvements adds the augmentation methods above to the original program and switches the model to deeplabv3_resnet101; a sketch of how the two pieces could fit together follows.
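    The sketch below is illustrative rather than the exact training configuration: the new transforms use albumentations defaults, and note that the DeepLabV3 classifier head takes 256 input channels where the FCN head took 512.

    import albumentations as A
    import torch.nn as nn
    import torchvision

    trfm = A.Compose([
        A.Resize(256, 256),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(),
        A.Rotate(),
        A.ShiftScaleRotate(),
        A.Cutout(),
    ])

    def get_model():
        # same single-channel head replacement as the baseline, on a DeepLabV3 backbone
        model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
        model.classifier[4] = nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
        return model

    Because the server was heavily occupied, only this deeplabv3_resnet101 version was trained; the result achieved was: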

    A little progress every day.