• Pytorch自定义dataloader以及在迭代过程中返回image的name


    pytorch官方给的加载数据的方式是已经定义好的dataset以及loader,如何加载自己本地的图片以及label?

    形如数据格式为

    image1 label1

    image2 label2

    ...

    imagen labeln

    实验中我采用的数据格式如下:每行以图片的名字开头,后面跟着该图片对应的数值。代码只取其中前9个数值作为label(即每一个label是一个9维的向量),每行最后一个数值在读取时会被丢弃

    1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.304295635957 0.952577642997 0.0614006041909 0.0938333659301 -0.995587916479 0.126405046864 -0.999368204665 0.0355414055005 0.382030624629 0.0
    1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.271224474168 0.962516121742 0.061399602839 0.128727689658 -0.991679979588 0.126495313272 -0.999999890616 0.000467726796359 0.381981952872 0.0
    1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237868729379 0.971297311632 0.0614713240576 0.163626102983 -0.986522426721 0.1265439964 -0.999400990041 -0.0346072406472 0.382020891324 0.0
    1.1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.303575822293 0.95280728383 0.0675229548933 0.0939225945957 -0.995579502714 0.138745857429 -0.999376861795 0.0352971402251 0.410670255038 0.1
    1.1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270745576918 0.962650940154 0.0674654115238 0.128659340525 -0.991688849436 0.138685653232 -0.999999909615 0.000425170029598 0.410739827476 0.1
    1.1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.23757921143 0.971368168253 0.0674866175928 0.16322766122 -0.986588430204 0.138789623782 -0.999406504329 -0.0344476284471 0.410661183171 0.1
    1.2_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305474635089 0.952200213882 0.0736939767933 0.0939968709874 -0.995572492712 0.150981626608 -0.999370773952 0.0354690875311 0.437620875774 0.2
    1.2_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270346113421 0.962763199836 0.073518963401 0.128433455959 -0.991718129002 0.150964425444 -0.999999924062 0.000389711583812 0.437667827367 0.2
    1.2_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237337349604 0.971427291403 0.0734898449879 0.162895476227 -0.986643331617 0.150931800731 -0.999411541516 -0.0343011761519 0.437608139736 0.2
    1.3_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305514664536 0.952187371137 0.0795990377393 0.0941741911595 -0.995555735115 0.162914965783 -0.999378340534 0.0352552474342 0.462816755558 0.3
    1.3_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.272366931798 0.962193459998 0.0796135882128 0.128398130503 -0.991722703221 0.162940731132 -0.999999935257 0.000359841646368 0.462733965419 0.3

    ...

    源程序如下

      1 import torch
      2 import torch.nn as nn
      3 import math
      4 import os
      5 from PIL import Image
      6 import random
      7 from torchvision import datasets, transforms
      8 import torch.utils.data as data
      9 from torch.autograd import Variable
     10 
     # Select CUDA device 0 for everything that follows.
     # (Setting os.environ["CUDA_VISIBLE_DEVICES"] was the alternative left disabled.)
     torch.cuda.set_device(0)

     # Mini-batch size handed to the DataLoader.
     batch_size = 8
     # Extra keyword arguments forwarded to the DataLoader.
     kwargs = dict(num_workers=1, pin_memory=True)
     15 # load the data
     def random_choose_data(label_path, n_excluded=200000, n_train=150000, n_test=50000):
         """Split the lines of a label file into train and test label lists.

         ``n_excluded`` lines are drawn at random and set aside; the remaining
         distinct lines are shuffled and cut into a training slice followed by
         a test slice.  The default sizes are the values that used to be
         hard-coded.

         Args:
             label_path: Path of the text file holding one label entry per line.
             n_excluded: Number of randomly sampled lines left out of both splits.
             n_train: Maximum number of lines returned as training labels.
             n_test: Maximum number of lines returned as test labels.

         Returns:
             A ``(train_label, test_label)`` tuple; each element is a list of
             raw lines (line endings included) suitable for ``myImageFloder``.

         Raises:
             ValueError: If the file holds fewer than ``n_excluded`` lines
                 (raised by ``random.sample``).
         """
         random.seed(1)  # fixed seed so the split can be repeated
         # The context manager closes the label file even if reading fails.
         with open(label_path) as label_file:
             lines = label_file.readlines()

         excluded = set(random.sample(lines, n_excluded))
         # Sort before shuffling: the iteration order of a set of strings
         # changes from one interpreter run to the next (hash randomisation),
         # which would make the seeded shuffle give a different split each run.
         remaining = sorted(set(lines) - excluded)
         random.shuffle(remaining)

         train_label = remaining[:n_train]
         test_label = remaining[n_train:n_train + n_test]
         return train_label, test_label
     27 
     28 
     29 # def my data loader, return the data and corresponding label
     def default_loader(path):
         """Load the image stored at ``path`` and return it as an RGB PIL image.

         The file is opened explicitly and closed as soon as the pixel data has
         been converted, instead of leaving the handle for the garbage collector.

         Args:
             path: File system path of the image.

         Returns:
             A PIL image in ``'RGB'`` mode.
         """
         with open(path, 'rb') as f:
             img = Image.open(f)
             # convert() reads the pixel data, so it must run before the file closes.
             return img.convert('RGB')
     32 
     33 
     class myImageFloder(data.Dataset):
         """Dataset of images listed in label lines, yielding image, label and name.

         Each label line is whitespace separated: the first field is the image
         file name (relative to ``root``) and the following fields are numbers.
         The last number on every line is discarded; the others form the label.

         Args:
             root: Directory that contains the image files.
             label: Iterable of label lines (e.g. a list of strings).
             transform: Optional callable applied to the loaded image.
             target_transform: Optional callable applied to the label tensor.
             loader: Callable that turns an image path into an image object.
         """

         def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
             c = 0  # number of non-blank label lines seen
             imgs = []
             class_names = ['regression']
             for line in label:
                 fields = line.split()
                 if not fields:
                     # Blank line: nothing to parse.
                     continue
                 fn = fields[0]
                 # Keep only entries whose image file actually exists under root.
                 if os.path.isfile(os.path.join(root, fn)):
                     # fields[1:-1]: every number except the last one on the line.
                     imgs.append((fn, tuple(float(v) for v in fields[1:-1])))
                 c = c + 1
             # Note: c counts label lines processed, including those whose
             # image file was not found and therefore not added to imgs.
             print('the total image is', c)
             print(class_names)
             self.root = root
             self.imgs = imgs
             self.classes = class_names
             self.transform = transform
             self.target_transform = target_transform
             self.loader = loader

         def __getitem__(self, index):
             """Return ``(image, label tensor, file name)`` for entry ``index``."""
             fn, label = self.imgs[index]
             img = self.loader(os.path.join(self.root, fn))
             if self.transform is not None:
                 img = self.transform(img)
             target = torch.Tensor(label)
             if self.target_transform is not None:
                 target = self.target_transform(target)
             # The file name is returned as well so callers can tell which
             # image each prediction belongs to.
             return img, target, fn

         def __len__(self):
             """Return the number of usable (existing) images."""
             return len(self.imgs)

         def getName(self):
             """Return the list of class names (``['regression']``)."""
             return self.classes
     71 
     # Location of the images and of the label file describing them.
     test_data_root = "/home/ying/data/google_streetview_train_test1"
     data_label = "/home/ying/data/google_streetview_train_test1/label.txt"

     # ToTensor rescales pixel values from [0, 255] to [0, 1].
     mytransform = transforms.Compose([transforms.ToTensor()])

     # Only the test split is wrapped in a DataLoader here.
     train_label, test_label = random_choose_data(data_label)
     test_loader = torch.utils.data.DataLoader(
         myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
         batch_size=batch_size,
         shuffle=True,
         **kwargs
     )
     79 
     80 
     def conv3x3(in_planes, out_planes, stride=1):
         """Build a bias-free 3x3 convolution with padding 1.

         Args:
             in_planes: Number of input channels.
             out_planes: Number of output channels.
             stride: Convolution stride (default 1).

         Returns:
             The configured ``nn.Conv2d`` module.
         """
         conv = nn.Conv2d(
             in_channels=in_planes,
             out_channels=out_planes,
             kernel_size=3,
             stride=stride,
             padding=1,
             bias=False,
         )
         return conv
     85 
     86 
     class BasicBlock(nn.Module):
         """Residual block built from two 3x3 convolutions.

         Args:
             inplanes: Number of input channels.
             planes: Number of output channels.
             stride: Stride of the first convolution (default 1).
             downsample: Optional module applied to the input before it is
                 added back to the main path.
         """

         # Output channels are planes * expansion.
         expansion = 1

         def __init__(self, inplanes, planes, stride=1, downsample=None):
             super(BasicBlock, self).__init__()
             # Only the first convolution carries the stride.
             self.conv1 = conv3x3(inplanes, planes, stride)
             self.bn1 = nn.BatchNorm2d(planes)
             self.relu = nn.ReLU(inplace=True)
             self.conv2 = conv3x3(planes, planes)
             self.bn2 = nn.BatchNorm2d(planes)
             self.downsample = downsample
             self.stride = stride

         def forward(self, x):
             """Apply conv-bn-relu, conv-bn, add the shortcut, then relu."""
             y = self.relu(self.bn1(self.conv1(x)))
             y = self.bn2(self.conv2(y))

             # The shortcut is the input itself unless a downsample module is set.
             shortcut = x if self.downsample is None else self.downsample(x)

             y += shortcut
             return self.relu(y)
    117 
    118 
    class Bottleneck(nn.Module):
        """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

        Args:
            inplanes: Number of input channels.
            planes: Channel width of the first two convolutions; the block
                outputs ``planes * 4`` channels.
            stride: Stride of the 3x3 convolution (default 1).
            downsample: Optional module applied to the input before it is
                added back to the main path.
        """

        expansion = 4

        def __init__(self, inplanes, planes, stride=1, downsample=None):
            super(Bottleneck, self).__init__()
            out_planes = planes * 4
            # 1x1 convolution: changes the channel count, not the spatial size.
            self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
            self.bn1 = nn.BatchNorm2d(planes)
            # 3x3 convolution: the only layer that carries the stride.
            self.conv2 = nn.Conv2d(
                planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
            )
            self.bn2 = nn.BatchNorm2d(planes)
            # 1x1 convolution: widens to four times ``planes``.
            self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
            self.bn3 = nn.BatchNorm2d(out_planes)
            self.relu = nn.ReLU(inplace=True)
            self.downsample = downsample
            self.stride = stride

        def forward(self, x):
            """Run the three conv-bn stages, add the shortcut, then relu."""
            y = self.relu(self.bn1(self.conv1(x)))
            y = self.relu(self.bn2(self.conv2(y)))
            y = self.bn3(self.conv3(y))

            # The shortcut is the input itself unless a downsample module is set.
            shortcut = x if self.downsample is None else self.downsample(x)

            y += shortcut
            return self.relu(y)
    156 
    157 
    class ResNet(nn.Module):
        """ResNet backbone with a linear head producing ``num_classes`` outputs.

        Args:
            block: Residual block class; it must expose an ``expansion`` class
                attribute and accept ``(inplanes, planes, stride, downsample)``.
            layers: Sequence of four ints, the number of blocks in each stage.
            num_classes: Output size of the final linear layer (default 9).
        """

        def __init__(self, block, layers, num_classes=9):
            super(ResNet, self).__init__()
            # Channel count fed to the next block; updated by _make_layer.
            self.inplanes = 64
            # Stem: the strided convolution and the max-pool each halve the size.
            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                   bias=False)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu = nn.ReLU(inplace=True)
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            self.layer1 = self._make_layer(block, 64, layers[0])
            self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
            self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
            self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
            # 7x7 average pooling; this matches the final feature map produced
            # by a 224x224 input (five halvings: 224 -> 7).
            self.avgpool = nn.AvgPool2d(7)
            # Size the head from the block type instead of hard-coding 2048, so
            # blocks with expansion 1 work as well (expansion 4 still gives 2048).
            self.fc = nn.Linear(512 * block.expansion, num_classes)

            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    # Normal init with std sqrt(2 / n), where n is the kernel
                    # area times the number of output channels.
                    n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                    m.weight.data.normal_(0, math.sqrt(2. / n))
                elif isinstance(m, nn.BatchNorm2d):
                    m.weight.data.fill_(1)
                    m.bias.data.zero_()

        def _make_layer(self, block, planes, blocks, stride=1):
            """Stack ``blocks`` residual blocks into one stage.

            Args:
                block: Residual block class.
                planes: Base channel width of the stage.
                blocks: Number of blocks in the stage.
                stride: Stride of the first block (default 1).

            Returns:
                An ``nn.Sequential`` containing the stage's blocks.
            """
            downsample = None
            # A projection shortcut is needed whenever the first block changes
            # the spatial size or the channel count.
            if stride != 1 or self.inplanes != planes * block.expansion:
                downsample = nn.Sequential(
                    nn.Conv2d(self.inplanes, planes * block.expansion,
                              kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(planes * block.expansion),
                )

            layers = []
            layers.append(block(self.inplanes, planes, stride, downsample))
            # Later blocks (and the next stage) see the expanded channel count.
            self.inplanes = planes * block.expansion
            for _ in range(1, blocks):
                layers.append(block(self.inplanes, planes))

            return nn.Sequential(*layers)

        def forward(self, x):
            """Run stem, four stages, average pooling and the linear head."""
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.maxpool(x)

            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)

            x = self.avgpool(x)
            x = x.view(x.size(0), -1)  # flatten everything except the batch axis
            x = self.fc(x)
            return x
    218 
    219 
    def resnet50(pretrained=True, weights_path='./resnet50_20170907_state_dict.pth'):
        """Construct a ResNet-50 model (Bottleneck blocks, stages of 3/4/6/3).

        Args:
            pretrained (bool): If True, load weights from ``weights_path``;
                if False, return the freshly initialised model.
            weights_path (str): Path of a state-dict file readable by
                ``torch.load``.  This is a local checkpoint, not a model-zoo
                download.

        Returns:
            The constructed ``ResNet`` instance.
        """
        model = ResNet(Bottleneck, [3, 4, 6, 3])
        # The flag used to be ignored and the checkpoint was always loaded.
        if pretrained:
            model.load_state_dict(torch.load(weights_path))
        return model
    cnn = resnet50(pretrained=True)  # the network has 9 outputs
    cnn.cuda()
    cnn.eval()
    criterion = nn.MSELoss().cuda()

    # Take the batch count from the loader instead of a hard-coded total.
    num_batches = len(test_loader)

    # Evaluation only: no_grad() avoids building the autograd graph.
    # NOTE: torch.no_grad() and loss.item() need PyTorch >= 0.4; indexing a
    # 0-dim loss with loss.data[0] raises an error on current releases.
    with torch.no_grad():
        # Each batch is (images, labels, file names), as produced by the dataset.
        for i, (test_images, test_labels, fn) in enumerate(test_loader):
            test_images = test_images.cuda()
            test_labels = test_labels.cuda()
            outputs = cnn(test_images)
            print(outputs.data[0])  # prediction for the first image of the batch
            print(fn)  # file names of the images in this batch
            loss = criterion(outputs, test_labels)
            print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, num_batches, loss.item()))

    着重看定义dataloader以及返回图像名称的一段代码:

     def random_choose_data(label_path, n_excluded=200000, n_train=150000, n_test=50000):
         """Split the lines of a label file into train and test label lists.

         ``n_excluded`` lines are drawn at random and set aside; the remaining
         distinct lines are shuffled and cut into a training slice followed by
         a test slice.  The default sizes are the values that used to be
         hard-coded.

         Args:
             label_path: Path of the text file holding one label entry per line.
             n_excluded: Number of randomly sampled lines left out of both splits.
             n_train: Maximum number of lines returned as training labels.
             n_test: Maximum number of lines returned as test labels.

         Returns:
             A ``(train_label, test_label)`` tuple; each element is a list of
             raw lines (line endings included) suitable for ``myImageFloder``.

         Raises:
             ValueError: If the file holds fewer than ``n_excluded`` lines
                 (raised by ``random.sample``).
         """
         random.seed(1)  # fixed seed so the split can be repeated
         # The context manager closes the label file even if reading fails.
         with open(label_path) as label_file:
             lines = label_file.readlines()

         excluded = set(random.sample(lines, n_excluded))
         # Sort before shuffling: the iteration order of a set of strings
         # changes from one interpreter run to the next (hash randomisation),
         # which would make the seeded shuffle give a different split each run.
         remaining = sorted(set(lines) - excluded)
         random.shuffle(remaining)

         train_label = remaining[:n_train]
         test_label = remaining[n_train:n_train + n_test]
         return train_label, test_label
    12 
    13 
    14 # def my data loader, return the data and corresponding label
    def default_loader(path):
        """Load the image stored at ``path`` and return it as an RGB PIL image.

        The file is opened explicitly and closed as soon as the pixel data has
        been converted, instead of leaving the handle for the garbage collector.

        Args:
            path: File system path of the image.

        Returns:
            A PIL image in ``'RGB'`` mode.
        """
        with open(path, 'rb') as f:
            img = Image.open(f)
            # convert() reads the pixel data, so it must run before the file closes.
            return img.convert('RGB')
    17 
    18 
    class myImageFloder(data.Dataset):  # subclass of torch.utils.data.Dataset
        """Dataset of images listed in label lines, yielding image, label and name.

        Each label line is whitespace separated: the first field is the image
        file name (relative to ``root``) and the following fields are numbers.
        The last number on every line is discarded; the others form the label.

        Args:
            root: Directory that contains the image files.
            label: Iterable of label lines (e.g. a list of strings).
            transform: Optional callable applied to the loaded image.
            target_transform: Optional callable applied to the label tensor.
            loader: Callable that turns an image path into an image object.
        """

        def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
            c = 0  # number of non-blank label lines seen
            imgs = []
            class_names = ['regression']
            for line in label:
                fields = line.split()
                if not fields:
                    # Blank line: nothing to parse.
                    continue
                fn = fields[0]
                # Keep only entries whose image file actually exists under root.
                if os.path.isfile(os.path.join(root, fn)):
                    # fields[1:-1]: every number except the last one on the line.
                    imgs.append((fn, tuple(float(v) for v in fields[1:-1])))
                c = c + 1
            # Note: c counts label lines processed, including those whose
            # image file was not found and therefore not added to imgs.
            print('the total image is', c)
            print(class_names)
            self.root = root
            self.imgs = imgs
            self.classes = class_names
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            """Return ``(image, label tensor, file name)`` for entry ``index``."""
            fn, label = self.imgs[index]
            img = self.loader(os.path.join(self.root, fn))
            if self.transform is not None:
                img = self.transform(img)
            target = torch.Tensor(label)
            if self.target_transform is not None:
                target = self.target_transform(target)
            # The image data, its label and its file name are all returned here.
            return img, target, fn

        def __len__(self):
            """Return the number of usable (existing) images."""
            return len(self.imgs)

        def getName(self):
            """Return the list of class names (``['regression']``)."""
            return self.classes

    实际上只需要继承Dataset这个类,并重写其中的两个函数__getitem__和__len__;__getitem__返回的图像和label是torch.Tensor类型即可(图像名称可以直接以字符串返回)

    看dataloader定义方式以及如何在dataloader中加载数据

     # Location of the images and of the label file describing them.
     test_data_root = "/home/ying/data/google_streetview_train_test1"
     data_label = "/home/ying/data/google_streetview_train_test1/label.txt"

     # ToTensor rescales pixel values from [0, 255] to [0, 1].
     mytransform = transforms.Compose([transforms.ToTensor()])

     # Only the test split is wrapped in a DataLoader here.
     train_label, test_label = random_choose_data(data_label)
     test_loader = torch.utils.data.DataLoader(
         myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
         batch_size=batch_size,
         shuffle=True,
         **kwargs
     )
     8 ...
     # Take the batch count from the loader instead of a hard-coded total.
     num_batches = len(test_loader)

     # Evaluation only: no_grad() avoids building the autograd graph.
     # NOTE: torch.no_grad() and loss.item() need PyTorch >= 0.4; indexing a
     # 0-dim loss with loss.data[0] raises an error on current releases.
     with torch.no_grad():
         # i is the batch index from enumerate(); the tuple is one batch of
         # (images, labels, file names) as produced by the dataset.
         for i, (test_images, test_labels, fn) in enumerate(test_loader):
             test_images = test_images.cuda()
             test_labels = test_labels.cuda()
             outputs = cnn(test_images)
             print(outputs.data[0])  # prediction for the first image of the batch
             print(fn)  # file names of the images in this batch
             loss = criterion(outputs, test_labels)
             print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, num_batches, loss.item()))

    刚刚在myImageFloder中定义的__getitem__的返回值,经过DataLoader按batch组合之后,就是 for i, (test_images, test_labels, fn) in enumerate(test_loader): 中得到的(test_images, test_labels, fn), 其中第一个i是enumerate给出的batch序号(index)

    这样就能够在模型test的时候观察哪些数据误差比较大并且进行输出

  • 相关阅读:
    编程命名规范
    python 字符串编解码介绍
    django之sqlite3常见错误
    asp.net请求过程文章推荐
    python 多线程的文章
    工作中处理文本的python代码片段
    memcached一些知识
    咱计算机专业的人,能不能不那么特别地彰显对语文的无知?——再谈面向对象...
    你真的了解分层架构吗?——写给被PetShop"毒害"的朋友们...
    混蛋的面试题——《大话设计模式》读后感
  • 原文地址:https://www.cnblogs.com/yongjieShi/p/7513824.html
Copyright © 2020-2023  润新知