pytorch官方给的加载数据的方式是已经定义好的dataset以及loader,如何加载自己本地的图片以及label?
形如数据格式为
image1 label1
image2 label2
...
imagen labeln
实验中我采用的数据的格式如下:每一行以图片的名字开头,后面跟着10个数值;其中前9个数值构成该图片的label(一个9维的向量),最后一列在加载时会被丢弃(见下文代码中的 cls[:len(cls)-1])
1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.304295635957 0.952577642997 0.0614006041909 0.0938333659301 -0.995587916479 0.126405046864 -0.999368204665 0.0355414055005 0.382030624629 0.0
1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.271224474168 0.962516121742 0.061399602839 0.128727689658 -0.991679979588 0.126495313272 -0.999999890616 0.000467726796359 0.381981952872 0.0
1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237868729379 0.971297311632 0.0614713240576 0.163626102983 -0.986522426721 0.1265439964 -0.999400990041 -0.0346072406472 0.382020891324 0.0
1.1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.303575822293 0.95280728383 0.0675229548933 0.0939225945957 -0.995579502714 0.138745857429 -0.999376861795 0.0352971402251 0.410670255038 0.1
1.1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270745576918 0.962650940154 0.0674654115238 0.128659340525 -0.991688849436 0.138685653232 -0.999999909615 0.000425170029598 0.410739827476 0.1
1.1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.23757921143 0.971368168253 0.0674866175928 0.16322766122 -0.986588430204 0.138789623782 -0.999406504329 -0.0344476284471 0.410661183171 0.1
1.2_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305474635089 0.952200213882 0.0736939767933 0.0939968709874 -0.995572492712 0.150981626608 -0.999370773952 0.0354690875311 0.437620875774 0.2
1.2_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270346113421 0.962763199836 0.073518963401 0.128433455959 -0.991718129002 0.150964425444 -0.999999924062 0.000389711583812 0.437667827367 0.2
1.2_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237337349604 0.971427291403 0.0734898449879 0.162895476227 -0.986643331617 0.150931800731 -0.999411541516 -0.0343011761519 0.437608139736 0.2
1.3_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305514664536 0.952187371137 0.0795990377393 0.0941741911595 -0.995555735115 0.162914965783 -0.999378340534 0.0352552474342 0.462816755558 0.3
1.3_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.272366931798 0.962193459998 0.0796135882128 0.128398130503 -0.991722703221 0.162940731132 -0.999999935257 0.000359841646368 0.462733965419 0.3
...
源程序如下
import math
import os
import random

import torch
import torch.nn as nn
import torch.utils.data as data
from PIL import Image
from torch.autograd import Variable
from torchvision import datasets, transforms

torch.cuda.set_device(0)
kwargs = {'num_workers': 1, 'pin_memory': True}
batch_size = 8


# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
def random_choose_data(label_path, num_excluded=200000, num_train=150000,
                       num_test=50000, seed=1):
    """Split the lines of a label file into train and test label lists.

    ``num_excluded`` lines are sampled at random and set aside. The remaining
    unique lines are shuffled; the first ``num_train`` of them become the
    training labels and the next ``num_test`` the test labels.

    Args:
        label_path: Path of the text file with one "name v1 v2 ..." per line.
        num_excluded: Number of randomly sampled lines to set aside.
        num_train: Maximum number of training lines returned.
        num_test: Maximum number of test lines returned.
        seed: Seed given to ``random.seed`` so the split is repeatable.

    Returns:
        A ``(train_label, test_label)`` tuple of lists of raw lines.

    Raises:
        ValueError: Raised by ``random.sample`` if the file has fewer than
            ``num_excluded`` lines.
    """
    random.seed(seed)
    with open(label_path) as label_file:
        lines = label_file.readlines()

    excluded = set(random.sample(lines, num_excluded))

    # De-duplicate while keeping file order. Going through list(set(...))
    # would make the order depend on string-hash randomisation, so the
    # seeded shuffle below would not be reproducible between runs.
    seen = set(excluded)
    remaining = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            remaining.append(line)
    random.shuffle(remaining)

    train_label = remaining[:num_train]
    test_label = remaining[num_train:num_train + num_test]
    return train_label, test_label


def default_loader(path):
    """Open the image at ``path`` and return it as an RGB PIL image."""
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):
    """Dataset yielding ``(image, label_tensor, file_name)`` triples.

    Args:
        root: Directory that contains the image files.
        label: Iterable of lines "name v1 v2 ... vk". The last value on each
            line is dropped; the others form the regression target.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label tensor.
        loader: Callable mapping a file path to an image object.
    """

    def __init__(self, root, label, transform=None, target_transform=None,
                 loader=default_loader):
        imgs = []
        class_names = ['regression']
        for line in label:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines in the label list
            fn = fields[0]
            # Keep only entries whose image file actually exists under root.
            if os.path.isfile(os.path.join(root, fn)):
                # fields[1:-1]: every value after the name except the last.
                imgs.append((fn, tuple(float(v) for v in fields[1:-1])))
        print('the total image is', len(imgs))
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return the image, its label tensor and its file name."""
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        target = torch.Tensor(label)
        if self.target_transform is not None:
            target = self.target_transform(target)
        # The file name is returned so that samples with a large error can
        # be identified at test time.
        return img, target, fn

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        """Return the list of class names."""
        return self.classes


mytransform = transforms.Compose([transforms.ToTensor()])  # [0, 255] -> [0, 1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=batch_size, shuffle=True, **kwargs)


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Project the shortcut when the main path changed shape.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    """Residual block: 1x1 reduce, 3x3, 1x1 expand (x4 channels)."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # 1x1 convolution reduces the channel count, spatial size unchanged.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # Project the shortcut when the main path changed shape.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    """ResNet backbone followed by a linear head with ``num_classes`` outputs.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: Number of blocks in each of the four stages.
        num_classes: Size of the output vector (9 for this regression task).
    """

    def __init__(self, block, layers, num_classes=9):
        super(ResNet, self).__init__()
        self.inplanes = 64
        # Stride-2 convolution halves the spatial size.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # Stride-2 pooling halves the spatial size again.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # NOTE(review): a 7x7 pooling window presumably assumes 224x224
        # inputs (7x7 final feature map) - confirm against the image size.
        self.avgpool = nn.AvgPool2d(7)
        # 512 * expansion equals 2048 for Bottleneck, so existing ResNet-50
        # checkpoints still load, and BasicBlock networks work as well.
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Args:
            block: Residual block class.
            planes: Base channel count of the stage.
            blocks: Number of residual blocks in the stage.
            stride: Stride of the first block of the stage.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        # Later blocks receive the expanded channel count as input.
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, features)
        x = self.fc(x)
        return x


def resnet50(pretrained=True):
    """Construct a ResNet-50 model with a 9-dimensional output.

    Args:
        pretrained (bool): If True, load the weights stored in the local
            checkpoint ``./resnet50_20170907_state_dict.pth``.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        model.load_state_dict(torch.load('./resnet50_20170907_state_dict.pth'))
    return model


# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
cnn = resnet50(pretrained=True)  # the output has 9 values
cnn.cuda()
cnn.eval()
criterion = nn.MSELoss().cuda()

num_batches = len(test_loader)
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    # i is the batch index from enumerate; the tuple is what
    # myImageFloder.__getitem__ returns, collated into a batch.
    for i, (test_images, test_labels, fn) in enumerate(test_loader):
        test_images = test_images.cuda()
        test_labels = test_labels.cuda()
        outputs = cnn(test_images)
        print(outputs[0])
        print(fn)
        loss = criterion(outputs, test_labels)
        print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, num_batches, loss.item()))
着重看定义dataloader以及返回图像名称的一段代码:
def random_choose_data(label_path, num_excluded=200000, num_train=150000,
                       num_test=50000, seed=1):
    """Split the lines of a label file into train and test label lists.

    ``num_excluded`` lines are sampled at random and set aside. The remaining
    unique lines are shuffled; the first ``num_train`` of them become the
    training labels and the next ``num_test`` the test labels.

    Args:
        label_path: Path of the text file with one "name v1 v2 ..." per line.
        num_excluded: Number of randomly sampled lines to set aside.
        num_train: Maximum number of training lines returned.
        num_test: Maximum number of test lines returned.
        seed: Seed given to ``random.seed`` so the split is repeatable.

    Returns:
        A ``(train_label, test_label)`` tuple of lists of raw lines; these
        lists are what ``myImageFloder`` expects as its ``label`` argument.

    Raises:
        ValueError: Raised by ``random.sample`` if the file has fewer than
            ``num_excluded`` lines.
    """
    random.seed(seed)
    with open(label_path) as label_file:
        lines = label_file.readlines()

    excluded = set(random.sample(lines, num_excluded))

    # De-duplicate while keeping file order. Going through list(set(...))
    # would make the order depend on string-hash randomisation, so the
    # seeded shuffle below would not be reproducible between runs.
    seen = set(excluded)
    remaining = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            remaining.append(line)
    random.shuffle(remaining)

    train_label = remaining[:num_train]
    test_label = remaining[num_train:num_train + num_test]
    return train_label, test_label


def default_loader(path):
    """Open the image at ``path`` and return it as an RGB PIL image."""
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):  # inherits from Dataset
    """Dataset yielding ``(image, label_tensor, file_name)`` triples.

    Args:
        root: Directory that contains the image files.
        label: Iterable of lines "name v1 v2 ... vk". The last value on each
            line is dropped; the others form the regression target.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label tensor.
        loader: Callable mapping a file path to an image object.
    """

    def __init__(self, root, label, transform=None, target_transform=None,
                 loader=default_loader):
        imgs = []
        class_names = ['regression']
        for line in label:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines in the label list
            fn = fields[0]
            # Keep only entries whose image file actually exists under root.
            if os.path.isfile(os.path.join(root, fn)):
                # fields[1:-1]: every value after the name except the last.
                # Each image maps to one label tuple of any dimension.
                imgs.append((fn, tuple(float(v) for v in fields[1:-1])))
        print('the total image is', len(imgs))
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return the image, its label tensor and its file name."""
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        target = torch.Tensor(label)
        if self.target_transform is not None:
            target = self.target_transform(target)
        # Return the image data, the matching label and the file name.
        return img, target, fn

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        """Return the list of class names."""
        return self.classes
实际上只需要继承Dataset这个类,并重写(override)其中的两个函数__getitem__与__len__即可;__getitem__返回的数据应为torch.Tensor(或默认collate函数能够处理的其他类型,例如这里的文件名字符串)
看dataloader定义方式以及如何在dataloader中加载数据
mytransform = transforms.Compose([transforms.ToTensor()])  # [0, 255] -> [0, 1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=batch_size, shuffle=True, **kwargs)

# ... (model definition and the creation of cnn / criterion omitted) ...

num_batches = len(test_loader)  # actual batch count instead of a fixed 781
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    # i is the batch index from enumerate; the tuple is what
    # myImageFloder.__getitem__ returns, collated into a batch.
    for i, (test_images, test_labels, fn) in enumerate(test_loader):
        test_images = test_images.cuda()
        test_labels = test_labels.cuda()
        outputs = cnn(test_images)
        print(outputs[0])
        print(fn)
        loss = criterion(outputs, test_labels)
        # loss is a 0-dim tensor; .item() extracts the Python float.
        print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, num_batches, loss.item()))
实际上刚刚在myImageFloder中定义的__getitem__所返回的内容,经DataLoader按batch整理之后,就是 for i, (test_images, test_labels, fn) in enumerate(test_loader): 中得到的对象, 其中第一个i是enumerate给出的batch序号(index)
这样就能够在模型test的时候观察哪些数据误差比较大并且进行输出