When training with Caffe, using multiple GPUs only requires passing the GPU indices on the command line. In PyTorch, however, after declaring the model you have to wrap it for multi-GPU use, for example:
cnn = DataParallel(AlexNet())
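A minimal sketch of this wrapping step (illustration only, not code from the experiment; the torchvision alexnet below simply stands in for any nn.Module):

import torch
from torch.nn import DataParallel
from torchvision.models import alexnet

cnn = alexnet()                      # any nn.Module can be wrapped
if torch.cuda.device_count() > 1:
    # replicate the module on every visible GPU and split each input
    # batch along dimension 0 across the replicas
    cnn = DataParallel(cnn)
cnn = cnn.cuda()

Created with no arguments, the wrapper uses every GPU that PyTorch can see.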
After that, running the program directly uses all visible GPUs by default. To show the effect of this wrapping, I took a dataset of distorted images, wrote a small ResNet model, trained it for 50 epochs, and compared the elapsed time. The code is as follows:
# -*- coding: utf-8 -*-
# Implementation of https://arxiv.org/pdf/1512.03385.pdf/
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import os
from PIL import Image
import time

import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.utils.data as data
from torch.nn import DataParallel


kwargs = {'num_workers': 1, 'pin_memory': True}


def default_loader(path):
    return Image.open(path).convert('RGB')


# Custom dataset: returns an image tensor and the corresponding label vector
class myImageFloder(data.Dataset):
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        fh = open(label)
        c = 0
        imgs = []
        class_names = []
        for line in fh.readlines():
            if c == 0:
                # the first line of the label file lists the class names
                class_names = [n.strip() for n in line.rstrip().split(' ')]
            else:
                cls = line.split()  # filename followed by the label values
                fn = cls.pop(0)
                if os.path.isfile(os.path.join(root, fn)):
                    # imgs is a list of (filename, label-tuple) pairs
                    imgs.append((fn, tuple([float(v) for v in cls])))
            c = c + 1
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        fn, label = self.imgs[index]  # indexing the list returns the (filename, label) pair
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes


mytransform = transforms.Compose([transforms.ToTensor()])  # no augmentation, just convert to a tensor
train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"

train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)


# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


# ResNet Module
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)   # args: block, out channels, num blocks, stride
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)    # 256x256 inputs end up 2x2 here
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            # the shortcut needs a projection when the channel count or the
            # resolution changes, e.g. 256x256x16 -> 128x128x32
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels  # the next block's input channels equal this block's output channels
        for i in range(1, blocks):       # the first block has already been appended
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
resnet.cuda()

# Loss and Optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
start = time.clock()
for epoch in range(50):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # NOTE: the "80" and "500" below are hard-coded and do not match the
            # actual 50 epochs / ~900 iterations, which is why the logs show
            # entries such as Iter [900/500]
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" % (epoch + 1, 80, i + 1, 500, loss.data[0]))

    # Decaying Learning Rate
    if (epoch + 1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

elapsed = time.clock() - start
print("time used:", elapsed)

# # Test
# correct = 0
# total = 0
# for images, labels in test_loader:
#     images = Variable(images.cuda())
#     outputs = resnet(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()
#
# print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')
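One side note on the last line: DataParallel keeps the wrapped network under a .module attribute, so the keys in resnet.state_dict() carry a "module." prefix. A small sketch (not part of the original script; the file names are illustrative) of saving or loading the weights in a form that a plain, unwrapped ResNet can also consume:

# alternative 1: save the inner model's weights, so the keys have no "module." prefix
torch.save(resnet.module.state_dict(), 'resnet_plain.pkl')

# alternative 2: keep the original torch.save(resnet.state_dict(), 'resnet.pkl')
# and strip the prefix when loading into an unwrapped ResNet
state = torch.load('resnet.pkl')
state = {k.replace('module.', '', 1): v for k, v in state.items()}
plain_resnet = ResNet(ResidualBlock, [3, 3, 3])
plain_resnet.load_state_dict(state)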
As a comparison experiment, we also change the ResNet declaration to
resnet = ResNet(ResidualBlock, [3, 3, 3])
Everything else stays the same, and in neither run do we specify a GPU on the command line; we simply run python resnet.py. With the DataParallel wrapper, the elapsed time is:
('time used:', 17124.861335999998), and watch -n 1 nvidia-smi confirms that two GPUs are in use.
Without the DataParallel wrapper, the elapsed time is:
('time used:', 30318.149681000003), and watch -n 1 nvidia-smi confirms that only one GPU is in use.
As you can see, wrapping the model in DataParallel cuts the training time nearly in half (17124.9 s vs. 30318.1 s), so DataParallel is one way to run PyTorch on multiple GPUs.
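If you want DataParallel to use only some of the GPUs rather than all of them, two common options (not used in the experiment above; this sketch reuses the imports and classes from the script) are masking the visible devices before any CUDA call, or passing device_ids explicitly:

import os

# option 1: mask the devices before CUDA is initialised, equivalent to running
#   CUDA_VISIBLE_DEVICES=0,1 python resnet.py
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# option 2: list the devices explicitly when wrapping the model
resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]), device_ids=[0, 1])
resnet.cuda()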
The official docs also give examples of multi-GPU usage, as well as of running with part of the data on the GPU and part on the CPU.
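In the spirit of that second example, the usual pattern is to keep some layers (and the data flowing through them) on the CPU and move the intermediate tensor to the GPU inside forward(); a minimal sketch with illustrative layer sizes:

import torch
import torch.nn as nn
from torch.autograd import Variable


class HybridNet(nn.Module):
    def __init__(self):
        super(HybridNet, self).__init__()
        self.cpu_part = nn.Linear(10, 10)        # stays on the CPU
        self.gpu_part = nn.Linear(10, 1).cuda()  # lives on GPU 0

    def forward(self, x):
        x = self.cpu_part(x)  # computed on the CPU
        x = x.cuda()          # hand the intermediate tensor to the GPU
        return self.gpu_part(x)


output = HybridNet()(Variable(torch.randn(4, 10)))  # output is a CUDA tensor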
Excerpts of the training logs from the two runs are shown below.
With DataParallel:
Epoch [1/80], Iter [100/500] Loss: 916.5578
Epoch [1/80], Iter [200/500] Loss: 172.2591
Epoch [1/80], Iter [300/500] Loss: 179.8360
Epoch [1/80], Iter [400/500] Loss: 259.6867
Epoch [1/80], Iter [500/500] Loss: 244.0616
Epoch [1/80], Iter [600/500] Loss: 74.7015
Epoch [1/80], Iter [700/500] Loss: 63.1657
Epoch [1/80], Iter [800/500] Loss: 90.3517
Epoch [1/80], Iter [900/500] Loss: 70.4562
Epoch [2/80], Iter [100/500] Loss: 52.3249
Epoch [2/80], Iter [200/500] Loss: 129.1855
Epoch [2/80], Iter [300/500] Loss: 110.0157
Epoch [2/80], Iter [400/500] Loss: 64.9313
Epoch [2/80], Iter [500/500] Loss: 87.8385
Epoch [2/80], Iter [600/500] Loss: 118.5828
Epoch [2/80], Iter [700/500] Loss: 123.9575
Epoch [2/80], Iter [800/500] Loss: 79.1908
Epoch [2/80], Iter [900/500] Loss: 61.8099
...
Epoch [49/80], Iter [100/500] Loss: 0.2658
Epoch [49/80], Iter [200/500] Loss: 0.2223
Epoch [49/80], Iter [300/500] Loss: 0.4348
Epoch [49/80], Iter [400/500] Loss: 0.2313
Epoch [49/80], Iter [500/500] Loss: 0.2838
Epoch [49/80], Iter [600/500] Loss: 0.3415
Epoch [49/80], Iter [700/500] Loss: 0.3633
Epoch [49/80], Iter [800/500] Loss: 0.3768
Epoch [49/80], Iter [900/500] Loss: 0.5177
Epoch [50/80], Iter [100/500] Loss: 0.3538
Epoch [50/80], Iter [200/500] Loss: 0.2759
Epoch [50/80], Iter [300/500] Loss: 0.2255
Epoch [50/80], Iter [400/500] Loss: 0.3148
Epoch [50/80], Iter [500/500] Loss: 0.4502
Epoch [50/80], Iter [600/500] Loss: 0.3382
Epoch [50/80], Iter [700/500] Loss: 0.8207
Epoch [50/80], Iter [800/500] Loss: 0.3541
Epoch [50/80], Iter [900/500] Loss: 0.4090
('time used:', 17124.861335999998)
Without DataParallel:
Epoch [1/80], Iter [100/500] Loss: 635.6779
Epoch [1/80], Iter [200/500] Loss: 247.5514
Epoch [1/80], Iter [300/500] Loss: 231.7609
Epoch [1/80], Iter [400/500] Loss: 198.7304
Epoch [1/80], Iter [500/500] Loss: 207.1028
Epoch [1/80], Iter [600/500] Loss: 114.7708
Epoch [1/80], Iter [700/500] Loss: 126.9886
Epoch [1/80], Iter [800/500] Loss: 160.8622
Epoch [1/80], Iter [900/500] Loss: 153.8121
Epoch [2/80], Iter [100/500] Loss: 106.6578
Epoch [2/80], Iter [200/500] Loss: 91.5044
Epoch [2/80], Iter [300/500] Loss: 111.4231
Epoch [2/80], Iter [400/500] Loss: 50.7004
Epoch [2/80], Iter [500/500] Loss: 58.9242
Epoch [2/80], Iter [600/500] Loss: 55.2035
Epoch [2/80], Iter [700/500] Loss: 26.7637
Epoch [2/80], Iter [800/500] Loss: 52.5472
Epoch [2/80], Iter [900/500] Loss: 51.7907
...
Epoch [49/80], Iter [100/500] Loss: 0.4262
Epoch [49/80], Iter [200/500] Loss: 0.3396
Epoch [49/80], Iter [300/500] Loss: 0.4670
Epoch [49/80], Iter [400/500] Loss: 0.2543
Epoch [49/80], Iter [500/500] Loss: 0.3146
Epoch [49/80], Iter [600/500] Loss: 1.3187
Epoch [49/80], Iter [700/500] Loss: 0.2993
Epoch [49/80], Iter [800/500] Loss: 0.3053
Epoch [49/80], Iter [900/500] Loss: 0.3343
Epoch [50/80], Iter [100/500] Loss: 0.2081
Epoch [50/80], Iter [200/500] Loss: 0.5631
Epoch [50/80], Iter [300/500] Loss: 0.4358
Epoch [50/80], Iter [400/500] Loss: 0.4028
Epoch [50/80], Iter [500/500] Loss: 0.2510
Epoch [50/80], Iter [600/500] Loss: 0.5876
Epoch [50/80], Iter [700/500] Loss: 0.3692
Epoch [50/80], Iter [800/500] Loss: 0.4500
Epoch [50/80], Iter [900/500] Loss: 0.1850
('time used:', 30318.149681000003)