How to build dataset txt files and load them
1. Build train.txt, val.txt and test.txt from the image folders
# MakeTxt.py
"""
Directory layouts of the raw images

************** Layout 1 *******************
--data
    --train
        --category_1
        --category_2
        ...
    --val
        --category_1
        --category_2
        ...
    --test
        --category_1
        --category_2
        ...
************** Layout 2 *******************
--data
    --category_1
        --001.jpg
        --002.jpg
        ...
    --category_2
        --001.jpg
        --002.jpg
        ...
    ...
"""
import os

# [1. Get the paths of the category folders]
def get_files_path(file_dir):
    dir_names = []
    for fn in os.listdir(file_dir):  # fn is the name of each entry (category folder)
        dir_names.append(os.path.join(file_dir, fn))
    print(dir_names)
    return dir_names

# [2. Collect every image path below a folder]
def get_dir_img(file_dir):
    filenames = []
    for root, dirs, files in os.walk(file_dir):
        for name in files:
            # print(os.path.join(root, name))
            filenames.append(os.path.join(root, name))
    return filenames

# [3. Write train.txt / val.txt / test.txt]
def make_txt(img_root, txt_root, quantity_proportion):
    """
    img_root : root directory of the images
    txt_root : directory where the txt files are written
    quantity_proportion : [train, val, test] split ratios, e.g. [0.8, 0.1, 0.1]
    """
    # create the txt files (remove any old ones first)
    txt_name = [txt_root + '/train.txt', txt_root + '/val.txt', txt_root + '/test.txt']
    for file_path in txt_name:
        if os.path.isfile(file_path):
            os.remove(file_path)
    train = open(txt_name[0], 'a')
    val = open(txt_name[1], 'a')
    test = open(txt_name[2], 'a')
    sort_files = get_files_path(img_root)
    for i, file in enumerate(sort_files):  # i is used as the integer class label
        tem_total_img = get_dir_img(file)
        num_img = len(tem_total_img)
        span_num = [int(x * num_img) for x in quantity_proportion]
        print(span_num, num_img)
        for j in range(span_num[0] + 1):
            train.write(tem_total_img[j] + ' ' + str(i) + '\n')
        for k in range(span_num[0] + 1, span_num[0] + span_num[1] + 1):
            val.write(tem_total_img[k] + ' ' + str(i) + '\n')
        for p in range(span_num[0] + span_num[1] + 1, num_img):
            test.write(tem_total_img[p] + ' ' + str(i) + '\n')
    train.close()
    val.close()
    test.close()

def main():
    quantity_proportion = [0.8, 0.1, 0.1]
    make_txt('./pokeman', './dataTxt', quantity_proportion)

if __name__ == '__main__':
    main()
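After running MakeTxt.py, every line of the generated txt files is an image path followed by an integer class label, separated by a space. The lines look roughly like the ones below (the folder and file names here are only placeholders; the actual paths depend on your dataset):

./pokeman/category_1/00000001.jpg 0
./pokeman/category_1/00000002.jpg 0
./pokeman/category_2/00000001.jpg 1

This "path label" format is exactly what the MyDataset class in the next step parses with line.split().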
2. torch.utils.data.Dataset
Used to build the dataset.
- Image.open(path).convert('RGB') converts every image to 3-channel RGB, which matches the network's input channel count of 3 later on, as the short check below shows.
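For example, even a grayscale or RGBA image ends up with exactly 3 channels after this conversion. A minimal sketch (it reuses the sample image path that appears later in this post; swap in any image you have):

from PIL import Image
from torchvision import transforms

img = Image.open('./pokeman/mewtwo/00000036.jpg').convert('RGB')
print(img.mode)                           # 'RGB'
print(transforms.ToTensor()(img).shape)   # torch.Size([3, H, W]) -- 3 channels for the network input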
#*************************** Imports ********************************
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch.optim as optim
import os

#*************************** Global settings ********************************
gpu_id = 0
torch.cuda.set_device(gpu_id)  # use the GPU
learning_rate = 0.0001  # learning rate

#*************************** Dataset definition ********************************
# default image loader: read the file and convert it to RGB
def default_loader(path):
    return Image.open(path).convert('RGB')

class MyDataset(Dataset):
    # custom dataset class, inheriting from torch.utils.data.Dataset
    #********** __init__(): read the txt file and store the (path, label) pairs **********
    def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
        # super(MyDataset, self).__init__()
        fh = open(txt, 'r')  # open the txt file passed in, read-only
        imgs = []
        for line in fh:  # iterate over the txt file line by line
            line = line.strip('\n')
            line = line.rstrip('\n')  # strip the trailing newline
            words = line.split()  # split on whitespace (split()'s default)
            imgs.append((words[0], int(words[1])))
            # words[0] is the image path and words[1] is the label,
            # matching the "path label" format written by MakeTxt.py
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    #********** __getitem__(): load and preprocess one sample by index **********
    def __getitem__(self, index):  # required; returns the sample at the given index
        fn, label = self.imgs[index]
        # fn is the image path and label the class index, i.e. words[0] and words[1] from above
        img = self.loader(fn)  # load the image from its path
        if self.transform is not None:
            img = self.transform(img)  # convert the image to a Tensor
        return img, label
        # whatever is returned here is what each batch yields during training

    #********** __len__(): the size of the dataset **********
    def __len__(self):
        # required; returns the number of samples (images), not the number of batches
        return len(self.imgs)

def my_data_set(txt_root='./dataTxt/', batchsize=32, resize=227):
    # image preprocessing
    _transforms = transforms.Compose([
        transforms.RandomResizedCrop((resize, resize)),
        transforms.ToTensor(), ])
    train_data = MyDataset(txt=txt_root + 'train.txt', transform=_transforms)
    test_data = MyDataset(txt=txt_root + 'test.txt', transform=_transforms)
    val_data = MyDataset(txt=txt_root + 'val.txt', transform=_transforms)
    train_loader = DataLoader(dataset=train_data, batch_size=batchsize, shuffle=True, num_workers=1)
    test_loader = DataLoader(dataset=test_data, batch_size=batchsize, shuffle=False, num_workers=1)
    val_loader = DataLoader(dataset=val_data, batch_size=batchsize, shuffle=False, num_workers=1)
    print('num_of_trainData:', len(train_data))
    print('num_of_testData:', len(test_data))
    print('num_of_valData:', len(val_data))
    return train_loader, test_loader, val_loader
    # print(train_data)
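As a quick sanity check of the loaders, you can build them and inspect one batch. A minimal sketch, assuming the txt files from step 1 already exist under ./dataTxt/; it can be appended to the file above or run after import MyDataset as md:

# build the three loaders and look at the first training batch
train_loader, test_loader, val_loader = my_data_set(txt_root='./dataTxt/', batchsize=32, resize=227)
images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([32, 3, 227, 227]) when the batch is full
print(labels[:8])     # integer class indices, one per image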
3. Build ResNet18 and test it
import torch
import time
from torch import nn, optim
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
import MyDataset as md
import sys
sys.path.append("..")
from Pytorch_learning import pytorch_deep as pyd

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# residual block
class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # 1x1 convolution on the shortcut path to match channels and stride
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(X + Y)

def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    if first_block:
        assert in_channels == out_channels  # the first block keeps the input channel count
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels, use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)

def my_restnet_18(num_classes=6):
    net = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    net.add_module("resnet_block1", resnet_block(64, 64, 2, first_block=True))
    net.add_module("resnet_block2", resnet_block(64, 128, 2))
    net.add_module("resnet_block3", resnet_block(128, 256, 2))
    net.add_module("resnet_block4", resnet_block(256, 512, 2))
    net.add_module("global_avg_pool", pyd.GlobalAvgPool2d())  # output of GlobalAvgPool2d: (Batch, 512, 1, 1)
    net.add_module("fc", nn.Sequential(pyd.FlattenLayer(), nn.Linear(512, num_classes)))
    return net

def my_train(save_path='./weight/restNet18_3.pth', resize=96, batch_size=32):
    ## training
    net = my_restnet_18(num_classes=6)
    # if you hit an "out of memory" error, reduce batch_size or resize
    train_iter, test_iter, val_iter = md.my_data_set(batchsize=batch_size, resize=resize)
    # train_iter, test_iter = pyd.load_data_fashion_mnist(batch_size, resize)
    lr, num_epochs = 0.001, 10
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    ### training loop
    net = net.to(device)
    print("training on ", device)
    ## loss function: cross entropy
    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            # print(X)
            # print(len(X.data))
            X = X.to(device)  # move the batch to the GPU
            y = y.to(device)
            y_hat = net(X)  # forward pass
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = pyd.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec' %
              (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
    # torch.save(net, save_path)

def my_test(pth_path='./weight/restNet18.pth', resize=96, batch_size=32):
    model_net = torch.load(pth_path)
    train_iter, test_iter, val_iter = md.my_data_set(batchsize=batch_size, resize=resize)
    # number of correct predictions and total number of samples
    correct = 0
    total = 0
    # torch.no_grad() skips gradient tracking in the forward pass and saves memory
    # cv2.namedWindow('predictPic', cv2.WINDOW_NORMAL)
    # to_pil_image = transforms.ToPILImage()
    with torch.no_grad():
        for images, labels in val_iter:
            # images, labels = data
            # print(images)
            print(len(images.data))
            images, labels = images.to(device), labels.to(device)
            # predict
            # outputs = self.net(images)
            outputs = model_net(images)
            # the network output is a vector of class scores; take the index of the largest score as the prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # print(images.data[0])
            # print(len(images.data[0]))
            for i in range(len(images.data)):
                print('ground truth {}, predicted {}'.format(labels[i], predicted[i]))

def test_one_img(img_path, pth_path='./weight/restNet18_2.pth', resize=96):
    img = Image.open(img_path).convert('RGB')  # 3-channel input, matching the network
    test_transforms = transforms.Compose([
        transforms.RandomResizedCrop((resize, resize)),
        transforms.ToTensor(), ])
    img = test_transforms(img)
    img = torch.unsqueeze(img, 0)
    model_net = torch.load(pth_path)
    with torch.no_grad():
        img = img.to(device)
        # predict
        # outputs = self.net(images)
        outputs = model_net(img)
        print(outputs)
        # take the index of the largest score as the predicted class
        _, predicted = torch.max(outputs.data, 1)
        print('predicted = {}'.format(predicted.item()))
        # total += labels.size(0)
        # correct += (predicted == labels).sum().item()

def main():
    save_path = './weight/restNet18_3.pth'
    pth_path = save_path
    resize = 227
    batch_size = 32
    # training
    my_train(save_path, resize, batch_size)
    # testing
    # my_test(pth_path, resize, batch_size)
    # test a single image
    # img_path = './pokeman/mewtwo/00000036.jpg'
    # test_one_img(img_path, pth_path, resize)

if __name__ == '__main__':
    main()
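Before training, a dummy forward pass is a quick way to verify the output shape of every stage. A minimal sketch; it only assumes the my_restnet_18 defined above and a 96x96 input (the default resize in my_train):

# sanity check: push a random batch through the network and print each stage's output shape
net = my_restnet_18(num_classes=6)
X = torch.rand((1, 3, 96, 96))
for name, layer in net.named_children():
    X = layer(X)
    print(name, 'output shape:', X.shape)
# the final "fc" stage should print torch.Size([1, 6]) -- one score per class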
Training results
- The results are not ideal: some of the dataset images contain multiple objects, which makes classification harder, and the amount of data is small.
pytorch_deep.py download
- The code above calls helpers from this file; it has been uploaded and can be downloaded if needed:
https://download.csdn.net/download/wangxiaobei2017/14031906
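If you prefer not to download the file, the three helpers called from pytorch_deep.py above (pyd.GlobalAvgPool2d, pyd.FlattenLayer and pyd.evaluate_accuracy) can be approximated as below. This is a minimal sketch assuming they follow the common "Dive into Deep Learning" (PyTorch) versions; the uploaded file may differ in detail.

import torch
from torch import nn

class GlobalAvgPool2d(nn.Module):
    # global average pooling: average each channel over its full spatial extent
    def forward(self, x):
        return nn.functional.avg_pool2d(x, kernel_size=x.size()[2:])

class FlattenLayer(nn.Module):
    # flatten (Batch, C, 1, 1) feature maps into (Batch, C) vectors for the fc layer
    def forward(self, x):
        return x.view(x.shape[0], -1)

def evaluate_accuracy(data_iter, net, device=None):
    # classification accuracy of net over data_iter, computed without gradient tracking
    if device is None:
        device = next(net.parameters()).device
    acc_sum, n = 0.0, 0
    net.eval()
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    net.train()
    return acc_sum / n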