• 制作训练集和验证集、测试集


      1 ##深度学习过程中,需要制作训练集和验证集、测试集。
      2 
      3 import os, random, shutil
      4 from config import *
      5 import re
      6 
      7 #用于清空并生成文件夹
      8 def test_train_dir():
      9     # 清空文件夹里面的所有文件,然后创建,解决重复占用问题
     10     # shutil.rmtree('要清空的文件夹名')
     11     # os.mkdir('要清空的文件夹名')
     12 
     13     if  os.path.exists(os.path.join(abs_path, "data")):
     14 
     15 
     16         shutil.rmtree(os.path.join(abs_path, "data"))
     17         # 通过makedirs创建多级目录
     18     os.makedirs(os.path.join(abs_path, "data", "test"))
     19     os.makedirs(os.path.join(abs_path, "data", "train"))
     20     os.makedirs(os.path.join(abs_path, "data", "samples"))
     21 
     22 def tet_data(fileDir):
     23     pathDir=os.listdir(fileDir)
     24     for i in pathDir:
     25         print(os.path.join(abs_path,"data","train",i))
     26         # shutil.move(fileDir + "\" + i, tarDir_test + "\" + i)
     27         if  os.path.exists(os.path.join(abs_path,"data","train",i)):
     28             continue
     29 
     30         else:
     31             shutil.copy(fileDir + "\" + i, tarDir_test + "\" + i)
     32 
     33 
     34 
     35 def moveFile(fileDir):
     36     pathDir = os.listdir(fileDir)  # 取图片的原始路径
     37     # filenumber = len(pathDir)
     38     # rate = 0.1  # 自定义抽取图片的比例,比方说100张抽10张,那就是0.1
     39     # picknumber = int(filenumber * rate)  # 按照rate比例从文件夹中取一定数量图片
     40     sample = random.sample(pathDir, 30)  # 随机选取picknumber数量的样本图片
     41     print(sample)
     42     for name in sample:
     43         # shutil.move(fileDir +"\"+ name, tarDir +"\"+name)
     44         shutil.copy(fileDir +"\"+ name, tarDir +"\"+name)
     45     # return
     46 
     47 def train_and_labels():
     48 
     49 
     50     # file1=open(os.path.join("D:Jonie_Project_sss15data","train_dir.txt"),"w")
     51     file2=open(os.path.join(abs_path,"data","samples","train_dir.txt"),"w")
     52     file3=open(os.path.join(abs_path,"data","samples","train_label.txt"),"w")
     53     dir_sam=os.listdir(os.path.join(abs_path,"data","train"))
     54     for i in dir_sam:
     55         # print(i.split("_")[-2])
     56         # print(os.path.join(abs_path,"data")+ i + "	" +i.split("_")[-2]+"
    ")
     57         print(os.path.join(abs_path,"data")+ i + "	" +re.split('_d+.',i)[0]+"
    ")
     58         # file2.write(os.path.join(abs_path,"dataset","scene_categories",i.split("_")[-2],i) +"
    ")#解决a_b_12.jpg.jpg截取结果为b的问题
     59         file2.write(os.path.join(abs_path,"dataset","scene_categories",re.split('_d+.',i)[0],i) +"
    ")
     60         # file3.write(i.split("_")[-2]+"
    ")
     61         file3.write(re.split('_d+.',i)[0]+"
    ")
     62     file2.close()
     63     file3.close()
     64 def tet_and_labels():
     65     # file1 = open(os.path.join("D:Jonie_Project_sss15data", "labels.txt"), "w")
     66     file2 = open(os.path.join(abs_path,"data","samples", "test_dir.txt"), "w")
     67     file3 = open(os.path.join(abs_path,"data","samples", "test_label.txt"), "w")
     68     dir_sam = os.listdir(os.path.join(abs_path,"data","test"))
     69     for i in dir_sam:
     70         # print(i.split("_")[-2])
     71         # print(os.path.join(abs_path,"data",i)+ i.split("_")[-2] + "
    ")
     72         print(os.path.join(abs_path,"data",i)+ re.split('_d+.',i)[0] + "
    ")
     73         # file2.write(os.path.join(abs_path,"dataset","scene_categories")+"\" +i.split("_")[-2]+"\"+i +"
    ")
     74         file2.write(os.path.join(abs_path,"dataset","scene_categories")+"\" +re.split('_d+.',i)[0]+"\"+i +"
    ")
     75         # file3.write( i.split("_")[-2] + "
    ")
     76         file3.write( re.split('_d+.',i)[0] + "
    ")
     77     file2.close()
     78     file3.close()
     79 
     80 if __name__ == '__main__':
     81 
     82 
     83     for num1 in range(4):
     84         test_train_dir()
     85 
     86         dir_name=os.listdir(os.path.join(abs_path,"dataset","scene_categories"))
     87         print("-------------------------",os.path.join(abs_path,"dataset","scene_categories"))
     88         for i in dir_name:
     89             # print(i)
     90             fileDir =os.path.join(abs_path,"dataset","scene_categories",i)   # 源图片文件夹路径
     91             tarDir = os.path.join(abs_path,"data","train") # 移动到新的文件夹路径
     92             tarDir_test = os.path.join(abs_path,"data","test") # 移动到新的文件夹路径
     93             moveFile(fileDir)#train
     94             # tarDir_test = r"D:Jonie_Project_sss15data	est"  # 移动到新的文件夹路径
     95 
     96             tet_data(fileDir)#test
     97         train_and_labels()
     98         tet_and_labels()
     99         if not os.path.exists(os.path.join(abs_path,"data1",str(num1))):
    100             os.makedirs(os.path.join(abs_path,"data1",str(num1)))
    101         new_path = os.path.join(abs_path, "data1", str(num1))
    102         for derName, subfolders, filenames in os.walk(os.path.join(abs_path, "data", "samples")):
    103             # print(derName/subfolders/filenames)
    104             for j in range(len(filenames)):
    105                 if filenames[j].endswith('.txt'):
    106                     file_path = derName + '\' + filenames[j]
    107                     newpath = new_path + '\' + filenames[j]
    108                     shutil.move(file_path, newpath)
  • 相关阅读:
    AOP概述
    AOP-动态代理
    IOC容器和Bean的配置
    Spring框架概述
    异常
    Optional 类
    Stream API
    方法引用(Method References)
    函数式(Functional)接口
    stm8笔记1-搭建工程+孤独的小灯闪烁
  • 原文地址:https://www.cnblogs.com/jqpy1994/p/11196202.html
Copyright © 2020-2023  润新知