"""Collect vocabulary and frame statistics for the train/test video subsets.

Each split directory holds one sub-directory per video containing a
``language.txt`` description and an ``imgs/`` directory of frames.  The
script counts every frame, finds the longest description (in whitespace
separated words), gathers the unique words in first-seen order and writes
them, space separated, to a word-list file.
"""
import os
import pdb  # kept: referenced only by the disabled bounding-box validation

import numpy as np  # kept: referenced only by the disabled bounding-box validation

train_path = '/home/wangxiao/Downloads/TracKit/dataset/train_subset/'
test_path = '/home/wangxiao/Downloads/TracKit/dataset/test_subset/'
word_list_path = '/home/wangxiao/Downloads/TracKit/train_subset_wordList.txt'


def scan_split(split_path, word_list, seen_words):
    """Scan every video directory below ``split_path``.

    Args:
        split_path: Directory whose entries are per-video directories.
        word_list: List extended in place with words not seen before, in
            the order they are first encountered.
        seen_words: Set mirroring ``word_list``; gives O(1) membership
            tests instead of scanning the list for every word.

    Returns:
        ``(frame_total, longest_sentence)``: the number of entries found in
        all ``imgs/`` directories of this split, and the largest word count
        of any ``language.txt`` in it.

    Raises:
        OSError: If a directory or ``language.txt`` file cannot be read.
    """
    video_names = os.listdir(split_path)
    frame_total = 0
    longest_sentence = 0
    for index, video_name in enumerate(video_names):
        print(index, ' | ', len(video_names), ' ==>> videoName: ', video_name)
        video_dir = os.path.join(split_path, video_name)

        # NOTE: validation of groundtruth.txt bounding boxes was disabled in
        # the original script, so no bounding boxes are counted here.
        frame_total += len(os.listdir(os.path.join(video_dir, 'imgs')))

        with open(os.path.join(video_dir, 'language.txt'), 'r') as fid:
            words = fid.read().split()
        longest_sentence = max(longest_sentence, len(words))

        for word in words:
            if word not in seen_words:
                seen_words.add(word)
                word_list.append(word)
    return frame_total, longest_sentence


def build_word_list(train_dir, test_dir, output_path):
    """Gather statistics over both splits and write the word list.

    Args:
        train_dir: Directory of the training split.
        test_dir: Directory of the test split.
        output_path: File that receives every unique word followed by a
            single space.

    Returns:
        ``(totalFrameNUM, Max_sentence_NUM, wordLIST, BBox_validateNUM)``.
        ``BBox_validateNUM`` is always 0 because bounding-box validation is
        disabled.
    """
    wordLIST = []
    seen_words = set()
    totalFrameNUM = 0
    Max_sentence_NUM = 0
    BBox_validateNUM = 0

    # The test split previously used an undefined path variable, which made
    # the script crash before the word list was written.
    for split_dir in (train_dir, test_dir):
        frame_total, longest_sentence = scan_split(split_dir, wordLIST, seen_words)
        totalFrameNUM += frame_total
        Max_sentence_NUM = max(Max_sentence_NUM, longest_sentence)

    print('==>> totalFrameNUM: ', totalFrameNUM)
    print('==>> Max_sentence_NUM: ', Max_sentence_NUM)
    print('==>> total word num: ', len(wordLIST))
    print('==>> BBox NUM: ', BBox_validateNUM)

    # The context manager guarantees the word list is flushed and closed.
    with open(output_path, 'w') as f:
        f.write(''.join(word + ' ' for word in wordLIST))

    return totalFrameNUM, Max_sentence_NUM, wordLIST, BBox_validateNUM


def main():
    """Run the statistics pass over the configured dataset locations."""
    build_word_list(train_path, test_path, word_list_path)


if __name__ == '__main__':
    main()