• Python script --- count_words


    import os 
    import pdb 
    import numpy as np 
    
    # Count frames and collect the vocabulary of the train/test subsets.
    #
    # Each video directory under a subset path is read as:
    #     <video>/imgs/          one entry per frame
    #     <video>/language.txt   free-text description of the video
    # The script prints the total frame count, the largest number of
    # whitespace-separated words found in any single language.txt, and the
    # vocabulary size, then writes the vocabulary (one word per line, in
    # first-seen order) to `word_list_path`.
    train_path = '/home/wangxiao/Downloads/TracKit/dataset/train_subset/'
    test_path = '/home/wangxiao/Downloads/TracKit/dataset/test_subset/'
    word_list_path = '/home/wangxiao/Downloads/TracKit/train_subset_wordList.txt'

    trainFiles = os.listdir(train_path)
    testFiles = os.listdir(test_path)

    totalFrameNUM = 0
    Max_sentence_NUM = 0
    wordLIST = []
    # NOTE: bounding-box validation (reading groundtruth.txt) is currently
    # disabled, so this counter is always reported as 0.
    BBox_validateNUM = 0


    def _scan_subset(subset_path, videoNames, wordLIST, seenWords):
        """Scan every video of one subset and update the shared vocabulary.

        Args:
            subset_path: Directory (with trailing '/') containing the videos.
            videoNames: Names of the video directories inside `subset_path`.
            wordLIST: Vocabulary in first-seen order; new words are appended
                in place.
            seenWords: Set mirroring `wordLIST`, used for O(1) membership
                tests; updated in place.

        Returns:
            A `(frame_count, max_words)` tuple: the number of entries found in
            all `imgs/` directories, and the largest word count of any single
            `language.txt` in this subset.
        """
        frameCount = 0
        maxWords = 0
        for i, videoName in enumerate(videoNames):
            print(i, ' | ', len(videoNames), ' ==>> videoName: ', videoName)

            videoPath = subset_path + videoName + '/'
            frameCount += len(os.listdir(videoPath + 'imgs/'))

            # `with` guarantees the handle is released even if reading fails.
            with open(videoPath + 'language.txt', 'r') as fid:
                words = fid.read().split()

            maxWords = max(maxWords, len(words))

            for word in words:
                if word not in seenWords:
                    seenWords.add(word)
                    wordLIST.append(word)

        return frameCount, maxWords


    seenWords = set()
    # The same routine handles both subsets, so the test pass can no longer
    # drift from the train pass (it previously used an undefined path name).
    for subsetPath, subsetFiles in ((train_path, trainFiles), (test_path, testFiles)):
        subsetFrames, subsetMaxWords = _scan_subset(
            subsetPath, subsetFiles, wordLIST, seenWords)
        totalFrameNUM += subsetFrames
        Max_sentence_NUM = max(Max_sentence_NUM, subsetMaxWords)

    print('==>> totalFrameNUM: ', totalFrameNUM)
    print('==>> Max_sentence_NUM: ', Max_sentence_NUM)
    print('==>> total word num: ', len(wordLIST))
    print('==>> BBox NUM: ', BBox_validateNUM)

    # Open the output only after scanning succeeded, so a failure part-way
    # through does not leave a truncated word list behind.
    with open(word_list_path, 'w') as f:
        f.writelines(eachWord + '\n' for eachWord in wordLIST)
  • 相关阅读:
    CF1109F Sasha and Algorithm of Silence's Sounds LCT、线段树
    Solution -「CF 757F」Team Rocket Rises Again
    Solution -「ZJOI2012」「洛谷 P2597」灾难
    Solution -「CF 156D」Clues
    「矩阵树定理」学习笔记
    Solution -「JSOI2008」「洛谷 P4208」最小生成树计数
    Solution -「SHOI2016」「洛谷 P4336」黑暗前的幻想乡
    Solution -「Code+#2」「洛谷 P4033」白金元首与独舞
    Solution -「HDU 5498」Tree
    呐~「多项式」全家桶
  • 原文地址:https://www.cnblogs.com/wangxiaocvpr/p/13877028.html
Copyright © 2020-2023  润新知