# Count the words in each sentence of a file of bracketed parse trees.
#
# The input file is expected to contain one or more trees, each starting with
# "(ROOT".  For every tree the script prints its leaves (the tokens) and, at
# the end, prints the list of per-sentence counts.
from nltk.tree import Tree

# Read the whole file; the context manager closes the handle even if reading
# fails (the original left the file open).
with open('C:/Users/gao/Desktop/文档/12.txt') as f:
    # Strip newlines only.  Spaces must be kept: they separate a node label
    # from its token, e.g. "(DT The)"; removing them yields "(DTThe)", which
    # parses as a label with no leaf and makes every sentence look empty.
    se = f.read().replace("\n", "")

# Each tree begins with "(ROOT", so splitting on it gives one fragment per
# sentence, preceded by whatever text came before the first tree.
le = se.split('(ROOT')

# split() consumed the "(ROOT" marker, so put it back on every fragment.
# The first fragment is skipped because it is not a tree.
item = ['(ROOT' + fragment for fragment in le[1:]]

# Recover the tokens of each sentence by parsing its bracketed tree.
words = []  # kept for a future punctuation-filtered token list; not filled yet
lenss = []  # per-sentence word counts, in file order
for j in item:
    test = Tree.fromstring(j)
    leaves = test.leaves()
    print(leaves)
    # NOTE(review): the "- 1" presumably discounts the sentence-final
    # punctuation token - confirm against the data.
    lenss.append(len(leaves) - 1)

print(lenss)