import nltk


def freq_sorted(text, ranklimit):
    """Print the most frequent items of ``text`` with cumulative coverage.

    A frequency distribution is built over ``text``; its entries are ordered
    by descending count, with ties broken by ascending item order. For each
    of the first ``ranklimit`` entries one line is printed containing the
    1-based rank, the cumulative percentage of all samples covered up to and
    including that entry, and the item itself.

    Args:
        text: Iterable of hashable samples (e.g. word tokens).
        ranklimit: Maximum number of entries to print.

    Returns:
        None. The table is written to standard output.
    """
    fd = nltk.FreqDist(text)
    total = fd.N()  # loop-invariant; only divided by when fd is non-empty
    ranked = sorted(fd.items(), key=lambda item: (-item[1], item[0]))[:ranklimit]

    covered = 0
    for rank, (word, freq) in enumerate(ranked, 1):
        # Accumulate the raw count and convert to a percentage with a float
        # multiplier. Plain int / int truncates under Python 2, which rounded
        # every step down to a whole percent and made the running total stall.
        covered += freq
        print("%3d %6.2f%% %s" % (rank, covered * 100.0 / total, word))


def test_freq_sorted():
    """Demo: print the 15 most frequent tokens of the Brown corpus.

    NOTE(review): presumably requires the NLTK Brown corpus data to be
    installed locally -- confirm before running.
    """
    freq_sorted(nltk.corpus.brown.words(), 15)
结果为(Python 2 下运行所得;其中百分比按整数除法计算,每一步都被向下取整,所以累计值偏低并停在 23%):
1 5.00% the 2 10.00% , 3 14.00% . 4 17.00% of 5 19.00% and 6 21.00% to 7 22.00% a 8 23.00% in 9 23.00% that 10 23.00% is 11 23.00% was 12 23.00% for 13 23.00% `` 14 23.00% '' 15 23.00% The