"""Minimal example: approximate nearest-neighbour text search with PySparNN.

Sentences are turned into TF-IDF vectors, indexed with a
``MultiClusterIndex``, and then queried with two new sentences.
"""

import pysparnn.cluster_index as ci
from sklearn.feature_extraction.text import TfidfVectorizer

# Corpus to index; each string is also the payload returned by a search hit.
data = [
    "hello world",
    "oh hello there",
    "Play it",
    "Play it again Sam",
]

# Learn the vocabulary / IDF weights from the corpus.
tv = TfidfVectorizer()
tv.fit(data)

# Feature vectors for the corpus (one sparse row per sentence).
features_vec = tv.transform(data)

# Build the search index, pairing each vector with its original sentence.
cp = ci.MultiClusterIndex(features_vec, data)

# Queries must be vectorised with the SAME fitted vectorizer so that their
# columns line up with the indexed vectors.
search_data = [
    "oh there",
    "Play it again Frank",
]
search_feature_vec = tv.transform(search_data)

# k is the number of results returned per query; k_clusters is the number of
# clusters searched per query (see the PySparNN docs for the recall/speed
# trade-off). return_distance=False returns only the matched records.
print(cp.search(search_feature_vec, k=1, k_clusters=2, return_distance=False))

# Example output (one result list per query):
# [['oh hello there'], ['Play it again Sam']]