from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB


def article_category():
    """Classify two sample sentences into one of four newsgroup categories.

    Loads the training split of the 20-newsgroups dataset restricted to
    four categories, converts the documents to TF-IDF features, fits a
    multinomial Naive Bayes classifier on them, and prints the predicted
    category name for each sample document.

    Returns:
        None. The predictions are printed to standard output.
    """
    categories = [
        'alt.atheism',
        'soc.religion.christian',
        'comp.graphics',
        'sci.med',
    ]
    # NOTE: this call may fetch the dataset over HTTPS, so it can fail with
    # urllib.error.URLError when the connection cannot be established.
    twenty_train = fetch_20newsgroups(subset='train', categories=categories)

    # Learn the vocabulary and IDF weights from the training documents.
    tfidf_transformer = TfidfVectorizer()
    X_train_tfidf = tfidf_transformer.fit_transform(twenty_train.data)

    clf = MultinomialNB(alpha=1.0).fit(X_train_tfidf, twenty_train.target)

    docs_new = ['Chemical reaction', ' Intel CPU is good']
    # Use transform (not fit_transform) so the new documents are encoded
    # with the vocabulary learned from the training data.
    X_new_tfidf = tfidf_transformer.transform(docs_new)
    predicted = clf.predict(X_new_tfidf)

    # Each prediction is an index into twenty_train.target_names.
    for doc, category in zip(docs_new, predicted):
        print('%r => %s' % (doc, twenty_train.target_names[category]))


article_category()
在运行上述代码的时候,报了一个错误:
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed
解决办法:
import ssl

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Workaround for "[SSL: CERTIFICATE_VERIFY_FAILED]" raised while the dataset
# is being downloaded: make urllib build HTTPS contexts that skip certificate
# verification.
# NOTE(security): this disables TLS certificate checks for every HTTPS request
# made through urllib in this process and relies on private ssl attributes.
# Use it only for a one-off download on a trusted network; fixing the local
# CA certificate store is the safer long-term solution.
ssl._create_default_https_context = ssl._create_unverified_context


def article_category():
    """Classify two sample sentences into one of four newsgroup categories.

    Loads the training split of the 20-newsgroups dataset restricted to
    four categories, converts the documents to TF-IDF features, fits a
    multinomial Naive Bayes classifier on them, and prints the predicted
    category name for each sample document.

    Returns:
        None. The predictions are printed to standard output.
    """
    categories = [
        'alt.atheism',
        'soc.religion.christian',
        'comp.graphics',
        'sci.med',
    ]
    # NOTE: this call may fetch the dataset over HTTPS; the ssl override
    # above must already be in place when it runs.
    twenty_train = fetch_20newsgroups(subset='train', categories=categories)

    # Learn the vocabulary and IDF weights from the training documents.
    tfidf_transformer = TfidfVectorizer()
    X_train_tfidf = tfidf_transformer.fit_transform(twenty_train.data)

    clf = MultinomialNB(alpha=1.0).fit(X_train_tfidf, twenty_train.target)

    docs_new = ['Chemical reaction', ' Intel CPU is good']
    # Use transform (not fit_transform) so the new documents are encoded
    # with the vocabulary learned from the training data.
    X_new_tfidf = tfidf_transformer.transform(docs_new)
    predicted = clf.predict(X_new_tfidf)

    # Each prediction is an index into twenty_train.target_names.
    for doc, category in zip(docs_new, predicted):
        print('%r => %s' % (doc, twenty_train.target_names[category]))


article_category()
这样就可以解决了。如果你想完美输出上面代码的结果,还需要下载 20newsgroups 数据集文件:如果你的电脑能够自动下载,那就没有问题;如果是手动下载,需要把文件放入用户目录下面的