import csv
# Load the SMS corpus into two parallel lists: sms_label[i] is the first
# field of row i and sms_data[i] is the second field of the same row.
# NOTE(review): on Windows 'F:name' is a drive-relative path (no backslash
# after the drive letter) -- confirm this is the intended location.
file_path = r'F:SMSSpamCollectionjs.txt'
sms_data = []
sms_label = []
# The with-block closes the file even if parsing raises; newline='' lets the
# csv module do its own line-ending handling, as its documentation requires.
with open(file_path, 'r', encoding='utf-8', newline='') as sms:
    # Read the mail data with csv. The delimiter used to be a single space,
    # which made line[1] the second *word* rather than the message body.
    # NOTE(review): assumes the file is tab-separated "label<TAB>message",
    # like the standard SMS Spam Collection -- confirm against the data file.
    # NOTE(review): the default quotechar '"' is still active; messages that
    # start with a double quote may be merged -- consider csv.QUOTE_NONE.
    csv_reader = csv.reader(sms, delimiter='\t')
    for line in csv_reader:
        # Blank or malformed rows have fewer than two fields; skip them
        # instead of failing with IndexError.
        if len(line) < 2:
            continue
        sms_label.append(line[0])
        sms_data.append(line[1])
        # TODO: append preprocessing(line[1]) instead of the raw text once
        # preprocessing() is implemented.
print(len(sms_label))
# Bare expression kept from the notebook: displays the labels when run in a
# cell, and is a no-op when run as a script.
sms_label
# Read the entire corpus file into one string.
file_path = r'F:SMSSpamCollectionjs.txt'
# The with-block guarantees the handle is closed; the original never closed it.
with open(file_path, 'r', encoding='utf-8') as fo:
    text = fo.read()
# Bare expression kept from the notebook: displays the text when run in a
# cell, and is a no-op when run as a script.
text
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# The class is spelled WordNetLemmatizer; the previous import of
# "WordNetlemmatizer" raised ImportError. The misspelled name is kept as an
# alias so any later code that refers to it keeps working.
WordNetlemmatizer = WordNetLemmatizer

# Called without arguments, this launches NLTK's interactive downloader so the
# required corpora can be picked by hand.
nltk.download()

# Sample message used to try out the preprocessing steps; this replaces any
# value previously bound to `text`.
text = "'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..'"
def preprocessing(text):
#text