# -*- coding: utf-8 -*-
"""Extract the Chinese text from one column of a CSV file of Weibo comments."""
import csv
import re

# Default input file read when this module is run as a script.
csvfile = 'weibo.csv'

# Runs of CJK ideographs in U+4E00-U+9FA5. The backslashes are required:
# without them the class would match the literal ASCII characters
# 'u', '4', 'e', '0'-'u', ... instead of Chinese characters.
pattern = re.compile(r'[\u4e00-\u9fa5]+')


def columns_data(path, column):
    """Return every value of ``column`` in the CSV at ``path``, concatenated.

    Args:
        path: Path of a UTF-8 encoded CSV file whose first row is the header.
        column: Header name of the column to collect.

    Returns:
        A single string made of the column's cells in file order, with no
        separator between them. Cells that are absent (short rows, or a
        column name that is not in the header) contribute nothing.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing newlines are parsed correctly; the context manager
    # guarantees the handle is closed even if parsing fails.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        reader = csv.DictReader(handle)
        # DictReader yields None for missing cells; treat those as empty
        # instead of failing on str + None.
        return ''.join(row.get(column) or '' for row in reader)


if __name__ == '__main__':
    comments = columns_data(csvfile, 'comment')
    print(comments)

    # Keep only the Chinese fragments, dropping digits, Latin text,
    # punctuation and whitespace.
    filterdata = pattern.findall(comments)
    print(filterdata)

    cleaned_comments = ''.join(filterdata)
    print(cleaned_comments)
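# Data source: Sina Weibo comments scraped with Python and written to a CSV file.
# Equivalent task: filtering the Chinese characters out of a piece of text.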