中文输出
# -*- coding: utf-8 -*-
"""Fetch a web page and print the text of its <title> element."""
import re

import requests

# Timeout, in seconds, passed to requests.get() by banner().
timeout = 8
# Browser-like User-Agent header passed to requests.get() by banner().
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}


def banner(url):
    """Return the text of the first <title> element of the page at *url*.

    The response body is always decoded as UTF-8. Title matching is
    case-insensitive, tolerates attributes on the tag, and allows the
    title text to span several lines.

    Args:
        url: URL of the page to fetch.

    Returns:
        The title text, or the string "no" when the request fails or the
        page contains no <title> element.
    """
    try:
        html = requests.get(url, headers=headers, timeout=timeout)
        # Force UTF-8 decoding; otherwise Chinese text comes out garbled.
        html.encoding = 'utf-8'
        titles = re.findall(
            r'<title\b[^>]*>(.*?)</title>', html.text, re.I | re.S
        )
    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to
        # the "no" sentinel instead of propagating the error.
        print(e)
        return "no"

    if not titles:
        # Explicit check instead of relying on an IndexError from titles[0].
        print("no <title> found in %s" % url)
        return "no"
    return titles[0]


if __name__ == "__main__":
    print(banner('http://www.baidu.com'))
将unicode写入文本
一:
>>> f = open('1.txt','w')
>>> f.write(u'叉叉')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
>>> a = unicode.encode(u'叉叉','utf-8')
>>> f.write(a)
>>> f.close()
二:
>>> import codecs
>>> f = codecs.open('1.txt','w')
>>> f.write(u'叉叉')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
>>> f = codecs.open('1.txt','w','utf-8')
>>> f.write(u'叉叉')
>>> f.close()