urllib.urlencode
把字典数据转换为URL编码
# -*- coding: cp936 -*- import urllib params = {'score':100,'name':'爬虫基础','comment':'very good'} qs = urllib.urlencode(params) print(qs)
编码后跟在URL后面传递参数:
comment=very+good&score=100&name=%C5%C0%B3%E6%BB%F9%B4%A1
逆向
在urlparse包里
urlparse.parse_qs方法--返回字典(每个键对应的值都是列表)
import urllib import urlparse params = {'score':100,'name':'爬虫基础','comment':'very good'} qs = urllib.urlencode(params) dic = urlparse.parse_qs(qs) print(dic)
运行结果:
{'comment': ['very good'], 'score': ['100'], 'name': ['\xc5\xc0\xb3\xe6\xbb\xf9\xb4\xa1']}
对某个url提取参数:
# -*- coding: cp936 -*-
# Python 2 example: split a URL into its components with urlparse.
import urllib
import urlparse

url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python%20%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6&rsv_pq=8ddee8730003c015&rsv_t=fc02P4%2By%2FPKzaFFtaqGCGLCxPvpVojkM6zg7pgczZB%2FeZAQkXhsuWRPpHqs&rqlang=cn&rsv_enter=1&rsv_sug3=14&rsv_sug1=10&rsv_sug7=101'

# urlparse returns a ParseResult; the part after '?' is kept, still
# percent-encoded, in its `query` field.
result = urlparse.urlparse(url)
print(result)
先用urlparse.urlparse(url)方法获取result
ParseResult(scheme='https', netloc='www.baidu.com', path='/s', params='', query='ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python%20%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6&rsv_pq=8ddee8730003c015&rsv_t=fc02P4%2By%2FPKzaFFtaqGCGLCxPvpVojkM6zg7pgczZB%2FeZAQkXhsuWRPpHqs&rqlang=cn&rsv_enter=1&rsv_sug3=14&rsv_sug1=10&rsv_sug7=101', fragment='')
从result的query属性中提取参数:
# Python 2 example: extract the query parameters of a URL as a dict.
import urllib
import urlparse

url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python%20%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6&rsv_pq=8ddee8730003c015&rsv_t=fc02P4%2By%2FPKzaFFtaqGCGLCxPvpVojkM6zg7pgczZB%2FeZAQkXhsuWRPpHqs&rqlang=cn&rsv_enter=1&rsv_sug3=14&rsv_sug1=10&rsv_sug7=101'

# Step 1: split the URL and take only its query component.
result = urlparse.urlparse(url)

# Step 2: decode the query string into {name: [values]}; percent escapes
# such as %20 and %2B are turned back into the characters they stand for.
dic = urlparse.parse_qs(result.query)
print(dic)
运行结果:
{'wd': ['python \xe5\x8f\x91\xe9\x80\x81\xe9\x82\xae\xe4\xbb\xb6'], 'f': ['8'], 'rsv_enter': ['1'], 'rsv_bp': ['0'], 'rsv_t': ['fc02P4+y/PKzaFFtaqGCGLCxPvpVojkM6zg7pgczZB/eZAQkXhsuWRPpHqs'], 'rsv_idx': ['1'], 'tn': ['baidu'], 'rqlang': ['cn'], 'rsv_sug7': ['101'], 'rsv_pq': ['8ddee8730003c015'], 'rsv_sug1': ['10'], 'rsv_sug3': ['14'], 'ie': ['utf-8']}