1. 提取 URL 信息:urlparse()
# Example: split a URL into its components with urlparse().
try:
    # Python 3: the urlparse module was merged into urllib.parse.
    from urllib.parse import urlparse
except ImportError:
    # Python 2 fallback: the original standalone module.
    from urlparse import urlparse

url = "http://scrapy-chs.readthedocs.io/zh_CN/1.0/topics/items.html"

# In an interactive session this expression echoes the parsed result:
urlparse(url)
# ParseResult(scheme='http', netloc='scrapy-chs.readthedocs.io',
#             path='/zh_CN/1.0/topics/items.html', params='', query='',
#             fragment='')