urllib basics
import urllib.request
urlretrieve(url, local file path): download a web page straight to a local file
urllib.request.urlretrieve("http://www.baidu.com", "F:/baidu.html")  # the save path is just an example
Clear the cache left behind by urlretrieve: urlcleanup()
urllib.request.urlcleanup()
View summary information about the page response: info()
file = urllib.request.urlopen("http://www.baidu.com")
print(file.info())
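info() returns the response headers as an email.message.Message-like object, so individual header fields can be read by name (a sketch):
file = urllib.request.urlopen("http://www.baidu.com")
headers = file.info()
print(headers["Content-Type"])  # e.g. text/html;charset=utf-8
print(headers["Server"])        # value depends on the server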
Get the page's HTTP status code: getcode()
print(file.getcode())
Get the URL of the page currently being visited: geturl()
print(file.geturl())
Timeout setting
timeout: pass timeout (in seconds) as a keyword argument to urlopen(); if the server does not respond in time, an exception is raised.
for i in range(0, 100):
    try:
        # timeout=1: give up if the server does not respond within 1 second
        file = urllib.request.urlopen("http://www.baidu.com", timeout=1)
        print(len(file.read().decode("utf-8")))
    except Exception as err:
        print("Exception occurred: " + str(err))
Automating HTTP requests: automated Baidu keyword search
import urllib.request,re
keywd = "giao"
keywd = urllib.request.quote(keywd)  # URL-encode the keyword
#page = (num-1)*10
for i in range(1, 11):
    url = "http://www.baidu.com/s?wd=" + keywd + "&pn=" + str((i - 1) * 10)
    data = urllib.request.urlopen(url).read().decode("utf-8")
    pat = '"title":"(.*?)"'  # assumed title pattern; depends on Baidu's current page markup
    rst = re.compile(pat).findall(data)
    for j in range(0, len(rst)):
        print(rst[j])
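Baidu may serve different markup to a bare urllib client, so the title pattern above can come back empty; sending a browser-style User-Agent via urllib.request.Request often helps (a sketch; the header value is just an example):
import urllib.request

keywd = urllib.request.quote("giao")
url = "http://www.baidu.com/s?wd=" + keywd
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
data = urllib.request.urlopen(req).read().decode("utf-8")
print(len(data))  # length of the returned HTML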