import requests
import os
# Plain URL request
def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Before decoding, the response encoding is replaced with the encoding
    detected from the content itself (``apparent_encoding``).

    Args:
        url: Address to request.

    Returns:
        The decoded response body, or the sentinel string "产生异常" when the
        request fails (connection error, timeout, invalid URL, or a 4xx/5xx
        HTTP status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raises requests.HTTPError on a 4xx/5xx status
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; KeyboardInterrupt,
        # SystemExit and programming errors are allowed to propagate.
        return "产生异常"
# URL request with query parameters
def getHTMLParams(url, params):
    """Fetch ``url`` with query parameters and return the response body as text.

    Before decoding, the response encoding is replaced with the encoding
    detected from the content itself (``apparent_encoding``).

    Args:
        url: Address to request.
        params: Query-string parameters passed to ``requests.get``
            (for example ``{'wd': 'python'}``).

    Returns:
        The decoded response body, or the sentinel string "产生异常" when the
        request fails (connection error, timeout, invalid URL, or a 4xx/5xx
        HTTP status).
    """
    try:
        # A timeout keeps a stalled server from blocking the caller forever,
        # matching the limit used by getHTMLText.
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()  # raises requests.HTTPError on a 4xx/5xx status
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; KeyboardInterrupt,
        # SystemExit and programming errors are allowed to propagate.
        return "产生异常"
# Download an image; ``path`` is the file path to store it at
def getPicture(url, path):
    """Download ``url`` and save the raw response bytes to ``path``.

    The parent directory of ``path`` is created when missing. An existing
    file at ``path`` is left untouched and nothing is downloaded. The outcome
    is reported on stdout; the function returns ``None`` in every case.

    Args:
        url: Address of the image to download.
        path: Destination file path.
    """
    try:
        # Derive the target directory from ``path`` itself rather than from a
        # module-level ``root`` variable that exists only in script mode.
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        if os.path.exists(path):
            print('文件已存在')
            return
        r = requests.get(url, timeout=30)
        # Do not store an HTTP error page as if it were the image.
        r.raise_for_status()
        # The file is opened only after a successful download, so a failed
        # request does not leave an empty file behind.
        with open(path, 'wb') as f:
            f.write(r.content)
        print('文件保存成功')
    except (requests.RequestException, OSError):
        # Network and filesystem failures are reported, not raised.
        print('爬取失败')
if __name__ == "__main__":
    # Earlier experiments, kept for reference:
    #   url = "http://www.baidu.com"
    #   print(getHTMLText(url))
    #
    #   kv = {'wd': 'python'}
    #   url = "http://www.baidu.com/s"
    #   r = getHTMLParams(url, kv)
    #   print(len(r))

    # Simple image-download example: store the picture under ``root`` using
    # the last segment of the URL as the file name.
    root = "D://pics//"
    url = "http://pic41.nipic.com/20140508/18609517_112216473140_2.jpg"
    filename = url.rsplit('/', 1)[-1]
    path = root + filename
    getPicture(url, path)
# Notes:
# - robots protocol: for an example, see JD's https://www.jd.com/robots.txt
# - Baidu keyword-search endpoint: http://www.baidu.com/s?wd=keyword
# - www.ip138.com: IP address location lookup