爬虫学习汇总

import requests
import os

# Plain URL request
def getHTMLText(url):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The decoded response body on success, or the sentinel string
        "产生异常" when the request fails (connection error, timeout,
        invalid URL, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Raises requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        # Decode with the encoding detected from the body; the header-declared
        # one is often missing or wrong on Chinese sites.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; programming errors and
        # KeyboardInterrupt propagate instead of being silently swallowed.
        return "产生异常"

# URL request with query parameters
def getHTMLParams(url, params):
    """Fetch ``url`` with query-string parameters and return the body as text.

    Args:
        url: Address to request.
        params: Query parameters forwarded to ``requests.get`` (for example
            a dict such as ``{'wd': 'python'}``).

    Returns:
        The decoded response body on success, or the sentinel string
        "产生异常" when the request fails (connection error, timeout,
        invalid URL, or a 4xx/5xx status).
    """
    try:
        # Same timeout as getHTMLText so a stalled server cannot hang forever.
        r = requests.get(url, params=params, timeout=30)
        # Raises requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        # Decode with the encoding detected from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; other errors propagate.
        return "产生异常"

# Download an image; ``path`` is the file path to store it at
def getPicture(url, path):
    """Download ``url`` and save the raw bytes to ``path``.

    Nothing is downloaded when ``path`` already exists. The parent directory
    of ``path`` is created when missing. The outcome is reported on stdout.

    Args:
        url: Address of the image to download.
        path: Destination file path.

    Returns:
        None. Prints '文件保存成功' after saving, '文件已存在' when the file
        is already present, and '爬取失败' on a request or filesystem error.
    """
    try:
        if os.path.exists(path):
            print('文件已存在')
            return
        # Derive the directory from ``path`` itself instead of relying on a
        # global defined by the caller.
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        r = requests.get(url, timeout=30)
        # Do not save an HTTP error page as if it were the image.
        r.raise_for_status()
        with open(path, 'wb') as f:
            f.write(r.content)
        print('文件保存成功')
    except (requests.RequestException, OSError):
        print('爬取失败')


if __name__ == "__main__":
    # Plain-request example (disabled):
    # url = "http://www.baidu.com"
    # print(getHTMLText(url))

    # Parameterised-request example (disabled):
    # kv = {'wd': 'python'}
    # url = "http://www.baidu.com/s"
    # r = getHTMLParams(url, kv)
    # print(len(r))

    # Simple image download example.
    url = "http://pic41.nipic.com/20140508/18609517_112216473140_2.jpg"
    # Destination directory; the file keeps the last segment of the URL as
    # its name.
    root = "D://pics//"
    path = root + url.rsplit('/', 1)[-1]
    getPicture(url, path)
#robots协议  举例可以查看京东https://www.jd.com/robots.txt
#百度搜索关键字接口
#http://www.baidu.com/s?wd=keyword

#www.ip138.com  IP归属地查询

相关阅读:
开发板S3C2440挂起NFS步骤
 Win10系统 Atheros AR9271 Wireless Network Adapter USBwifi无线网卡的驱动安装解决无法搜索wifi信号，连接wifi信号无法上网的问题
 编写一个多线程函数实现对数组排序，要求： 1.至少用两个线程 2.数组的元素值可以事先定义好，或者可以从键盘输入（增加一个线程）。 3.用一个线程对数组排序，用另一个线程输出排序结果。 4.保证先排好序，再输出。
led.c驱动框架2nd
led.c驱动框架
 文件I/O的操作实例
 Python os.removedirs() 和shutil.rmtree() 用于删除文件夹
 Python os.remove() 删除文件
 Python os.chdir() 方法
 Python os.access() 方法
原文地址：https://www.cnblogs.com/mutong1228/p/11373740.html