• The requests module and the BeautifulSoup module


    2. requests module methods

    requests is an HTTP library written in Python; with requests you can easily perform just about any operation a browser can.

    • requests.get()

    • requests.post()

    • requests.put()

    • All of the methods above are built on top of requests.request(method, url, **kwargs); see the sketch after this list

      • method includes post, get, put, and so on
      • **kwargs includes the common parameters
        • url = '',
        • params = {'k1': 'v1', 'k2': 'v2'}, # query-string parameters (how GET passes data)
        • cookies = {},
        • headers = {}, # request headers
        • data = {}, # form body, used by POST and similar requests
        • json = {}, # JSON body parameters
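
    As a minimal sketch (httpbin.org is used here purely as a demo echo endpoint), the two calls below are equivalent:

    import requests
    
    # requests.get / requests.post simply call requests.request with the method filled in
    ret = requests.request('GET', 'http://httpbin.org/get', params={'k1': 'v1'})
    ret = requests.get('http://httpbin.org/get', params={'k1': 'v1'})
    print(ret.url)  # http://httpbin.org/get?k1=v1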

    2.1 requests.get

    requests.get(
        url='xxx',
        params={'k1':'v1','nid':888},
        cookies={},
        headers={},
    )
    
    # e.g. with url='http://www.baidu.com', the request goes to: http://www.baidu.com?k1=v1&nid=888
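
    Whichever method is called, the return value is a Response object; a quick sketch of the attributes read most often:

    import requests
    
    ret = requests.get('http://www.baidu.com', params={'k1': 'v1', 'nid': 888})
    print(ret.url)          # the final URL, query string included
    print(ret.status_code)  # e.g. 200
    ret.encoding = 'utf-8'  # override if the detected encoding is wrong
    print(ret.text)         # body decoded as text (ret.content for raw bytes)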
    

    2.2 requests.post

    requests.post(
        url='xxx',
        params={'k1':'v1','nid':888},
        cookies={},
        
        # data
        headers={'content-type': 'application/x-www-form-urlencoded'},
        data={},
        
        # json
        # headers={'content-type': 'application/json'},
        # json={}
    )
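
    The difference between data= and json= is only in how the body is encoded; a hedged sketch against httpbin.org (an echo service, used here just for illustration):

    import requests
    
    # data= sends an application/x-www-form-urlencoded body
    ret = requests.post('http://httpbin.org/post', data={'user': 'alex'})
    print(ret.json()['form'])  # {'user': 'alex'}
    
    # json= serializes the dict and sets content-type: application/json
    ret = requests.post('http://httpbin.org/post', json={'user': 'alex'})
    print(ret.json()['json'])  # {'user': 'alex'}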
    

    Other parameters

    auth: authentication

    def param_auth():
        from requests.auth import HTTPBasicAuth, HTTPDigestAuth
    
        ret = requests.get('https://api.github.com/user', auth=HTTPBasicAuth('wupeiqi', 'sdfasdfasdf'))
        print(ret.text)
    
        # ret = requests.get('http://192.168.1.1',
        # auth=HTTPBasicAuth('admin', 'admin'))
        # ret.encoding = 'gbk'
        # print(ret.text)
    
        # ret = requests.get('http://httpbin.org/digest-auth/auth/user/pass', auth=HTTPDigestAuth('user', 'pass'))
        # print(ret)
      
    

    allow_redirects: redirects (controls whether redirects are followed)

    def param_allow_redirects():
        ret = requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
        print(ret.text)
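
    With allow_redirects=False the 3xx response itself is returned instead of the final page; a sketch, assuming the test URL answers with a redirect:

    import requests
    
    ret = requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
    print(ret.status_code)              # e.g. 301/302 rather than the target page's 200
    print(ret.headers.get('Location'))  # where the server tried to send us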
    
    

    stream (when True, the response content is downloaded as a stream)

    def param_stream():
        ret = requests.get('http://127.0.0.1:8000/test/', stream=True)
        print(ret.content)
        ret.close()
    
        # from contextlib import closing
        # with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
        #     # handle the response here
        #     for i in r.iter_content():
        #         print(i)
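
    The usual reason for stream=True is to avoid buffering a large body in memory; a hedged sketch of the common chunked-download pattern (the file name is a placeholder):

    import requests
    
    with requests.get('http://httpbin.org/get', stream=True) as r:
        with open('download.bin', 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)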
    

    cert: client-side certificate (path to the certificate file)

    requests.get('https://httpbin.org/get', cert='xxxx.pem')
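
    cert also accepts a (cert, key) tuple when the certificate and private key are kept in separate files; a sketch with placeholder paths:

    import requests
    
    # single PEM file containing both certificate and key
    requests.get('https://httpbin.org/get', cert='client.pem')
    # certificate and key kept in separate files
    requests.get('https://httpbin.org/get', cert=('client.crt', 'client.key'))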
    

    session

    The session here is not the session concept covered earlier:
    a requests Session is a container that carries request headers, cookies, and related state.
    Normally every requests call has to attach cookies and the like by hand;
    with a Session, that state is carried automatically on every request it sends.

    session = requests.Session()
    session.post(url, data={})  # no need to pass cookies=cookie by hand
    
    # response_ = requests.post(url,data={},cookies=cookie)
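
    A hedged sketch of the typical login-then-browse flow (URLs and form fields are placeholders):

    import requests
    
    session = requests.Session()
    # the login response sets cookies, which the Session stores
    session.post('http://example.com/login/', data={'user': 'alex', 'pwd': '123'})
    # later requests through the same Session send those cookies automatically
    ret = session.get('http://example.com/profile/')
    print(ret.text)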
    

    3. BeautifulSoup module methods

    BeautifulSoup is a module that takes an HTML or XML string, parses it into a document tree, and then lets you use its methods to quickly locate specified elements, making lookups in HTML or XML simple.

    from bs4.element import Tag
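
    All of the snippets below assume a soup object built from sample HTML; a minimal setup sketch (the document itself is made up to match the examples):

    from bs4 import BeautifulSoup
    
    html_doc = """
    <html><body>
    <div>DIV</div>
    <a href="http://www.oldboy.com/static/1.html" class="sister" id="link1">Elsie</a>
    <a href="http://www.oldboy.com/static/2.html" class="sister" id="link2">Lacie</a>
    <a href="http://www.oldboy.com/static/3.html" class="sister" id="link3">Tillie</a>
    </body></html>
    """
    soup = BeautifulSoup(html_doc, 'html.parser')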

    1, name, the tag name

    # tag = soup.find('a')
    # name = tag.name  # get
    # print(name)
    # tag.name = 'span'  # set
    # print(soup)
    

    2, attrs, the tag's attributes

    # tag = soup.find('a')
    # attrs = tag.attrs  # get
    # print(attrs)
    # tag.attrs = {'ik': 123}  # set (replaces all attributes)
    # tag.attrs['id'] = 'iiiii'  # set a single attribute
    # print(soup)
    

    3, children, all direct child tags

    # body = soup.find('body')
    # v = body.children
    

    4, descendants, all descendant tags

    # body = soup.find('body')
    # v = body.descendants
    

    5, clear, empty out all of a tag's children (the tag itself is kept)

    # tag = soup.find('body')
    # tag.clear()
    # print(soup)
    

    10, find, get the first matching tag

    # tag = soup.find('a')
    # print(tag)
    # tag = soup.find(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
    # tag = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
    # print(tag)
    

    11, find_all, get all matching tags

    # tags = soup.find_all('a')
    # print(tags)
     
    # tags = soup.find_all('a',limit=1)
    # print(tags)
     
    # tags = soup.find_all(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
    # # tags = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
    # print(tags)
     
     
    # ####### lists: match any of several values #######
    # v = soup.find_all(name=['a','div'])
    # print(v)
     
    # v = soup.find_all(class_=['sister0', 'sister'])
    # print(v)
     
    # v = soup.find_all(text=['Tillie'])
    # print(v, type(v[0]))
     
     
    # v = soup.find_all(id=['link1','link2'])
    # print(v)
     
    # v = soup.find_all(href=['link1','link2'])
    # print(v)
     
    # ####### regex #######
    import re
    # rep = re.compile('p')
    # rep = re.compile('^p')
    # v = soup.find_all(name=rep)
    # print(v)
     
    # rep = re.compile('sister.*')
    # v = soup.find_all(class_=rep)
    # print(v)
     
    # rep = re.compile('http://www.oldboy.com/static/.*')
    # v = soup.find_all(href=rep)
    # print(v)
     
    # ####### filtering with a function #######
    # def func(tag):
    #     return tag.has_attr('class') and tag.has_attr('id')
    # v = soup.find_all(name=func)
    # print(v)
     
     
    # ## get, fetch a tag's attribute value
    # tag = soup.find('a')
    # v = tag.get('id')
    # print(v)
    

    12, has_attr, check whether the tag has a given attribute

    # tag = soup.find('a')
    # v = tag.has_attr('id')
    # print(v)
    

    13, get_text, get the text inside a tag

    # tag = soup.find('a')
    # v = tag.get_text()  # note: get_text's first argument is a separator string, not an attribute name
    # print(v)
    

    16, tags related to the current tag

    # soup.next
    # soup.next_element
    # soup.next_elements
    # soup.next_sibling
    # soup.next_siblings
     
    #
    # tag.previous
    # tag.previous_element
    # tag.previous_elements
    # tag.previous_sibling
    # tag.previous_siblings
     
    #
    # tag.parent
    # tag.parents
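
    next/previous walk the parse tree element by element, while the sibling variants stay at one level; a small sketch using the sample soup from above:

    tag = soup.find('div')
    print(repr(tag.next_element))  # 'DIV' -- parse order enters the tag's own text first
    print(repr(tag.next_sibling))  # may be a whitespace text node, not the next tag
    print(tag.parent.name)         # body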
    

    17, search among a tag's related tags

    # tag.find_next(...)
    # tag.find_all_next(...)
    # tag.find_next_sibling(...)
    # tag.find_next_siblings(...)
     
    # tag.find_previous(...)
    # tag.find_all_previous(...)
    # tag.find_previous_sibling(...)
    # tag.find_previous_siblings(...)
     
    # tag.find_parent(...)
    # tag.find_parents(...)
     
    # parameters are the same as find_all
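
    Unlike the bare attributes in the previous section, the find_* variants skip text nodes and filter like find_all; a sketch against the sample soup:

    tag = soup.find('div')
    print(tag.find_next_sibling('a'))       # the first <a> after the div at the same level
    print(tag.find_all_next('a', limit=2))  # the next two <a> tags in document order
    print(soup.find('a', id='link2').find_parent('body').name)  # body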
    

    20, append, append a tag at the end of the current tag's contents

    # tag = soup.find('body')
    # tag.append(soup.find('a'))
    # print(soup)
    #
    # from bs4.element import Tag
    # obj = Tag(name='i',attrs={'id': 'it'})
    # obj.string = 'I am new here'
    # tag = soup.find('body')
    # tag.append(obj)
    # print(soup)
    

    21, insert, insert a tag at a given position inside the current tag

    # from bs4.element import Tag
    # obj = Tag(name='i', attrs={'id': 'it'})
    # obj.string = 'I am new here'
    # tag = soup.find('body')
    # tag.insert(2, obj)
    # print(soup)
    

    22, insert_after / insert_before, insert after or before the current tag

    # from bs4.element import Tag
    # obj = Tag(name='i', attrs={'id': 'it'})
    # obj.string = 'I am new here'
    # tag = soup.find('body')
    # # tag.insert_before(obj)
    # tag.insert_after(obj)
    # print(soup)
    

    23, replace_with, replace the current tag with the specified one

    # from bs4.element import Tag
    # obj = Tag(name='i', attrs={'id': 'it'})
    # obj.string = 'I am new here'
    # tag = soup.find('div')
    # tag.replace_with(obj)
    # print(soup)
    

    24, create relationships between tags

    # tag = soup.find('div')
    # a = soup.find('a')
    # tag.setup(previous_sibling=a)  # only rewires the navigation pointers; the rendered document is unchanged
    # print(tag.previous_sibling)
    