• 吴裕雄 实战python编程(2)


    from urllib.parse import urlparse

    # Break a sample URL into its components and display each of them.
    url = 'http://www.pm25x.com/city/beijing.htm'
    o = urlparse(url)
    print(o)

    # One line per component. For this URL the values are: http,
    # www.pm25x.com, None (no explicit port), /city/beijing.htm and an
    # empty query string.
    report = [
        "scheme={}".format(o.scheme),
        "netloc={}".format(o.netloc),
        "port={}".format(o.port),
        "path={}".format(o.path),
        "query={}".format(o.query),
    ]
    print("\n".join(report))

    import requests

    # Download the page and print its full HTML source.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Force the codec used to decode html.text (presumably the site serves
    # GBK-encoded pages -- confirm against the response headers).
    html.encoding = "GBK"
    print(html.text)

    import requests
    import numpy as np

    # Download the page, split it into lines and print every line.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Force the codec used to decode html.text (presumably the site serves
    # GBK-encoded pages -- confirm against the response headers).
    html.encoding = "gbk"
    htmllist = html.text.splitlines()
    print(type(htmllist))      # splitlines() returns a list
    print(np.shape(htmllist))  # (number_of_lines,)
    # The loop body must be indented; it was flattened in the original listing.
    for row in htmllist:
        print(row)

    import requests

    # Count how many lines of the page contain the keyword.
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    # Force the codec used to decode html.text (presumably the site serves
    # GBK-encoded pages -- confirm against the response headers).
    html.encoding = "gbk"

    htmllist = html.text.splitlines()
    # A line counts once, however many times the keyword occurs in it.
    n = sum(1 for row in htmllist if "新概念" in row)
    print("找到 {} 次!".format(n))

    import re

    # One or more lowercase ASCII letters, compiled once for reuse.
    pat = re.compile(r'[a-z]+')

    # match() only tries at the start of the string, so it stops at the
    # first digit.
    m = pat.match('tem12po')
    print(m)

    # match() returns None when nothing matches; compare by identity.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 0
        print(m.end())    # 3
        print(m.span())   # (0, 3)

    import re

    # Same match as with a compiled pattern, using the module-level helper:
    # match() only tries at the start of the string.
    m = re.match(r'[a-z]+', 'tem12po')
    print(m)

    # match() returns None when nothing matches; compare by identity.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 0
        print(m.end())    # 3
        print(m.span())   # (0, 3)

    import re

    # One or more lowercase ASCII letters, compiled once for reuse.
    pat = re.compile(r'[a-z]+')

    # Unlike match(), search() scans the whole string for the first hit,
    # so the leading digit is skipped.
    m = pat.search('3tem12po')
    print(m)  # <re.Match object; span=(1, 4), match='tem'> on Python 3.7+

    # search() returns None when nothing matches; compare by identity.
    if m is not None:
        print(m.group())  # tem
        print(m.start())  # 1
        print(m.end())    # 4
        print(m.span())   # (1, 4)

    import re

    # Pattern for runs of lowercase ASCII letters.
    pat = re.compile('[a-z]+')

    # Gather every non-overlapping run of letters, left to right.
    m = [hit.group() for hit in pat.finditer('tem12po')]
    print(m)  # ['tem', 'po']

    import re

    import requests

    # E-mail pattern: local part, "@", domain label, a literal dot, then the
    # rest of the domain. The dot is escaped so that it no longer matches an
    # arbitrary character, and the hyphen sits last in the final class so it
    # is unambiguously a literal.
    regex = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+')
    url = 'http://www.wsbookshow.com/'
    # Without a timeout requests waits indefinitely on an unresponsive server.
    html = requests.get(url, timeout=10)
    emails = regex.findall(html.text)
    # Print every address found in the page source, in order of appearance.
    for email in emails:
        print(email)

  • 相关阅读:
    后缀自动机学习笔记
    [bzoj4516][Sdoi2016]生成魔咒——后缀自动机
    [bzoj1692][Usaco2007 Dec]队列变换——贪心+后缀数组
    BZOJ4811 [Ynoi2017]由乃的OJ
    codeforces796E Exam Cheating
    BZOJ1004 [HNOI2008]Cards
    BZOJ1798 [Ahoi2009]Seq 维护序列seq
    BZOJ4785 [Zjoi2017]树状数组
    UOJ207 共价大爷游长沙
    POJ3768 Katu Puzzle
  • 原文地址:https://www.cnblogs.com/tszr/p/10061172.html
Copyright © 2020-2023  润新知