• 易知大学 任务25 第一次爬虫与测试


    1.

    (1)测试

    simOneGame(probA,porbB)

    def printIntroduce():
        print('学号39,This program simulates a game between two players,A and  B')
        print('Probability(a number between 0~1) is used')
    try:
        printIntroduce()
    except:
        print("error")

    2)

    getInput()

    def getInput():
        a = eval(input("请输入选手A的能力值(0-1):"))
     
        b = eval(input("请输入选手B的能力值(0-1):"))
     
        m=eval(input("比赛的局数:"))
     
        n = eval(input("模拟比赛的场次:"))
        return a,b,m,n
    try:
        getInput()
    except:
        print("error")
    View Code

    3) printSummary(winsA,winsB)

    def printSummary(winsA,winsB):
     
        n = winsA + winsB
     
        print("竞技分析开始,共模拟{}场比赛".format(n))
     
        print("选手A获胜{}场比赛,占比{:0.1%}".format(winsA,winsA/n))
               
        print("选手B获胜{}场比赛,占比{:0.1%}".format(winsB,winsB/n))
    try:
        printSummary(6,4)
    except:
        print("error")
    View Code

    4)simNGames(m,n,probA,proB)

    def simNGames(m,n,probA,probB):
    
        winsA,winsB = 0,0
        wa,wb=0,0
    
        for i in range(n):
            for i in range(m):
    
                #scoreA,scoreB = simOneGame(probA,probB)
                scoreA,scoreB=probA,probB
                if scoreA > scoreB:
                    wa += 1
                else:
                    wb += 1
    
                if wa==2:
                    winsA+=1
                    wa,wb=0,0
                    break
                if wb==2:
                    winsB+=1
                    wa,wb=0,0
                    break
        return winsA,winsB
    
    m=eval(input())
    n=eval(input())
    probA=eval(input())
    probB=eval(input())
    print(simNGames(m,n,probA,probB))

    (2)访问网站20次

     1 import requests
     2 def gethtml(url):
     3     try:
     4         r=requests.get(url)
     5         r.raise_for_status()
     6         r.encoding='utf-8'
     7         return r.text,r.status_code,len(r.text),len(r.text),len(r.content)
     8     except:
     9         return ""
    10     
    11 url="https://cn.bing.com/"
    12 for i in range(20):
    13     print(i)
    14     print(gethtml(url))
    View Code

    结果:

    (3)html

    from bs4 import BeautifulSoup
    import re
    soup=BeautifulSoup("<head><title>菜鸟教程(runoob.com)</title></head><body><h1>我的第一个标题</h1><p id=frist>我的第一个段落。</p></body><tr><td>row 1,cell 1</td><td>row 1,cell 2</td></tr><tr><td>row 2,cell 1</td><td>row 2,cell 2</td></tr></body></table>","html.parser")
    print(soup.head,"08")   
    print(soup.body)      
    print(soup.find_all(id="china"))  
    r=soup.text
    pat = re.findall(u'[u1100-uFFFDh]+?',r)
    print(pat)

    (4)中国大学排名

    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    allUniv=[]
    
    def getHTMLText(url):
        try:
            r=requests.get(url,timeout=30)
            r.raise_for_status()
            r.encoding='utf-8'
            return r.text
        except:
            return ""
        
    def fillUnivList(soup):
        data=soup.find_all('tr')
        for tr in data:
            ltd=tr.find_all('td')
            if len(ltd)==0:
                continue
            singleUniv=[]
            for td in ltd:
                singleUniv.append(td.string)
            allUniv.append(singleUniv)
            
    def printUnivList(num):
        print("{:^4}{:^10}{:^5}{:^8}{:^10}".format("排名","学校名称","省市","总分","年费"))
        for i in range(num):
            u=allUniv[i]
            print("{:^4}{:^10}{:^5}{:^8}{:^10}".format(u[0],u[1],u[2],u[3],u[6]))
        return u
            
    def main(num):
        url='http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html'
        html=getHTMLText(url)
        soup=BeautifulSoup(html,"html.parser")
        fillUnivList(soup)
        printUnivList(num)    
    
    def writecsv(list):
        name = ['排名', '学校名称', '省份', '总分', '生源质量(新生高考成绩得分)', '培养结果(毕业生就业率)', '社会声誉(社会捐赠收入·千元)', '科研规模(论文数量·篇)',
                '科研质量(论文质量·FWCI)', '顶尖成果(高被引论文·篇)', '顶尖人才(高被引学者·人)', '科技服务(企业科研经费·千元)', '成果转化(技术转让收入·千元)', '学生国际化(留学生比例)']
        name2 = ['a', 'b', 'c']
        test = pd.DataFrame(columns=name, data=list)
        test.to_csv(r'C:UserslenovoDesktop作业集合python编程作业craw.csv', encoding='gbk')
    
    def main(num):
        url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html'
        html = getHTMLText(url)
        soup = BeautifulSoup(html, "html.parser")
        fillUnivList(soup)
        try:
            printUnivList(num)
        except:
            pass
    main(560)

  • 相关阅读:
    一个apache安装后无法启动的原因分析
    数字的一点考虑
    [转]bat方式上删除注册表键,项
    题解 P2016 【战略游戏】
    题解 P1403 【[AHOI2005]约数研究】
    题解 P1317 【低洼地】
    2020面向对象程序设计寒假作业3 设计思想
    题解 P1829 【[国家集训队]Crash的数字表格 / JZPTAB】
    题解 P1082 【同余方程】
    Unity3D读取外部Text
  • 原文地址:https://www.cnblogs.com/modiqiang/p/12883549.html
Copyright © 2020-2023  润新知