• 第一个爬虫和测试


    1.测试球赛程序中的所有函数

    import random 

    from math import *
    def printIntro():
        """Print the introductory banner for the table-tennis simulation."""
        print("模拟乒乓球竞赛")
        print("学号23")
        print("程序运行需要A和B的能力值(以0到1之间的小数表示)")


    def getInputs():
        """Prompt for the simulation parameters and return them.

        Returns:
            A tuple ``(a, b, h, n)``: the ability of player A, the ability of
            player B (floats), the number of games per match and the number of
            matches to simulate (ints).

        Raises:
            ValueError: if an answer cannot be parsed as a number.
        """
        # Parse with float()/int() instead of eval(): eval would execute any
        # expression the user types at the prompt.
        a = float(input("请输入选手A的能力值(0-1): "))
        b = float(input("请输入选手B的能力值(0-1): "))
        h = int(input("请输入一场要打几局:"))
        n = int(input("模拟比赛的场次: "))
        return a, b, h, n

    def printSummary(winsA, winsB):
        """Print the number of simulated matches and each player's share of wins.

        Args:
            winsA: matches won by player A.
            winsB: matches won by player B.
        """
        n = winsA + winsB
        print("竞技分析开始, 共模拟{}场比赛".format(n))
        # With zero simulated matches the ratio is undefined; report 0% instead
        # of raising ZeroDivisionError.
        shareA = winsA / n if n else 0.0
        shareB = winsB / n if n else 0.0
        print("选手A获胜{}场比赛, 占比{:0.1%}".format(winsA, shareA))
        print("选手B获胜{}场比赛, 占比{:0.1%}".format(winsB, shareB))
    def gameOver(scoreA, scoreB):
        """Tell whether a single game has been decided.

        A game is over once either side has at least 11 points and leads by at
        least two. The previous condition also ended the game at 11-10, which
        skipped the two-point-lead requirement.

        Args:
            scoreA: current points of player A.
            scoreB: current points of player B.

        Returns:
            The tuple ``(scoreA, scoreB)`` when the game is over (a non-empty
            tuple, hence always truthy), otherwise ``None``.
        """
        if max(scoreA, scoreB) >= 11 and abs(scoreA - scoreB) >= 2:
            return scoreA, scoreB
        return None


    def simOneGame(probA, probB, h):
        """Simulate one match of at most ``h`` games and return the games won.

        The match stops as soon as one side has won a majority of the ``h``
        games (4 of 7, 3 of 5). Within a game only the serving side can score;
        a lost rally hands the serve to the opponent. Player A serves first in
        every game.

        Args:
            probA: probability that A wins a rally on A's own serve.
            probB: probability that B wins a rally on B's own serve.
            h: maximum number of games in the match.

        Returns:
            A tuple ``(roundA, roundB)`` with the games won by A and by B.

        Raises:
            ValueError: if neither player can ever score, because the game
                loop would then never terminate.
        """
        if probA <= 0 and probB <= 0:
            raise ValueError("at least one of probA and probB must be positive")

        # The tallies must live outside both loops; resetting them per game or
        # per rally would leave only the last game counted.
        roundA = roundB = 0
        needed = h // 2 + 1
        for _ in range(h):
            if roundA >= needed or roundB >= needed:
                break
            serving = "A"
            scoreA, scoreB = 0, 0
            while not gameOver(scoreA, scoreB):  # play one game
                if serving == "A":
                    if random.random() < probA:
                        scoreA += 1
                    else:
                        serving = "B"
                else:
                    if random.random() < probB:
                        scoreB += 1
                    else:
                        serving = "A"
            if scoreA > scoreB:
                roundA += 1
            else:
                roundB += 1
        return roundA, roundB


    def simNGames(n, probA, probB, h):
        """Simulate ``n`` matches and count how many each player wins.

        Args:
            n: number of matches to simulate.
            probA: ability value of player A, passed through to simOneGame.
            probB: ability value of player B, passed through to simOneGame.
            h: number of games per match, passed through to simOneGame.

        Returns:
            A tuple ``(winsA, winsB)``. A match in which A did not win strictly
            more games than B is counted for B.
        """
        winsA = winsB = 0
        for _ in range(n):
            roundA, roundB = simOneGame(probA, probB, h)
            if roundA > roundB:
                winsA += 1
            else:
                winsB += 1
        return winsA, winsB

    def main():
        """Run the interactive simulation: intro, prompts, simulation, summary."""
        printIntro()
        # Abilities of A and B, games per match, number of matches.
        probA, probB, h, n = getInputs()
        # Matches won by A and by B.
        winsA, winsB = simNGames(n, probA, probB, h)
        printSummary(winsA, winsB)
        if h == 7:
            print('此次模拟单打淘汰赛')
        else:
            print('此次模拟双打淘汰赛或者是团体淘汰赛')


    # Only start the interactive run when executed as a script, so importing
    # the functions does not block on input().
    if __name__ == "__main__":
        main()

    2.爬虫测试(对必应网页)

    import requests
    def getHTMLText(url, attempt=None):
        """Fetch ``url`` and return the response body decoded as UTF-8.

        Prints the HTTP status code and the lengths of the ``text`` and
        ``content`` attributes of the response.

        Args:
            url: address to request.
            attempt: optional 1-based visit number; when given, a
                "visit number" line is printed first. It is passed in
                explicitly so the function no longer depends on a global loop
                variable.

        Returns:
            The page text, or the string ``"error"`` if the request fails or
            the server answers with an error status.
        """
        if attempt is not None:
            print("第", attempt, "次访问")
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
        except requests.RequestException:
            return "error"
        r.encoding = 'utf-8'
        print("网络状态码:", r.status_code)
        print("text属性长度:", len(r.text))
        print("content属性长度:", len(r.content))
        return r.text


    # Visit the page 20 times, but only when run as a script.
    if __name__ == "__main__":
        url = "http://cn.bing.com"
        for i in range(20):
            print(getHTMLText(url, i + 1))


    3.这是一个简单的HTML页面,请保存为字符串,完成后面的计算要求。

    from bs4 import BeautifulSoup
    import re
    # Parse the sample page held in a single string literal. The paragraph id
    # is spelled "first" so that the find_all(id="first") lookup below matches.
    html = BeautifulSoup("<!DOCTYPE html> <html> <head> <meta charset='utf-8'> <title>菜鸟教程(runoob.com)</title> </head> <body> <h1>我的第一标题</h1> <p id='first'>我的第一段落。</p> </body> </table> </html>","html.parser")
    print(html.head, "20")
    print(html.body)
    print(html.find_all(id="first"))
    r = html.text
    # Collect runs of characters in U+1100..U+FFFD (CJK text and CJK
    # punctuation). The escapes need their backslashes; without them the
    # character class matched ASCII letters and digits instead.
    pattern = re.findall(r'[\u1100-\uFFFD]+', r)
    print(pattern)

    4.中国大学排名(爬虫)(2019)

    输入如下代码:

    import requests
    from bs4 import BeautifulSoup
    allUniv = []
    def getHTMLText(url):
        """Download ``url`` and return its body decoded as UTF-8.

        Args:
            url: address to request.

        Returns:
            The page text, or an empty string if the request fails or the
            server answers with an error status.
        """
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
        except requests.RequestException:
            # Only network/HTTP failures map to ""; programming errors surface.
            return ""
        r.encoding = 'utf-8'
        return r.text
    def fillUnivList(soup):
        """Append one list of cell strings per table row of ``soup`` to allUniv.

        Rows without any ``<td>`` cell (for example header rows) are skipped.
        Each stored value is the cell's ``.string``.

        Args:
            soup: parsed document offering ``find_all``.
        """
        for tr in soup.find_all('tr'):
            ltd = tr.find_all('td')
            if not ltd:
                continue
            allUniv.append([td.string for td in ltd])
    def printUnivList(num):
        """Print a header line and up to ``num`` leading rows of allUniv.

        The columns shown are cells 0, 1, 2, 3 and 6 of each row.

        Args:
            num: maximum number of rows to print.
        """
        layout = "{:^4}{:^10}{:^5}{:^8}{:^10}"
        print(layout.format("排名","学校名称","省市","总分","科研规模"))
        # Slice rather than index over range(num): fewer than num rows may
        # have been collected (e.g. after a failed download).
        for u in allUniv[:max(num, 0)]:
            if len(u) < 7:
                # Row lacks the columns printed below; skip it.
                continue
            # A cell may hold None, which the centred format spec cannot
            # render; print it as an empty string.
            cells = ["" if u[k] is None else u[k] for k in (0, 1, 2, 3, 6)]
            print(layout.format(*cells))
    def main():
        """Download the 2019 ranking page, parse it and print the first ten rows."""
        url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
        html = getHTMLText(url)
        soup = BeautifulSoup(html, "html.parser")
        fillUnivList(soup)
        printUnivList(10)


    # Only hit the network when executed as a script.
    if __name__ == "__main__":
        main()

    import requests
    from bs4 import BeautifulSoup
    import csv

    # Download the 2019 ranking page and export selected columns to data.csv.
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
    # Open the CSV for writing; newline="" lets the csv module control line
    # endings, and the with-block closes the file even if a row fails.
    with open('data.csv', 'w', encoding="utf-8", newline="") as csv_obj:
        writer = csv.writer(csv_obj)
        # Header row.
        writer.writerow(["排名","学校名称","省市","总分","科研规模"])
        # NOTE(review): cell positions 0, 1, 2, 3 and 6 mirror the columns
        # printed by printUnivList; confirm them against the live page.
        for each_university in soup.find_all('tr'):
            all_td = each_university.find_all('td')
            if len(all_td) < 7:
                # Header rows and malformed rows carry no ranking data.
                continue
            university_ranking = all_td[0].text.strip()
            university_name = all_td[1].text.strip()
            university_province = all_td[2].text.strip()
            university_total_score = all_td[3].text.strip()
            university_scientific_scale = all_td[6].text.strip()
            # Write one university per row.
            writer.writerow([
                university_ranking,
                university_name,
                university_province,
                university_total_score,
                university_scientific_scale,
            ])
    print("finshed")

  • 相关阅读:
    Python基础之初始编码
    Excel图表编辑---表格移动,样式修改
    Python基础之Python的变量、常量
    刷题62. Unique Paths
    刷题56. Merge Intervals
    刷题55. Jump Game
    刷题53. Maximum Subarray
    刷题49. Group Anagrams
    刷题48. Rotate Image
    刷题46. Permutations
  • 原文地址:https://www.cnblogs.com/155722-lq/p/12883121.html
Copyright © 2020-2023  润新知