• 第一条爬虫


    1.

    from random import random
    def printIntro():
        """Print a two-line introduction describing the simulation.

        The function takes no arguments and returns nothing; it only writes
        to standard output.
        """
        print("这个程序模拟两个选手张三和李四的某种竞技比赛")
        print("程序运行需要张三和李四的能力值(0-1)")
    def getInputs():
        """Prompt for both players' ability values and the number of games.

        Returns:
            A tuple ``(a, b, n)`` where ``a`` and ``b`` are the ability values
            entered for the two players (floats) and ``n`` is the number of
            games to simulate (int).

        Raises:
            ValueError: If an entered value is not a valid number.
        """
        # SECURITY: the earlier version passed the raw input to eval(), which
        # runs any expression the user types. Parse the numbers explicitly.
        a = float(input("请输入选手张三的能力值(0-1): "))
        b = float(input("请输入选手李四的能力值(0-1): "))
        n = int(input("模拟比赛的场次: "))
        return a, b, n
    def simNGames(n, probA, probB):
        """Simulate ``n`` games and count how many each player wins.

        Args:
            n: Number of games to simulate.
            probA: Value passed to ``simOneGame`` as player A's probability.
            probB: Value passed to ``simOneGame`` as player B's probability.

        Returns:
            A tuple ``(winsA, winsB)`` with the number of games won by each
            player. A game counts for A only when A's score is strictly
            higher; every other outcome is credited to B.
        """
        winsA, winsB = 0, 0
        for _ in range(n):
            scoreA, scoreB = simOneGame(probA, probB)
            if scoreA > scoreB:
                winsA += 1
            else:
                winsB += 1
        return winsA, winsB
    def gameOver(a, b, target=15):
        """Return True when either score has reached the winning score.

        Args:
            a: Current score of the first player.
            b: Current score of the second player.
            target: Score that ends the game; defaults to 15.

        Returns:
            True if ``a`` or ``b`` equals ``target``, otherwise False.
        """
        return a == target or b == target
    def simOneGame(probA, probB):
        """Simulate a single game and return the final scores.

        Player A serves first. On each rally the server scores a point when
        ``random()`` falls below that player's probability; otherwise the
        serve passes to the other player and no point is awarded. Play
        continues until ``gameOver`` reports that the game has ended.

        Args:
            probA: Probability that A wins a rally while serving.
            probB: Probability that B wins a rally while serving.

        Returns:
            A tuple ``(scoreA, scoreB)`` with the final scores.

        Raises:
            ValueError: If neither probability is positive, since no point
                could ever be scored and the game would never finish.
        """
        # random() is never negative, so with both probabilities <= 0 the
        # serve would just bounce back and forth forever.
        if probA <= 0 and probB <= 0:
            raise ValueError("at least one of probA/probB must be greater than 0")

        scoreA, scoreB = 0, 0
        serving = "A"
        while not gameOver(scoreA, scoreB):
            if serving == "A":
                if random() < probA:
                    scoreA += 1
                else:
                    serving = "B"
            else:
                if random() < probB:
                    scoreB += 1
                else:
                    serving = "A"
        return scoreA, scoreB
    def printSummary(winsA, winsB):
        """Print the number of simulated games and each player's win share.

        Args:
            winsA: Number of games won by the first player.
            winsB: Number of games won by the second player.
        """
        n = winsA + winsB
        # With zero games there is nothing to divide by; report a 0.0% share
        # rather than failing with ZeroDivisionError.
        shareA = winsA / n if n else 0.0
        shareB = winsB / n if n else 0.0
        print("竞技分析开始,共模拟{}场比赛".format(n))
        print("选手张三获胜{}场比赛,占比{:0.1%}".format(winsA, shareA))
        print("选手李四获胜{}场比赛,占比{:0.1%}".format(winsB, shareB))
    def main():
        """Run the simulation end to end.

        Prints the introduction, reads the two ability values and the number
        of games, simulates the games, and prints the summary.
        """
        printIntro()
        probA, probB, n = getInputs()
        winsA, winsB = simNGames(n, probA, probB)
        printSummary(winsA, winsB)


    # Only start the interactive simulation when executed as a script, so that
    # importing this module does not block on input().
    if __name__ == "__main__":
        main()

      


    2.
    import requests
     
    # Demo: fetch a page and inspect the Response object's main attributes.
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get("http://www.google.cn/", timeout=30)
    print(type(response))
    status_code = response.status_code
    print(status_code)
    # .text is the body decoded with whatever response.encoding currently is.
    text = response.text
    print(text)
    encoding = response.encoding
    print(encoding)
    # Override the encoding; later reads of .text decode with utf-8.
    response.encoding = 'utf-8'
    encoding1 = response.encoding
    print(encoding1)
    text1 = response.text
    print("text内容为:{}".format(text1))
    print("\n")
    # len(text1) counts decoded characters, len(response.content) counts raw
    # bytes, so the two differ when the page has multi-byte characters.
    print("text内容长度为:{}".format(len(text1)))
    print("content内容长度为:{}".format(len(response.content)))
    

      

    
    

    效果

    <class 'requests.models.Response'>
    200
    <!DOCTYPE html>
    <html lang="zh">
    <meta charset="utf-8">
    <title>Google</title>
    <style>
    html { background: #fff; margin: 0 1em; }
    body { font: .8125em/1.5 arial, sans-serif; text-align: center; }
    h1 { font-size: 1.5em; font-weight: normal; margin: 1em 0 0; }
    p#footer { color: #767676; font-size: .77em; }
    p#footer a { background: url(//www.google.cn/intl/zh-CN_cn/images/cn_icp.gif) top right no-repeat; padding: 5px 20px 5px 0; }
    ul { margin: 2em; padding: 0; }
    li { display: inline; padding: 0 2em; }
    div { -moz-border-radius: 20px; -webkit-border-radius: 20px; border: 1px solid #ccc; border-radius: 20px; margin: 2em auto 1em; max-width: 650px; min-width: 544px; }
    div:hover, div:hover * { cursor: pointer; }
    div:hover { border-color: #999; }
    div p { margin: .5em 0 1.5em; }
    img { border: 0; }
    </style>
    <div>
    <a href="http://www.google.com.hk/webhp?hl=zh-CN&amp;sourceid=cnhp">
    <img src="//www.google.cn/landing/cnexp/google-search.png" alt="Google" width="586" height="257">
    </a>
    <h1><a href="http://www.google.com.hk/webhp?hl=zh-CN&amp;sourceid=cnhp"><strong id="target">google.com.hk</strong></a></h1>
    <p>请收藏我们的网址
    </div>
    <ul>
    <li><a href="http://translate.google.cn/?sourceid=cnhp">翻译</a>
    </ul>
    <p id="footer">&copy;2011 - <a href="http://www.miibeian.gov.cn/">ICP证合字B2-20070004号</a>
    <script nonce="0qYFPrpj6kdYUM03_qP12w">
    var gcn=gcn||{};gcn.IS_IMAGES=(/images.google.cn/.exec(window.location)||window.location.hash=='#images'||window.location.hash=='images');gcn.HOMEPAGE_DEST='http://www.google.com.hk/webhp?hl=zh-CN&sourceid=cnhp';gcn.IMAGES_DEST='http://images.google.com.hk/imghp?'+'hl=zh-CN&sourceid=cnhp';gcn.DEST_URL=gcn.IS_IMAGES?gcn.IMAGES_DEST:gcn.HOMEPAGE_DEST;gcn.READABLE_HOMEPAGE_URL='google.com.hk';gcn.READABLE_IMAGES_URL='images.google.com.hk';gcn.redirectIfLocationHasQueryParams=function(){if(window.location.search&&/google.cn/.exec(window.location)&&!/webhp/.exec(window.location)){window.location=String(window.location).replace('google.cn','google.com.hk')}}();gcn.replaceHrefsWithImagesUrl=function(){if(gcn.IS_IMAGES){var a=document.getElementsByTagName('a');for(var i=0,len=a.length;i<len;i++){if(a[i].href==gcn.HOMEPAGE_DEST){a[i].href=gcn.IMAGES_DEST}}}}();gcn.listen=function(a,e,b){if(a.addEventListener){a.addEventListener(e,b,false)}else if(a.attachEvent){var r=a.attachEvent('on'+e,b);return r}};gcn.stopDefaultAndProp=function(e){if(e&&e.preventDefault){e.preventDefault()}else if(window.event&&window.event.returnValue){window.eventReturnValue=false;return false}if(e&&e.stopPropagation){e.stopPropagation()}else if(window.event&&window.event.cancelBubble){window.event.cancelBubble=true;return false}};gcn.resetChildElements=function(a){var b=a.childNodes;for(var i=0,len=b.length;i<len;i++){gcn.listen(b[i],'click',gcn.stopDefaultAndProp)}};gcn.redirect=function(){window.location=gcn.DEST_URL};gcn.setInnerHtmlInEl=function(a){if(gcn.IS_IMAGES){var b=document.getElementById(a);if(b){b.innerHTML=b.innerHTML.replace(gcn.READABLE_HOMEPAGE_URL,gcn.READABLE_IMAGES_URL)}}};
    gcn.listen(document, 'click', gcn.redirect);
    gcn.setInnerHtmlInEl('target');
    </script>

    ISO-8859-1
    utf-8
    text内容为:<!DOCTYPE html>
    <html lang="zh">
    <meta charset="utf-8">
    <title>Google</title>
    <style>
    html { background: #fff; margin: 0 1em; }
    body { font: .8125em/1.5 arial, sans-serif; text-align: center; }
    h1 { font-size: 1.5em; font-weight: normal; margin: 1em 0 0; }
    p#footer { color: #767676; font-size: .77em; }
    p#footer a { background: url(//www.google.cn/intl/zh-CN_cn/images/cn_icp.gif) top right no-repeat; padding: 5px 20px 5px 0; }
    ul { margin: 2em; padding: 0; }
    li { display: inline; padding: 0 2em; }
    div { -moz-border-radius: 20px; -webkit-border-radius: 20px; border: 1px solid #ccc; border-radius: 20px; margin: 2em auto 1em; max-width: 650px; min-width: 544px; }
    div:hover, div:hover * { cursor: pointer; }
    div:hover { border-color: #999; }
    div p { margin: .5em 0 1.5em; }
    img { border: 0; }
    </style>
    <div>
    <a href="http://www.google.com.hk/webhp?hl=zh-CN&amp;sourceid=cnhp">
    <img src="//www.google.cn/landing/cnexp/google-search.png" alt="Google" width="586" height="257">
    </a>
    <h1><a href="http://www.google.com.hk/webhp?hl=zh-CN&amp;sourceid=cnhp"><strong id="target">google.com.hk</strong></a></h1>
    <p>请收藏我们的网址
    </div>
    <ul>
    <li><a href="http://translate.google.cn/?sourceid=cnhp">翻译</a>
    </ul>
    <p id="footer">&copy;2011 - <a href="http://www.miibeian.gov.cn/">ICP证合字B2-20070004号</a>
    <script nonce="0qYFPrpj6kdYUM03_qP12w">
    var gcn=gcn||{};gcn.IS_IMAGES=(/images.google.cn/.exec(window.location)||window.location.hash=='#images'||window.location.hash=='images');gcn.HOMEPAGE_DEST='http://www.google.com.hk/webhp?hl=zh-CN&sourceid=cnhp';gcn.IMAGES_DEST='http://images.google.com.hk/imghp?'+'hl=zh-CN&sourceid=cnhp';gcn.DEST_URL=gcn.IS_IMAGES?gcn.IMAGES_DEST:gcn.HOMEPAGE_DEST;gcn.READABLE_HOMEPAGE_URL='google.com.hk';gcn.READABLE_IMAGES_URL='images.google.com.hk';gcn.redirectIfLocationHasQueryParams=function(){if(window.location.search&&/google.cn/.exec(window.location)&&!/webhp/.exec(window.location)){window.location=String(window.location).replace('google.cn','google.com.hk')}}();gcn.replaceHrefsWithImagesUrl=function(){if(gcn.IS_IMAGES){var a=document.getElementsByTagName('a');for(var i=0,len=a.length;i<len;i++){if(a[i].href==gcn.HOMEPAGE_DEST){a[i].href=gcn.IMAGES_DEST}}}}();gcn.listen=function(a,e,b){if(a.addEventListener){a.addEventListener(e,b,false)}else if(a.attachEvent){var r=a.attachEvent('on'+e,b);return r}};gcn.stopDefaultAndProp=function(e){if(e&&e.preventDefault){e.preventDefault()}else if(window.event&&window.event.returnValue){window.eventReturnValue=false;return false}if(e&&e.stopPropagation){e.stopPropagation()}else if(window.event&&window.event.cancelBubble){window.event.cancelBubble=true;return false}};gcn.resetChildElements=function(a){var b=a.childNodes;for(var i=0,len=b.length;i<len;i++){gcn.listen(b[i],'click',gcn.stopDefaultAndProp)}};gcn.redirect=function(){window.location=gcn.DEST_URL};gcn.setInnerHtmlInEl=function(a){if(gcn.IS_IMAGES){var b=document.getElementById(a);if(b){b.innerHTML=b.innerHTML.replace(gcn.READABLE_HOMEPAGE_URL,gcn.READABLE_IMAGES_URL)}}};
    gcn.listen(document, 'click', gcn.redirect);
    gcn.setInnerHtmlInEl('target');
    </script>

    text内容长度为:3216
    content内容长度为:3244

    3.

      

    4.

    import requests
    from bs4 import BeautifulSoup
    import bs4
    def getHTMLText(url):
        """Download ``url`` and return its body as text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The decoded response body, or an empty string if the request
            fails (connection error, timeout, or an HTTP error status).
        """
        try:
            r = requests.get(url, timeout=30)
            # Must be *called*: the bare attribute reference is a no-op and
            # would let 4xx/5xx error pages through as if they were valid.
            r.raise_for_status()
            # Decode using the encoding detected from the body itself.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Only swallow request failures; programming errors and
            # KeyboardInterrupt should still propagate.
            return ""
    
    def fillUnivList(ulist, html):
        """Append the first four cell values of each table row to ``ulist``.

        Parses ``html``, locates the first ``<tbody>`` element, and for each
        child tag appends a 4-item list holding the ``.string`` of its first
        four ``<td>`` cells.

        Args:
            ulist: List that receives one 4-item list per usable row; it is
                modified in place.
            html: HTML source to parse. If it contains no ``<tbody>`` (for
                example an empty string), ``ulist`` is left untouched.
        """
        soup = BeautifulSoup(html, "lxml")
        tbody = soup.find('tbody')
        if tbody is None:
            # find() returns None when nothing matches; nothing to extract.
            return
        for tr in tbody.children:
            # .children also yields whitespace text nodes; keep real tags only.
            if isinstance(tr, bs4.element.Tag):
                tds = tr('td')
                if len(tds) < 4:
                    # Skip rows that do not have the four expected cells.
                    continue
                ulist.append([td.string for td in tds[:4]])
    
    def printUnivList(ulist, num):
        """Print a header line followed by up to ``num`` rows of ``ulist``.

        Args:
            ulist: Sequence of rows; the first four items of each row are
                printed as rank, school name, province and total score.
            num: Maximum number of rows to print. If ``ulist`` has fewer
                rows, only the available ones are printed.
        """
        tplt = "{0:^6}\t{1:{4}^10}\t{2:^10}\t{3:^10}"
        # chr(12288) is the full-width (ideographic) space, used as the fill
        # character of the school-name column.
        fill = chr(12288)
        print(tplt.format("排名","学校名称","省份","总分",fill))
        # Slicing never runs past the end of the list, unlike indexing with
        # range(num); max() keeps a negative num meaning "print no rows".
        for u in ulist[:max(num, 0)]:
            print(tplt.format(u[0],u[1],u[2],u[3],fill))
    
    def main():
        """Fetch the ranking page, extract its rows, and print the first 30."""
        uinfo = []
        url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2017.html"
        html = getHTMLText(url)
        fillUnivList(uinfo, html)
        printUnivList(uinfo, 30)


    # Only hit the network when executed as a script, not on import.
    if __name__ == "__main__":
        main()
    

      

  • 相关阅读:
    Django
    C++开源库集合
    单细胞参考文献 single cell
    第三章 RNA测序
    第二章 基因芯片
    前言 转录组
    生物信息学——RNA的剪切过程
    生信研究内容
    测序总结,高通量测序名词
    单端测序,双端测序,基因组计划图谱
  • 原文地址:https://www.cnblogs.com/Glzt/p/12883367.html
Copyright © 2020-2023  润新知