• python-spider 第10题


    # 使用flask 搭建一个后端网站
    
    from flask import Flask
    from flask import request
    
    # Create the Flask application object for this demo backend.
    app = Flask(__name__)
    
    
    @app.route('/data', methods=['GET', 'POST'])
    def hello_world():
        """Demo endpoint: log incoming request details and return a JSON body.

        GET  -> logs the request object, returns a small JSON payload.
        POST -> logs form data, headers and accepted charsets, returns JSON.
        """
        if request.method == "GET":
            print(request)
            # BUG FIX: the original GET branch fell through without returning
            # anything, which makes Flask raise "view function did not return
            # a valid response". Return the same JSON body as the POST branch.
            return {'data': 'data'}

        if request.method == 'POST':
            # Log everything the browser sent so the anti-scraping check
            # (hidden somewhere in the headers) can be inspected.
            print(request.form.to_dict())
            print(request.headers)
            print(request.accept_charsets)

            return {'data': 'data'}
    
    
    if __name__ == '__main__':
        # Run the Flask development server; debug=True enables auto-reload and
        # the interactive debugger (not suitable for production).
        app.run(debug=True)
    
    
    
    <!doctype html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport"
              content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="ie=edge">
        <title>
            第十题---烟雾缭绕【难度:简单】
        </title>
        <script src="https://cdn.bootcdn.net/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
    
    </head>
    <body>
    <h1>
        目标:采集100页的全部数字,并计算所有数据加和。当然了,有一个并不太明显的反爬手段存在
    </h1>
    <button id="id">按钮</button>
    </body>
    
    
    <script type="text/javascript">
        var url = "http://127.0.0.1:5000/data";
        // Fetch one page of data from the local Flask endpoint.
        // BUG FIX: the original built the payload with String(1), so the
        // `num` argument was silently ignored and every call requested page 1.
        call = function (num) {
            var payload = {
                "page": String(num)
            };
            $.ajax({
                url: url,
                dataType: "json",
                async: true,
                data: payload,
                type: "POST",
                beforeSend: function (request) {
                    // Deliberately empty IIFE — a decoy; it does nothing.
                    (function () {
                    })()
                },
                success: function (data) {
                    datas = data.data;
                    console.log(datas)
                }
            })
        };
        call(1);
    
    </script>
    
    

    copy 网页的代码主要是想分析 beforeSend 这个请求到底做了什么,最后找了一圈也没有发现有啥,再翻了下 js 基础,发现是我想多了

    这个函数什么东西都没有干

    最后把请求头替换掉成功过关 具体检测的应该是请求头中的某一个 ,想要知道具体检测的是什么就把每一个请求头打上备注 看下少了哪个请求头访问会失败就完事了~

    这个贴一个正则替换headers 跟一个爬虫老师学来的 具体是谁忘了。
    (.*): (.*) 替换成 "$1":"$2",

    最后贴上代码

    import json
    from typing import Dict, List
    
    import browsercookie
    import requests
    from requests.cookies import RequestsCookieJar
    
    ## init for classes
    
    # Module-level shared state: one requests session reused for all page
    # fetches, and the Chrome cookie store loaded via browsercookie.
    session = requests.session()
    chrome_cookie = browsercookie.chrome()
    # Accumulator for the scraped values; filled by the __main__ block below.
    s = []
    
    url = "http://www.python-spider.com/api/challenge10"
    # url = 'http://127.0.0.1:5000/data'
    
    
    def get_cookie_from_chrome(domain: str) -> List[Dict]:
        """Collect cookies matching *domain* from the loaded Chrome store.

        :param domain: substring matched against each cookie's domain.
        :return: a list of ``{'name': ..., 'value': ...}`` dicts.
        """
        return [
            {'name': c.name, "value": c.value}
            for c in chrome_cookie
            if domain in c.domain
        ]
    
    
    def set_cookie(domain):
        """Build a RequestsCookieJar from the Chrome cookies for *domain*.

        :param domain: the domain whose cookies should be copied into the jar.
        :return: a RequestsCookieJar populated with that domain's cookies.
        """
        jar = RequestsCookieJar()
        for entry in get_cookie_from_chrome(domain=domain):
            jar.set(entry['name'], entry['value'], domain=domain)
        return jar
    
    
    if __name__ == '__main__':
        # Full browser header set replayed verbatim: the challenge's anti-bot
        # check rejects requests missing one of these headers.
        # NOTE(review): sending a fixed "Content-Length" is usually redundant —
        # requests computes it per body; confirm the server tolerates it.
        header = {
            "Connection": "keep-alive",
            "Content-Length": "6",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Dnt": "1",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://www.python-spider.com/api/challenge10",
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "http://www.python-spider.com/api/challenge10",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh,en;q=0.9,zh-CN;q=0.8",

        }

        cookie_jar = set_cookie('www.python-spider.com')
        for page in range(1, 101):
            response = session.post(
                url,
                headers=header,
                cookies=cookie_jar,
                data={"page": str(page)},
            )
            print(response.text)
            rows = json.loads(response.text)['data']

            # BUG FIX: the original had a literal newline inside the strip()
            # argument (split across two source lines), a syntax error; strip
            # the escaped newline instead. Also removed an unused duplicate
            # `data` local and the `i` name shadowed by the comprehension.
            s.extend(row['value'].strip('\n') for row in rows)
        print(s)
        print(sum(int(v) for v in s))
    
    
    
  • 相关阅读:
    一个利用扩展方法的实例:AttachDataExtensions
    正则表达式语法
    正则表达式30分钟入门教程
    js正则验证两位小数 验证数字最简单正则表达式大全
    SQL Server DBA三十问【转】
    Vue(踩坑)vue.esm.js?efeb:628 [Vue warn]: Error in render: "TypeError: Cannot read property 'length' of undefined" found in
    vue(有必要做的项目优化)
    vue_(根据多种条件过滤评论内容)
    vue(ref父组件使用子组件中定义的方法)
    Vuex(实现加减操作,Vue.set解决自定义属性没有双向数据绑定)
  • 原文地址:https://www.cnblogs.com/ruhai/p/13511992.html
Copyright © 2020-2023  润新知