• python-spider 第10题


    # 使用flask 搭建一个后端网站
    
    from flask import Flask
    from flask import request
    
    # Create the Flask application object for this demo backend.
    app = Flask(__name__)
    
    
    @app.route('/data', methods=['GET', 'POST'])
    def hello_world():
        """Demo endpoint: log incoming request details and return a JSON body.

        GET  -> logs the request object, returns a small JSON payload.
        POST -> logs form data, headers and accepted charsets, returns JSON.
        """
        if request.method == "GET":
            print(request)
            # BUG FIX: the original GET branch fell through without returning
            # anything, which makes Flask raise "view function did not return
            # a valid response". Return the same JSON body as the POST branch.
            return {'data': 'data'}

        if request.method == 'POST':
            # Log everything the browser sent so the anti-scraping check
            # (hidden somewhere in the headers) can be inspected.
            print(request.form.to_dict())
            print(request.headers)
            print(request.accept_charsets)

            return {'data': 'data'}
    
    
    if __name__ == '__main__':
        # Run the Flask development server; debug=True enables auto-reload and
        # the interactive debugger (not suitable for production).
        app.run(debug=True)
    
    
    
    <!doctype html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport"
              content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="ie=edge">
        <title>
            第十题---烟雾缭绕【难度:简单】
        </title>
        <script src="https://cdn.bootcdn.net/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
    
    </head>
    <body>
    <h1>
        目标:采集100页的全部数字,并计算所有数据加和。当然了,有一个并不太明显的反爬手段存在
    </h1>
    <button id="id">按钮</button>
    </body>
    
    
    <script type="text/javascript">
        var url = "http://127.0.0.1:5000/data";
        // Fetch one page of data from the local Flask endpoint.
        // BUG FIX: the original built the payload with String(1), so the
        // `num` argument was silently ignored and every call requested page 1.
        call = function (num) {
            var payload = {
                "page": String(num)
            };
            $.ajax({
                url: url,
                dataType: "json",
                async: true,
                data: payload,
                type: "POST",
                beforeSend: function (request) {
                    // Deliberately empty IIFE — a decoy; it does nothing.
                    (function () {
                    })()
                },
                success: function (data) {
                    datas = data.data;
                    console.log(datas)
                }
            })
        };
        call(1);
    
    </script>
    
    

    copy 网页的代码主要是想分析 beforeSend 这个请求到底做了什么,最后找了一圈也没有发现有啥,再翻了下 js 基础,发现是我想多了

    这个函数什么东西都没有干

    最后把请求头替换掉成功过关 具体检测的应该是请求头中的某一个 ,想要知道具体检测的是什么就把每一个请求头打上备注 看下少了哪个请求头访问会失败就完事了~

    这个贴一个正则替换headers 跟一个爬虫老师学来的 具体是谁忘了。
    (.*): (.*) 替换成 "$1":"$2",

    最后贴上代码

    import json
    from typing import Dict, List
    
    import browsercookie
    import requests
    from requests.cookies import RequestsCookieJar
    
    ## init for classes
    
    # Module-level shared state: one requests session reused for all page
    # fetches, and the Chrome cookie store loaded via browsercookie.
    session = requests.session()
    chrome_cookie = browsercookie.chrome()
    # Accumulator for the scraped values; filled by the __main__ block below.
    s = []
    
    url = "http://www.python-spider.com/api/challenge10"
    # url = 'http://127.0.0.1:5000/data'
    
    
    def get_cookie_from_chrome(domain: str) -> List[Dict]:
        """Collect cookies matching *domain* from the loaded Chrome store.

        :param domain: substring matched against each cookie's domain.
        :return: a list of ``{'name': ..., 'value': ...}`` dicts.
        """
        return [
            {'name': c.name, "value": c.value}
            for c in chrome_cookie
            if domain in c.domain
        ]
    
    
    def set_cookie(domain):
        """Build a RequestsCookieJar from the Chrome cookies for *domain*.

        :param domain: the domain whose cookies should be copied into the jar.
        :return: a RequestsCookieJar populated with that domain's cookies.
        """
        jar = RequestsCookieJar()
        for entry in get_cookie_from_chrome(domain=domain):
            jar.set(entry['name'], entry['value'], domain=domain)
        return jar
    
    
    if __name__ == '__main__':
        # Full browser header set replayed verbatim: the challenge's anti-bot
        # check rejects requests missing one of these headers.
        # NOTE(review): sending a fixed "Content-Length" is usually redundant —
        # requests computes it per body; confirm the server tolerates it.
        header = {
            "Connection": "keep-alive",
            "Content-Length": "6",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Dnt": "1",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://www.python-spider.com/api/challenge10",
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "http://www.python-spider.com/api/challenge10",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh,en;q=0.9,zh-CN;q=0.8",

        }

        cookie_jar = set_cookie('www.python-spider.com')
        for page in range(1, 101):
            response = session.post(
                url,
                headers=header,
                cookies=cookie_jar,
                data={"page": str(page)},
            )
            print(response.text)
            rows = json.loads(response.text)['data']

            # BUG FIX: the original had a literal newline inside the strip()
            # argument (split across two source lines), a syntax error; strip
            # the escaped newline instead. Also removed an unused duplicate
            # `data` local and the `i` name shadowed by the comprehension.
            s.extend(row['value'].strip('\n') for row in rows)
        print(s)
        print(sum(int(v) for v in s))
    
    
    
  • 相关阅读:
    一个利用扩展方法的实例:AttachDataExtensions
    正则表达式语法
    正则表达式30分钟入门教程
    js正则验证两位小数 验证数字最简单正则表达式大全
    SQL Server DBA三十问【转】
    Vue(踩坑)vue.esm.js?efeb:628 [Vue warn]: Error in render: "TypeError: Cannot read property 'length' of undefined" found in
    vue(有必要做的项目优化)
    vue_(根据多种条件过滤评论内容)
    vue(ref父组件使用子组件中定义的方法)
    Vuex(实现加减操作,Vue.set解决自定义属性没有双向数据绑定)
  • 原文地址:https://www.cnblogs.com/ruhai/p/13511992.html
Copyright © 2020-2023  润新知