爬虫小案例：扇贝单词评估

使用 Python 模拟扇贝官网单词测试的过程。
数据来源：扇贝网站 https://www.shanbay.com/vocabtest/
import requests,random

# Question banks returned by the category API; assigned in select_topics().
topics_data = list()
# Vocabulary entries for the chosen bank; assigned in select_words().
words_data = list()
# Indexes into words_data of the words the user picked as known.
word_indexs = list()
# Labels printed in front of each answer option.
answer_orders = list('ABCDE')
# Indexes into words_data of the words the user answered correctly.
answer_right_indexs = list()

# Choose a question bank
def select_topics():
    """Fetch the available question banks and let the user pick one.

    The fetched list is stored in the module-level ``topics_data``. Each
    entry is indexed with ``[1]`` for the text shown to the user.

    Returns:
        The chosen index into ``topics_data`` (``0`` is a valid choice), or
        ``False`` when the list could not be fetched or is empty. Callers
        must compare against ``False`` explicitly rather than rely on
        truthiness.
    """
    global topics_data
    url = 'https://www.shanbay.com/api/v1/vocabtest/category/'
    params = {
        # Random 14-digit value; presumably a cache-buster -- TODO confirm.
        '_': str(random.randint(10**13, 9 * 10**13))
    }
    try:
        # Without a timeout requests can block forever on a stalled server.
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        print('出题范围请求失败！')
        return False
    if res.status_code != 200:
        print('出题范围请求失败！')
        return False

    topics_data = res.json()['data']
    if not topics_data:
        # Nothing to choose from; prompting would never terminate.
        print('出题范围请求失败！')
        return False

    last = len(topics_data) - 1  # highest valid index (inclusive)
    print('请选择题库序号：{} ~ {}'.format(0, last))
    for i, topic in enumerate(topics_data):
        print('{}.{}'.format(i, topic[1]), end='  ')
    print("")

    while True:
        try:
            index = int(input("："))
        except ValueError:
            # Only malformed numbers are retried; Ctrl-C / EOF propagate so
            # the user can still abort the program.
            print('输入有误，请重新选择！')
            continue
        if 0 <= index <= last:
            return index
        print('范围只能是{} ~ {}，请重新选择！'.format(0, last))

# Choose words
def select_words(category):
    """Fetch the word list for *category* and record which words the user knows.

    The fetched list is stored in the module-level ``words_data``; every
    index the user picks is appended to the module-level ``word_indexs``.

    Args:
        category: Value sent as the ``category`` query parameter.

    Returns:
        ``True`` once the user has finished selecting, ``False`` when the
        word list could not be fetched or is empty.
    """
    global words_data, word_indexs
    url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
    headers = {
        'referer': 'https://www.shanbay.com/vocabtest/',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
    }
    params = {
        'category': category,
        # Random 14-digit value; presumably a cache-buster -- TODO confirm.
        '_': str(random.randint(10**13, 9 * 10**13))
    }
    print('正获取相关单词...')
    try:
        # Without a timeout requests can block forever on a stalled server.
        res = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        print('单词获取失败！')
        return False
    if res.status_code != 200:
        print('单词获取失败！')
        return False

    words_data = res.json()['data']
    if not words_data:
        # Nothing to choose from; prompting would never terminate.
        print('单词获取失败！')
        return False

    last = len(words_data) - 1  # highest valid index (inclusive)
    prompt = '请选择你认识的单词（选择对应的序号即可，范围：{} ~ {}）'.format(0, last)
    print(prompt)
    # Show the words eight per row.
    for index, word in enumerate(words_data):
        print('{}.{}'.format(index, word['content'].ljust(15, ' ')), end='')
        if (index + 1) % 8 == 0 or index == last:
            print('')

    while True:
        try:
            index = int(input(':'))
        except ValueError:
            # Only malformed numbers are retried; Ctrl-C / EOF propagate so
            # the user can still abort the program.
            print('您输入的数值有误，请重新输入！')
            continue
        if not 0 <= index <= last:
            print('您输入的范围有误，请输入{} ~ {}的数值：'.format(0, last))
            continue
        if index in word_indexs:
            # Tell the user why they are being asked again.
            print('该单词已选择，请重新输入！')
            continue
        word_indexs.append(index)

        tip = input('是否要继续选择你认识的单词？（输入：n/N 则退出；其他任意字符则继续）：')
        if tip in ['n', 'N']:
            break
        print(prompt)

    return True

# Answer the questions
def select_answer():
    """Quiz the user on every word recorded in ``word_indexs``.

    For each selected word the definition choices are listed with labels
    taken from ``answer_orders``; the label right after the last choice is
    offered as "don't know". A word counts as correct when the ``pk`` of the
    chosen definition equals the word's own ``pk``; its index is then
    appended to the module-level ``answer_right_indexs``.

    Raises:
        IndexError: If a word has so many definition choices that no label
            in ``answer_orders`` is left for the "don't know" option.
    """
    global answer_right_indexs
    total = len(word_indexs)
    for num, index in enumerate(word_indexs):
        word = words_data[index]
        choices = word['definition_choices']
        # Derive the labels from the actual number of choices instead of
        # assuming exactly four choices plus 'E'.
        unknown_label = answer_orders[len(choices)]
        valid = answer_orders[:len(choices) + 1]

        if num > 0:
            print('')
            print('————————————————————————————————————')
            print('')
        print('{}/{}）{}的正确词义是：'.format(num + 1, total, word['content']))
        for label, option in zip(valid, choices):
            print('{}：{}'.format(label, option['definition']))
        print('{}：{}'.format(unknown_label, '不认识'))

        while True:
            choice = input(':').strip().upper()
            if choice in valid:
                break
            print('请重新选择序号：{}'.format('、'.join(valid)))
        print('你选择了：{}'.format(choice))

        # Check whether the answer is correct
        word_pk = int(word['pk'])
        if choice != unknown_label and int(choices[valid.index(choice)]['pk']) == word_pk:
            print('答对了！')
            answer_right_indexs.append(index)
            continue

        # Locate the correct definition; None when no choice matches, so an
        # unrelated choice is never reported as the right answer.
        right_pos = next(
            (pos for pos, option in enumerate(choices) if int(option['pk']) == word_pk),
            None,
        )
        if right_pos is None:
            print('答错了！')
        else:
            print('答错了，正确答案是：{}：{}'.format(valid[right_pos], choices[right_pos]['definition']))

# Report
def show_report(category):
    """Submit the quiz outcome and print the estimated vocabulary size.

    Posts the ranks of all fetched words (``word_ranks``) and of the words
    answered correctly (``right_ranks``), then prints the ``vocab`` and
    ``comment`` fields of the response followed by a local summary built
    from ``words_data``, ``word_indexs`` and ``answer_right_indexs``.

    Args:
        category: Value sent as the ``category`` parameter.
    """
    url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
    headers = {
        'referer': 'https://www.shanbay.com/vocabtest/',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
    }
    params = {
        'category': category,
        'phase': 'primary',
        # Previously always sent empty, so the server never saw any correct
        # answers. NOTE(review): the original author observed a vocab of 0;
        # the empty right_ranks is the likely cause -- confirm against the API.
        'right_ranks': ','.join(str(words_data[i]['rank']) for i in answer_right_indexs),
        'word_ranks': ','.join(str(word['rank']) for word in words_data),
    }
    try:
        # NOTE(review): the values go in the query string (params=) as in the
        # original; verify whether the endpoint expects a form body (data=).
        res = requests.post(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        print('评分请求失败！')
        return
    if res.status_code != 200:
        print('评分请求失败！')
        return

    result = res.json()['data']
    known = len(word_indexs)
    mastered = len(answer_right_indexs)
    print('')
    print('您的词汇量大约是：{}'.format(result['vocab']))
    print(result['comment'])
    print('详细报告：')
    print('{}个单词，不认识{}，认识{}，掌握了{}，答错了{}'.format(
        len(words_data), len(words_data) - known, known, mastered, known - mastered))

def main():
    """Run the whole flow: pick a bank, pick known words, take the quiz, report."""
    # Index of the chosen question bank
    topic_index = select_topics()
    # select_topics() signals failure with False. Index 0 is a valid choice,
    # so a plain truthiness test would wrongly reject the first bank.
    if topic_index is False or topic_index is None:
        return

    # Entry for the chosen bank: [0] is passed to the API, [1] is displayed.
    category = topics_data[topic_index]
    print('选择的题库是：{}.{}'.format(topic_index, category[1]))

    # Choose words
    if not select_words(category[0]):
        return

    print('单词测试，请选择正确的词义：')
    # Answer the questions
    select_answer()
    # Report
    show_report(category[0])


if __name__ == '__main__':
    main()
相关阅读:
golang 相关
 ES root用户启动失败can not run elasticsearch as root
基于 Flink CDC + Hudi 湖仓一体方案实践
 数据平台上云
 多云趋势
 数果实时数仓探索
 宽表的设计
 数仓指标体系
 Hudi在医疗大数据的应用
 Hudi on Flink上手使用总结
原文地址：https://www.cnblogs.com/KeenLeung/p/12191949.html