• 爬虫小案例:扇贝单词评估


    使用 Python 模拟扇贝官网单词测试的过程

    数据来源:扇贝网站 https://www.shanbay.com/vocabtest/

    import requests,random
    
    # Shared module state, filled in by the functions of this script.

    # Question banks returned by the category request
    topics_data = list()
    # Words returned by the vocabulary request
    words_data = list()
    # Indexes (into words_data) of the words the user picked
    word_indexs = list()
    # Labels shown in front of the answer options
    answer_orders = list('ABCDE')
    # Indexes (into words_data) of the words answered correctly
    answer_right_indexs = list()
    
    # Choose a question bank
    def select_topics():
        """Fetch the question banks and let the user pick one by index.

        Stores the fetched list in the module-level ``topics_data``.

        Returns:
            The chosen index into ``topics_data`` (an int, possibly 0), or
            ``False`` when the request fails or no bank is returned. Callers
            must test the result with ``is False`` because index 0 is falsy.
        """
        global topics_data
        url = 'https://www.shanbay.com/api/v1/vocabtest/category/'
        params = {
            # Random 14-digit value; presumably a cache-busting parameter.
            '_': str(random.randint(10**13, 9 * 10**13))
        }
        res = requests.get(url, params=params)
        if res.status_code != 200:
            print('出题范围请求失败!')
            return False

        topics_data = res.json()['data']
        if not topics_data:
            # Nothing to choose from: prompting would loop forever.
            print('出题范围请求失败!')
            return False

        # Highest valid index, so the prompts show the real inclusive range.
        last = len(topics_data) - 1
        print('请选择题库序号:{} ~ {}'.format(0, last))
        for i, topic in enumerate(topics_data):
            print('{}.{}'.format(i, topic[1]), end='  ')
        print("")

        while True:
            try:
                index = int(input(""))
            except ValueError:
                # Only a non-numeric entry is retried; KeyboardInterrupt and
                # EOFError propagate so the user can actually quit.
                print('输入有误,请重新选择!')
                continue
            if 0 <= index <= last:
                return index
            print('范围只能是{} ~ {},请重新选择!'.format(0, last))
    
    # Choose the words the user claims to know
    def select_words(category):
        """Fetch the test words for a bank and collect the ones the user knows.

        Stores the fetched words in the module-level ``words_data`` and appends
        each newly chosen index to the module-level ``word_indexs``.

        Args:
            category: Bank code sent as the ``category`` query parameter.

        Returns:
            ``True`` once the user finishes choosing, ``False`` when the
            request fails or returns no words.
        """
        global words_data, word_indexs
        url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
        headers = {
            'referer': 'https://www.shanbay.com/vocabtest/',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
        }
        params = {
            'category': category,
            # Random 14-digit value; presumably a cache-busting parameter.
            '_': str(random.randint(10**13, 9 * 10**13))
        }
        print('正获取相关单词...')
        res = requests.get(url, headers=headers, params=params)
        if res.status_code != 200:
            print('单词获取失败!')
            return False

        words_data = res.json()['data']
        if not words_data:
            # No word can be chosen, so the selection loop would never end.
            print('单词获取失败!')
            return False

        # Highest valid index, so the prompts show the real inclusive range.
        last = len(words_data) - 1
        prompt = '请选择你认识的单词(选择对应的序号即可,范围:{} ~ {})'.format(0, last)
        print(prompt)

        # Show the words eight per row.
        for index, word in enumerate(words_data):
            print('{}.{}'.format(index, word['content'].ljust(15, ' ')), end='')
            if (index + 1) % 8 == 0 or index == last:
                print('')

        while True:
            try:
                index = int(input(':'))
            except ValueError:
                # Only a non-numeric entry is retried; KeyboardInterrupt and
                # EOFError propagate so the user can actually quit.
                print('您输入的数值有误,请重新输入!')
                continue
            if not 0 <= index <= last:
                print('您输入的范围有误,请输入{} ~ {}的数值:'.format(0, last))
                continue
            if index in word_indexs:
                # Tell the user why the entry was ignored instead of silently
                # prompting again.
                print('该单词已选择过,请选择其他序号!')
                continue
            word_indexs.append(index)

            tip = input('是否要继续选择你认识的单词?(输入:n/N 则退出;其他任意字符则继续):')
            if tip in ['n', 'N']:
                break
            print(prompt)

        return True
    
    # Answer the quiz
    def select_answer():
        """Quiz the user on every chosen word and record the correct answers.

        Reads the module-level ``word_indexs``, ``words_data`` and
        ``answer_orders``; appends the index of each correctly answered word to
        the module-level ``answer_right_indexs``.

        A choice is correct when its ``pk`` equals the word's ``pk``. The option
        after the last definition means "don't know" and always counts as wrong.
        Assumes a word has at most ``len(answer_orders) - 1`` definitions.
        """
        global answer_right_indexs
        total = len(word_indexs)
        for num, index in enumerate(word_indexs):
            if num > 0:
                print('')
                print('————————————————————————————————————')
                print('')

            word = words_data[index]
            choices = word['definition_choices']
            # One label per definition plus one for "don't know"; only these
            # labels are accepted, so a pick can never index past the choices.
            labels = answer_orders[:len(choices) + 1]
            unknown_label = labels[len(choices)]

            print('{}/{}){}的正确词义是:'.format(num + 1, total, word['content']))
            for label, option in zip(labels, choices):
                print('{}:{}'.format(label, option['definition']))
            print('{}:{}'.format(unknown_label, '不认识'))

            while True:
                choice = input(':').strip().upper()
                if choice in labels:
                    break
                print('请重新选择序号:{}'.format(''.join(labels)))
            print('你选择了:{}'.format(choice))

            # Check the answer.
            right_pk = int(word['pk'])
            if choice != unknown_label and int(choices[labels.index(choice)]['pk']) == right_pk:
                print('答对了!')
                answer_right_indexs.append(index)
                continue

            # Find the right definition; None when no choice matches, so a
            # wrong choice is never reported as the correct answer.
            right_choice = next(
                (option for option in choices if int(option['pk']) == right_pk),
                None,
            )
            if right_choice is None:
                print('答错了!')
            else:
                print('答错了,正确答案是:{}:{}'.format(
                    labels[choices.index(right_choice)], right_choice['definition']))
    
    # Report
    def show_report(category):
        """Submit the test result and print the vocabulary estimate.

        Reads the module-level ``words_data``, ``word_indexs`` and
        ``answer_right_indexs``.

        Args:
            category: Bank code sent as the ``category`` parameter.
        """
        url = 'https://www.shanbay.com/api/v1/vocabtest/vocabularies/'
        headers = {
            'referer': 'https://www.shanbay.com/vocabtest/',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'
        }
        # Ranks of every tested word, and of the correctly answered ones.
        word_ranks = [str(word['rank']) for word in words_data]
        right_ranks = [str(words_data[index]['rank']) for index in answer_right_indexs]
        params = {
            'category': category,
            'phase': 'primary',
            # NOTE(review): this used to be sent empty, and the estimate was
            # observed to come back as 0; confirm against the live API that the
            # ranks of correctly answered words are what it expects here.
            'right_ranks': ','.join(right_ranks),
            'word_ranks': ','.join(word_ranks)
        }
        # NOTE(review): the values travel in the query string (params=) of a
        # POST; confirm whether the endpoint expects a form body instead.
        res = requests.post(url, headers=headers, params=params)
        if res.status_code != 200:
            print('评分请求失败!')
            return

        result = res.json()['data']
        known = len(word_indexs)
        mastered = len(answer_right_indexs)
        print('')
        print('您的词汇量大约是:{}'.format(result['vocab']))
        print(result['comment'])
        print('详细报告:')
        print('{}个单词,不认识{},认识{},掌握了{},答错了{}'.format(
            len(words_data), len(words_data) - known, known, mastered, known - mastered))
    
    def main():
        """Run the whole test: pick a bank, pick words, answer, show the report."""
        # Index of the chosen question bank, or False when the request failed.
        topic_index = select_topics()
        # Identity check on purpose: index 0 is a valid choice but is falsy, so
        # a plain truth test would silently drop the first bank.
        if topic_index is False:
            return

        # Bank entry; element 0 is passed on as the bank code, element 1 is
        # the name shown to the user.
        category = topics_data[topic_index]
        print('选择的题库是:{}.{}'.format(topic_index, category[1]))

        # Choose the words to be tested on.
        if not select_words(category[0]):
            return

        print('单词测试,请选择正确的词义:')
        # Answer the quiz.
        select_answer()
        # Show the report.
        show_report(category[0])
    
    # Run the interactive test only when executed as a script, not on import.
    if __name__ == '__main__':
        main()

  • 相关阅读:
    golang 相关
    ES root用户启动失败can not run elasticsearch as root
    基于 Flink CDC + Hudi 湖仓一体方案实践
    数据平台上云
    多云趋势
    数果实时数仓探索
    宽表的设计
    数仓指标体系
    Hudi在医疗大数据的应用
    Hudi on Flink上手使用总结
  • 原文地址:https://www.cnblogs.com/KeenLeung/p/12191949.html
Copyright © 2020-2023  润新知