• python答题辅助


    最近直播答题app很热门,由于之前看过跳一跳的python脚本(非常棒),于是也想写一个答题的脚本。

    https://github.com/huanmsf/cai
    

    思路:

    1、截图

    2、文字识别,提取问题和选项(分割后识别准确性会提高)

    3、爬取网页数据,根据规则匹配选项

    4、根据选项自动点击屏幕该位置(应该循环点击,防止刚好切换到西瓜妹)

    5、重复前面步骤

    存在的问题:

    1、答题时间有限,如果爬取的链接多了,还没解析完时间就到了。爬取的少就缺少分析数据,结果不靠谱。

    2、问题和选项需要提取关键字匹配

    3、可能要试试其他搜索引擎(百度垃圾信息严重影响正确率)

     目录:

    ├── baidu.py
    ├── cai.png
    ├── main.py
    ├── need
    │   └── chi_sim.traineddata
    ├── README
    └── screenshot.py
    

     main.py:

    from screenshot import pull_screenshot
    import time, urllib.request, baidu, os
    
    try:
        import Image
    except ImportError:
        from PIL import Image, ImageDraw
    
    import pytesseract
    
    # --- Screen layout (defaults are for a 1920*1080 display) ---
    # Distance from the top of the screen to the question, divided by the
    # screen height; it changes with the resolution.
    top_off_c = 0.15
    que_h = 300   # height of the question area
    ans_h = 170   # height of one answer area
    l_r_off = 40  # left/right offset

    # --- Text clean-up ---
    # Substrings removed from the recognised question.
    que_filter = ['.', ' ']
    # Substrings removed from the recognised answers.
    ans_filter = ['《', '》', ' ']

    # Question list
    que_list = []

    # Option coordinates
    point_A = point_B = point_C = (0, 0, 0, 0)
    
    
    # Helper for locating the text regions
    def draw():
        """Open cai.png, draw red guide lines on it and show the result.

        The lines are placed with the module-level layout constants
        (top_off_c, que_h, ans_h, l_r_off), i.e. on the same boundaries that
        main() uses to crop the question and the three answers, so the
        preview keeps matching the crops when those constants are retuned.
        """
        img = Image.open('cai.png')
        w, h = img.size
        # Named `pen` so the local does not shadow this function's own name.
        pen = ImageDraw.Draw(img)

        que_top = h * top_off_c
        ans_top = que_top + que_h

        # Top and bottom edge of the question area (full usable width).
        for y in (que_top, ans_top):
            pen.line((l_r_off, y, w - l_r_off, y), fill="red")

        # Bottom edge of each of the three answer areas (70% of the width,
        # the same right edge main() uses for the answer crops).
        for row in (1, 2, 3):
            y = ans_top + ans_h * row
            pen.line((l_r_off, y, w * 0.7, y), fill="red")

        img.show()
    
    
    def click(point):
        """Send an `adb shell input swipe` command built from `point`.

        point: indexable with at least four items; items 0-3 are used as
            x1, y1, x2, y2 of the swipe. The duration argument is always 1.
        """
        # Debugging aid: open cai.png, draw an arc at `point` with ImageDraw
        # and show the image to check where the tap would land.
        start_x, start_y, end_x, end_y = point[0], point[1], point[2], point[3]
        template = 'adb shell input swipe {x1} {y1} {x2} {y2} {duration}'
        os.system(template.format(
            x1=start_x, y1=start_y, x2=end_x, y2=end_y, duration=1))
    
    
    def main():
        """Loop forever: screenshot, OCR the question and options, search, tap.

        Each round pulls a screenshot into cai.png, crops the question area
        and three answer areas, recognises their text with pytesseract, and
        asks baidu.search() which option to pick. Questions already present
        in que_list are skipped; answered questions are appended to it.
        """

        def _scrub(text, unwanted):
            # For every entry in turn: strip the text, then drop that entry.
            for token in unwanted:
                text = text.strip().replace(token, "")
            return text

        while True:

            print(">>>>>>")
            pull_screenshot()
            shot = Image.open('cai.png').convert('L')
            w, h = shot.size

            que_top = h * top_off_c
            ans_top = que_top + que_h
            ans_right = w * 0.7

            # Crop boxes: the question first, then the three stacked answers.
            img_q = shot.crop((l_r_off, que_top, w - l_r_off, ans_top))
            ans_imgs = [
                shot.crop((l_r_off, ans_top + ans_h * row,
                           ans_right, ans_top + ans_h * (row + 1)))
                for row in range(3)
            ]

            # One short swipe per option, ending at the vertical middle of
            # its answer area, one third of the way across the screen.
            tap_x = w / 3
            taps = []
            for halves in (1, 3, 5):
                tap_y = ans_top + ans_h / 2 * halves
                taps.append((tap_x - 20, tap_y - 20, tap_x, tap_y))
            tap_a, tap_b, tap_c = taps

            # Copy the chi file under need/ to /usr/share/tesseract-ocr/4.00/
            question = pytesseract.image_to_string(img_q, lang='chi_sim')
            ans = [pytesseract.image_to_string(piece, lang='chi_sim')
                   for piece in ans_imgs]

            question = _scrub(question, que_filter)
            ans = [_scrub(text, ans_filter) for text in ans]
            # Blank options are replaced with a placeholder string.
            ans = [text if text.strip() else "&*&" for text in ans]

            print(question)
            print(ans)

            if question in que_list:
                continue

            index = baidu.search(question, ans)
            # Pick the 1st, 2nd or 3rd option (anything but 0 or 1 taps C).
            click({0: tap_a, 1: tap_b}.get(index, tap_c))

            print("index" + str(index))
            que_list.append(question)
    
    
    # Start the answer loop only when this file is run as a script.
    if __name__ == '__main__':
        main()
    

     baidu.py:

    # -*- coding:utf-8 -*-
    
    import urllib, time, re
    
    import lxml.etree as etree
    
    # 答案积分规则
    """
    某个答案首次出现在一篇文章中+10,再次+3
    """
    
    
    def _parse_html(page):
        """Return the lxml tree for `page`, or None if it cannot be parsed."""
        try:
            # May also yield None for blank input, which callers treat the
            # same as a parse failure.
            return etree.HTML(page)
        except (etree.LxmlError, ValueError):
            # ValueError: lxml rejects str input carrying an encoding
            # declaration.
            return None


    def search(question, ans):
        """Choose the option that the top Baidu result pages mention most.

        Scoring rule: within one result page, the first text node containing
        an option gives that option +10, every further text node containing
        it gives +3. Scores are summed over at most five result links.

        Args:
            question: question text, used as the Baidu query.
            ans: list of option strings. Empty strings are never scored.

        Returns:
            Index into `ans` of the highest-scoring option (the first one
            that reached the top score on a tie), or 0 when nothing scored,
            including when the result list could not be fetched or parsed.

        Side effects: prints each visited URL, each non-blank text node and
        the final score table.
        """
        # Imported here so the call works even if no other module has
        # already loaded the urllib.parse submodule.
        from urllib.parse import quote

        scores = {}
        q_url = "http://www.baidu.com/s?word=" + quote(question)
        selector = _parse_html(getdata(q_url))
        if selector is None:
            return 0
        url_list = selector.xpath('//h3[@class]/a[@data-click]/@href')[0:5]
        for url_item in url_list:
            if not url_item.startswith('http'):
                continue
            print(url_item)
            selector = _parse_html(getdata(url_item))
            if selector is None:
                # Skip just this page; scores from other pages still count.
                continue
            try:
                content_list = selector.xpath('//div/text()|//span/text()|//p/text()')
            except etree.LxmlError:
                continue
            seen_on_page = set()
            for con in content_list:
                if not con.strip():
                    continue
                for a in ans:
                    # An empty option would be "in" every string.
                    if a and a in con:
                        bonus = 3 if a in seen_on_page else 10
                        scores[a] = scores.get(a, 0) + bonus
                        seen_on_page.add(a)

                print(con)

        print(scores)
        if not scores:
            return 0
        # max() keeps the first of several equal maxima, matching a stable
        # descending sort.
        best = max(scores.items(), key=lambda item: item[1])[0]
        return ans.index(best)
    
    
    def getdata(url, timeout=3):
        """Fetch `url` and return its body decoded as UTF-8.

        Args:
            url: address to fetch.
            timeout: seconds passed to urlopen so that one stalled server
                cannot block the time-limited answer loop; None disables it.

        Returns:
            The decoded body (undecodable bytes are dropped), " " when the
            URL is malformed or cannot be opened, or "" when the connection
            opened but reading the body failed.
        """
        # Imported here so the function does not rely on another module
        # having loaded the urllib.request submodule.
        import http.client
        import urllib.request

        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers malformed URLs; HTTPException covers protocol-level errors.
        fetch_errors = (OSError, ValueError, http.client.HTTPException)

        try:
            response = urllib.request.urlopen(
                urllib.request.Request(url), timeout=timeout)
        except fetch_errors:
            return " "

        # Close the response even if reading fails.
        with response:
            try:
                raw = response.read()
            except fetch_errors:
                return ""
        return raw.decode("utf-8", 'ignore')
    

     screenshot.py:

    # -*- coding: utf-8 -*-
    """
    手机屏幕截图的代码(参考跳一跳外挂源码)
    """
    import subprocess
    import os
    import sys
    from PIL import Image
    
    # Screenshot method used by pull_screenshot(): 1-3 read the output of
    # `adb shell screencap -p` through a pipe (1 and 2 also rewrite
    # line-break bytes in it); 0 saves the capture on the device and pulls it.
    SCREENSHOT_WAY = 3
    
    
    def pull_screenshot():
        """Capture the phone screen with adb and store it as cai.png.

        The method is selected by the module-level SCREENSHOT_WAY:
          * 1, 2, 3 - run `adb shell screencap -p` and write its stdout to
            cai.png; for 2 and 1 the line-break bytes in the data are
            rewritten first.
          * 0 - save the capture to /sdcard/cai.png on the device and pull
            it into the current directory.
        Any other value does nothing.
        """
        if 1 <= SCREENSHOT_WAY <= 3:
            # run() waits for adb to exit, so no child process is left
            # unreaped.
            result = subprocess.run(
                'adb shell screencap -p',
                shell=True, stdout=subprocess.PIPE)
            binary_screenshot = result.stdout
            # NOTE(review): the published listing lost the escape sequences
            # in the literals below (they appeared as raw line breaks, which
            # is a syntax error). They are reconstructed as CR LF and
            # CR CR LF; confirm against the original script.
            if SCREENSHOT_WAY == 2:
                binary_screenshot = binary_screenshot.replace(b'\r\n', b'\n')
            elif SCREENSHOT_WAY == 1:
                binary_screenshot = binary_screenshot.replace(b'\r\r\n', b'\n')
            with open('cai.png', 'wb') as f:
                f.write(binary_screenshot)
        elif SCREENSHOT_WAY == 0:
            os.system('adb shell screencap -p /sdcard/cai.png')
            os.system('adb pull /sdcard/cai.png .')
    
    文字识别
    sudo pip3 install pytesseract
    sudo apt-get install tesseract-ocr
    

    初级版本效果:

     题外话:

    最近在浏览FB站看到

    冲顶大会辅助揭秘:王思聪撒的币,还是要靠技术来捡

    文中提到可以提前10秒得到题目(不知是否属实),由于访问权限不能看,如有知道怎么搞的请留言交流下,谢谢

  • 相关阅读:
    爬虫header和cookie
    爬虫代理squid
    response对象
    pyspider中内容选择器常用方法汇总
    非阻塞 sleep
    post请求体过大导致ngx.req.get_post_args()取不到参数体的问题
    常用lua代码块
    nginx静态文件缓存的解决方案
    lua-resty-gearman模块
    非在线PDF转图片!!!
  • 原文地址:https://www.cnblogs.com/lanqie/p/8290590.html
Copyright © 2020-2023  润新知