• 定时任务中多进程/多线程对数据库同一条数据进行操作时的冲突避免


    import os, sys
    import time
    import logging
    import requests
    import threading
    import random

    from random import choice
    from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

    # Global convention that keeps later log analysis simple: the script ends
    # through os._exit(INT) using one of the codes 4001 4002 4003 4004.
    start_time = time.strftime('%Y%m%d_%H%M%S')
    os_sep = os.sep
    _this_file = os.path.abspath(__file__)
    this_file_abspath = os.path.dirname(_this_file)
    this_file_name = _this_file.split(os_sep)[-1]
    # Cut the last two path components off, then take the parent of the rest.
    base_dir = os.path.dirname(os_sep.join(_this_file.split(os_sep)[:-2]))
    log_abspath = base_dir + os_sep + 'log'

    # The very first record must not depend on the logging machinery, so it is
    # appended to the log file by hand.
    now_ = time.strftime('%Y%m%d_%H%M%S')
    e = '启动脚本'
    logf = log_abspath + os_sep + this_file_name + now_
    s = '%s%s%s%s' % (__file__, now_, os.getcwd(), e)
    logf_selenium = logf + 'seleniumlog'
    with open(logf, mode='a') as fo:
        fo.write(s)
        print(s)

    # Make the project root importable and load the project's MySQL helper.
    # logging is not configured yet, so a failure is appended to the log file
    # by hand before the process exits with code 4001.
    try:
        sys.path.append(base_dir)
        from core.utils import MysqlHelper
    except Exception as e:
        s = '%s%s%s' % (
            'from core.utils import MysqlHelper EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())),
            e)
        with open(logf, 'a') as fo:
            fo.write(s)
        # os._exit() terminates without flushing buffers, so the log file has
        # to be closed and stdout flushed before it is called; otherwise the
        # record written above is lost.
        print(s, flush=True)
        os._exit(4001)

    # Route the logging module to the same log file used for the hand-written
    # startup record. Each record carries the thread id and the process id.
    try:
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                            datefmt='%a, %d %b %Y %H:%M:%S',
                            filename=logf,
                            filemode='a')
    except Exception as e:
        s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
        with open(logf, 'a') as fo:
            fo.write(s)
        # os._exit() terminates without flushing buffers, so the log file has
        # to be closed and stdout flushed before it is called; otherwise the
        # record written above is lost.
        print(s, flush=True)
        os._exit(4002)

    # Load the User-Agent pool from ua_list.txt, which sits next to this
    # script. Spaces are removed from every entry (as before); the line ending
    # is now removed as well and blank lines are skipped, so an entry can be
    # used directly as a User-Agent value. Exit code 4003 on failure.
    fua, lua = '%s%s%s' % (this_file_abspath, os_sep, 'ua_list.txt'), []
    try:
        with open(fua, 'r') as fo:
            for i in fo:
                ua = i.replace(' ', '').strip()
                if ua:
                    lua.append(ua)
    except Exception as e:
        # Include the exception text so the reason for the failure is recorded.
        s = '%s%s %s' % ('打开文件 EXCEPTION  ua文件路径: ', fua, e)
        logging.error(s)
        # os._exit() does not flush stdout, so flush explicitly first.
        print(s, flush=True)
        os._exit(4003)

    # Initial data load. Any exception here restarts the script, without limit.
    try:
        # Task of this script: it only touches test_error_temp and only updates
        # the per-URL status counters: row deleted, row kept (cannot be opened /
        # our ad is missing; switch or do not update), normal.
        mysql_obj = MysqlHelper()
        # Randomise the scan direction of the pending URLs.
        desc = 'DESC' if random.randint(1, 2) == 2 else 'ASC'

        q = 'SELECT DISTINCT url FROM test_error_temp  WHERE  no_ad_times+no_open_times+ok_times<script_need_run_times  ORDER BY id  %s ;' % (
            desc)
        s = '%s%s' % (' DB SQL ', q)
        logging.info(s)

        tuple_l = mysql_obj.select(q)
        del mysql_obj
        if len(tuple_l) == 0:
            # Nothing is started later on when the result is empty, so the
            # script simply runs to its end.
            s = '无待检测url,程序退出'
            print(s)
            logging.info(s)

    except Exception as e:
        s = '%s%s%s' % ('初始数据,查询数据库异常,无限次重启该脚本', e, time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())))
        print(s)
        logging.warning(s)
        # Restart with the interpreter that is running right now, and without
        # a shell: a bare "python" may resolve to a different interpreter, and
        # an unquoted script path breaks when it contains spaces.
        import subprocess  # local import: only needed on this restart path
        subprocess.call([sys.executable or 'python', __file__])
        os._exit(1024)

    # Runtime budget: start timestamp and the maximum run time in seconds.
    ctrl_start = time.time()
    max_script_time = 3600 * 6

    # Strings looked for in a fetched page, number of checks per URL and the
    # pause in seconds before each check.
    mycode_l = ['g3user', '51g3.com.cn']
    repeat_times = 2
    repeat_sleep_times = 2

    # Progress counters, the list of worker threads and the number of rows
    # handed to each thread.
    c_done = 0
    c_all = len(tuple_l)
    tl = []
    tstep = 1000
    drop_ = 0

    # Record the effective settings once, on stdout and in the log file.
    s = ''.join(str(part) for part in (
        '  drop_,ctrl_start, max_script_time,mycode_l, repeat_times, repeat_sleep_times ',
        drop_, ctrl_start, max_script_time, mycode_l, repeat_times, repeat_sleep_times))
    print(s)
    logging.info(s)


    def main():
        """Check every pending URL in worker threads and record the outcome.

        The module-level ``tuple_l`` (rows whose first column is a URL) is cut
        into slices of ``tstep`` rows; one thread runs ``tf`` per slice and
        ``main`` returns once all threads have finished.
        """

        def ctrl_runtime(exit_type=''):
            """Stop execution once ``max_script_time`` seconds have elapsed.

            exit_type selects how to stop: '' calls ``exit``, 'sys' calls
            ``sys.exit`` and 'os' calls ``os._exit(4004)``. Does nothing while
            the time budget is not used up.
            """
            if time.time() - ctrl_start >= max_script_time:
                s = '%s%s%s%s%s%s%s%s%s' % (
                    '程序开始执行时间', ctrl_start, '执行时间阈值', max_script_time, '终止执行', ' exit_type =', exit_type, ' threadID ',
                    threading.get_ident())
                logging.info(s)
                if exit_type == '':
                    exit(s)
                elif exit_type == 'sys':
                    sys.exit(s)
                elif exit_type == 'os':
                    # os._exit() requires an integer status argument.
                    os._exit(4004)

        def chk_exception_url(url, sleep_seconds=0, http_tag='http://', timeout=30):
            """Fetch ``url`` and report whether one of ``mycode_l`` is on the page.

            Sleeps ``sleep_seconds`` first. ``http_tag`` is prepended when the
            URL does not contain 'http'. ``timeout`` (seconds) is passed to
            ``requests.get`` so a stalled server cannot block the worker
            forever.

            Always returns a dict with:
              'ok'          -- 1: marker found; 0: page fetched without marker,
                               or status >= 400; -1: inconclusive (exception or
                               any other status), nothing is written to the DB.
              'status_code' -- HTTP status, or -1 when the request failed.
              'info'        -- log text describing this check.
            """
            time.sleep(sleep_seconds)
            # -1 means "do not write to the database for this check".
            ret = {'ok': -1}

            s = '%s%s%s%s%s%s%s%s%s%s' % (
                time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' os.getppid() ', os.getppid(),
                ' os.getpid() ', os.getpid(), ' threading.get_ident ', threading.get_ident(), ' start_time ', start_time,
                url)
            try:
                if 'http' not in url:
                    url = '%s%s' % (http_tag, url)
                r = requests.get(url, timeout=timeout)
                ret['status_code'] = int(r.status_code)
                s = '%s,%s,%s,%s,%s' % (s, ret['status_code'], url, r, r.reason)
            except Exception as e:
                s = '%s %s %s' % (s, ' SPIDER ', e)
                logging.error(s)
                print(e, url)
                ret['status_code'], ret['info'] = -1, s
                return ret

            if ret['status_code'] == 200:
                if any(code in r.text for code in mycode_l):
                    s = '%s%s' % (s, ' OK ')
                    logging.info(s)
                    # Mark the success so the caller counts it (ok_times).
                    ret['ok'], ret['info'] = 1, s
                    return ret

                # Marker not in the raw HTML: render the page with PhantomJS
                # and look again in the rendered source.
                try:
                    dcap = dict(DesiredCapabilities.PHANTOMJS)
                    dcap["phantomjs.page.settings.userAgent"] = choice(lua)
                    dcap['browserName'], dcap['platform'] = 'Mozilla', 'Win'

                    driver = webdriver.PhantomJS(desired_capabilities=dcap,
                                                 executable_path='/usr/local/phantomjs/bin/phantomjs',
                                                 service_log_path=logf_selenium)
                    try:
                        driver.get(url)
                        page_source = driver.page_source
                    finally:
                        # Always shut the browser down, also when get() fails.
                        driver.quit()

                    if any(code in page_source for code in mycode_l):
                        s = '%s%s' % (s, ' OK ')
                        logging.info(s)
                        ret['ok'], ret['info'] = 1, s
                        return ret

                    s = '%s%s' % (s, '返回200,但是在html中未检测到我公司代码。')
                    ret['ok'], ret['info'] = 0, s
                    return ret
                except Exception as e:
                    s = '%s%s%s%s' % (s, ' SPIDER ', e, ' 返回200,但是在检查是否我司代码环节,程序执行异常')
                    logging.error(s)
                    ret['info'] = s
                    return ret
            elif ret['status_code'] >= 400:
                logging.warning(s)
                ret['ok'], ret['info'] = 0, s
                return ret

            # Any other status: inconclusive. Still return the dict so the
            # caller can index it; 'ok' stays -1 and nothing is written.
            logging.warning(s)
            ret['info'] = s
            return ret

        def tf(ts):
            """Worker: check rows ``tuple_l[ts:ts + tstep]`` and update the DB."""
            # NOTE(review): c_done is incremented from several threads without
            # a lock; it is only used for the progress text below.
            global c_done
            te = min(ts + tstep, c_all)
            for i in tuple_l[ts:te]:
                ctrl_runtime(exit_type='os')
                url = i[0]
                s = '%s%s%s%s%s%s%s%s%s%s' % (
                    time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' os.getppid() ', os.getppid(),
                    ' os.getpid() ', os.getpid(), ' threading.get_ident ', threading.get_ident(), ' start_time ',
                    start_time,
                    url)
                # Defined up front so the code below also works when
                # repeat_times is 0.
                ret = {'ok': -1}
                for t in range(0, repeat_times, 1):
                    ret = chk_exception_url(url, repeat_sleep_times)
                    if ret['ok'] == 1:
                        c_done += 1
                        s = '%s/%s%s%s' % (c_done, c_all, 'chk-ret', s)
                        print(s)
                        logging.info(s)
                        break

                # Pick the counter column to increment; None means no update.
                counter = None
                if ret['ok'] == 1:
                    counter = 'ok_times'
                elif ret['ok'] == 0:
                    counter = 'no_ad_times' if ret['status_code'] == 200 else 'no_open_times'
                if counter is None:
                    continue

                ctime = int(time.time())
                # The WHERE clause repeats the read-side limit so that workers
                # updating the same URL cannot push the total past
                # script_need_run_times.
                # NOTE(review): url and ret['info'] are interpolated into the
                # SQL text unescaped; a double quote in either breaks the
                # statement. Use parameter binding if MysqlHelper supports it.
                q = 'UPDATE test_error_temp SET %s=%s+1,remarks=CONCAT("%s",remarks),update_time="%s" WHERE url="%s" AND no_ad_times+no_open_times+ok_times<script_need_run_times ' % (
                    counter, counter, ret['info'], ctime, url)
                try:
                    mysql_obj = MysqlHelper()
                    mysql_obj.execute(q)
                    mysql_obj.commit()
                    del mysql_obj
                    s = '%s%s%s' % (s, ' DB SQL ok ', q)
                    logging.info(s)
                    print(s)
                except Exception as e:
                    s = '%s%s%s%s' % (s, ' DB Exception- ', q, e)
                    logging.error(s)
                    print(s)

        # One thread per slice of tstep rows.
        for i in range(0, c_all, tstep):
            tl.append(threading.Thread(target=tf, args=(i,), name=tf.__name__))

        for t in tl:
            # Set the flag itself; assigning to setDaemon would only replace
            # the method on the instance.
            t.daemon = False
            t.start()
        for t in tl:
            t.join()


    # Run the URL checks only when this file is executed as a script.
    if __name__ == '__main__':
        main()

    虽然在数据源头(读环节的 SELECT)已经有限制条件 no_ad_times+no_open_times+ok_times<script_need_run_times,

    但是多个进程/线程可能同时读到同一条数据并各自更新;因此在更新环节(写环节的 UPDATE),同样需要把该条件加上:

    no_ad_times+no_open_times+ok_times<script_need_run_times

    -- 历史有效检测次数统计
    SELECT COUNT(1) ,no_open_times+no_ad_times+ok_times as a FROM test_error_temp GROUP BY a ORDER BY a DESC;

    COUNT(1) a

    1 75
    15 74
    53 73
    114 72
    51 71
    46 70
    61 69
    81 68
    86 67
    73 66
    80 65
    121 64
    118 63
    125 62
    136 61
    137 60
    154 59
    197 58
    200 57
    186 56
    214 55

    目的:避免出现上面统计结果中检测次数为 75/74/73 这样的异常数据

  • 相关阅读:
    构建Web API服务
    AutoMapper(七)
    AutoMapper(六)
    AutoMapper(五)
    AutoMapper(四)
    AutoMapper(三)
    递归函数练习:输出菲波拉契(Fibonacci)数列的前N项数据
    递归程序练习:输出十进制数的二进制表示
    递归函数练习——累乘
    递归练习——累加和
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8041176.html
Copyright © 2020-2023  润新知