• Ansible API源码剖析(1)-Runner类的初始化


    #ansible版本说明:ansible1.9.1

    1.简单使用例子

    # -*- coding=utf-8 -*-
    import ansible.runner
    ##########################
    # Build an ad-hoc Runner that executes a single module against the hosts
    # of one inventory group, run it, and print whatever run() returns.
    runner = ansible.runner.Runner(
                host_list = 'ip.txt',                           # inventory (hosts) file
                remote_user = 'admin',                          # user to run as on the remote hosts
                module_name = 'shell',                          # use ansible's shell module
                module_args = 'echo 111;sleep 5',               # arguments passed to the module
                pattern = 'test',                               # group in the inventory file to target
                forks = 5,                                      # number of parallel worker processes
                remote_pass = '123456',                         # password for the remote user
                #is_playbook= True,
            )
    datastructure = runner.run()
    print datastructure

    ip.txt

    [test]
    127.0.0.1

    2.ansible.runner模块中的Runner类

    (1)初始化函数学习

    # -*- coding=utf-8 -*-
    
    class Runner(object):
        ''' core API interface to ansible '''

        # see bin/ansible for how this is used...

        def __init__(self,
            host_list=C.DEFAULT_HOST_LIST,      # ex: /etc/ansible/hosts, legacy usage; the configured default is used when not given
            module_path=None,                   # ex: /usr/share/ansible; extra module directories, split on os.pathsep below
            module_name=C.DEFAULT_MODULE_NAME,  # ex: copy; name of the module to run
            module_args=C.DEFAULT_MODULE_ARGS,  # ex: "src=/tmp/a dest=/tmp/b"; the module arguments (what follows -a on the ansible command line)
            forks=C.DEFAULT_FORKS,              # parallelism level
            # number of worker processes; stored as int(forks)

            timeout=C.DEFAULT_TIMEOUT,          # SSH timeout
            pattern=C.DEFAULT_PATTERN,          # which hosts?  ex: 'all', 'acme.example.org' (a group or host pattern from the inventory)
            remote_user=C.DEFAULT_REMOTE_USER,  # ex: 'username'
            remote_pass=C.DEFAULT_REMOTE_PASS,  # ex: 'password123' or None if using key
            remote_port=None,                   # if SSH on different ports
            private_key_file=C.DEFAULT_PRIVATE_KEY_FILE, # if not using keys/passwords; path of the private key used for key authentication
            background=0,                       # async poll every X seconds, else 0 for non-async
            basedir=None,                       # directory of playbook, if applicable; defaults to os.getcwd()
            setup_cache=None,                   # used to share fact data w/ other tasks
            vars_cache=None,                    # used to store variables about hosts
            transport=C.DEFAULT_TRANSPORT,      # 'ssh', 'paramiko', 'local' ('smart' is resolved below)
            conditional='True',                 # run only if this fact expression evals to true
            callbacks=None,                     # used for output
            module_vars=None,                   # a playbooks internals thing
            play_vars=None,                     # playbook-related variables
            play_file_vars=None,                #
            role_vars=None,                     #
            role_params=None,                   #
            default_vars=None,                  # default variables
            extra_vars=None,                    # extra vars specified with the playbook(s)
            is_playbook=False,                  # running from playbook or not?
            inventory=None,                     # reference to Inventory object
            subset=None,                        # subset pattern
            check=False,                        # don't make any changes, just try to probe for potential changes
            diff=False,                         # whether to show diffs for template files that change
            environment=None,                   # environment variables (as dict) to use inside the command
            complex_args=None,                  # structured data in addition to module_args, must be a dict
            error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR, # ex. False
            accelerate=False,                   # use accelerated connection
            accelerate_ipv6=False,              # accelerated connection w/ IPv6
            accelerate_port=None,               # port to use with accelerated connection
            vault_pass=None,
            run_hosts=None,                     # an optional list of pre-calculated hosts to run on
            no_log=False,                       # option to enable/disable logging for a given task
            run_once=False,                     # option to enable/disable host bypass loop for a given task
            become=False,                         # whether to run privilege escalation or not
            become_method=C.DEFAULT_BECOME_METHOD,
            become_user=C.DEFAULT_BECOME_USER,      # ex: 'root'
            become_pass=C.DEFAULT_BECOME_PASS,      # ex: 'password123' or None
            become_exe=C.DEFAULT_BECOME_EXE,        # ex: /usr/local/bin/sudo
            ):
            ''' Store the run options on the instance, build the inventory and
            other defaults that were not supplied, and resolve the 'smart'
            transport to either "ssh" or "paramiko". '''

            # used to lock multiprocess inputs and outputs at various levels
            self.output_lockfile  = OUTPUT_LOCKFILE
            self.process_lockfile = PROCESS_LOCKFILE

            # normalise a missing/empty complex_args to an empty dict
            if not complex_args:
                complex_args = {}

            # storage & defaults
            self.check            = check
            self.diff             = diff
            self.setup_cache      = utils.default(setup_cache, lambda: ansible.cache.FactCache())
            self.vars_cache       = utils.default(vars_cache, lambda: collections.defaultdict(dict))
            self.basedir          = utils.default(basedir, lambda: os.getcwd())
            self.callbacks        = utils.default(callbacks, lambda: DefaultRunnerCallbacks())
            self.generated_jid    = str(random.randint(0, 999999999999))
            self.transport        = transport
            self.inventory        = utils.default(inventory, lambda: ansible.inventory.Inventory(host_list))
            # utils.default(value, fn) returns value unless it is None, in which
            # case it calls fn(); so an Inventory is only built from host_list
            # when the caller did not pass one in.
            self.module_vars      = utils.default(module_vars, lambda: {})
            self.play_vars        = utils.default(play_vars, lambda: {})
            self.play_file_vars   = utils.default(play_file_vars, lambda: {})
            self.role_vars        = utils.default(role_vars, lambda: {})
            self.role_params      = utils.default(role_params, lambda: {})
            self.default_vars     = utils.default(default_vars, lambda: {})
            self.extra_vars       = utils.default(extra_vars, lambda: {})

            self.always_run       = None
            self.connector        = connection.Connector(self)
            self.conditional      = conditional
            self.delegate_to      = None
            self.module_name      = module_name
            self.forks            = int(forks)
            self.pattern          = pattern
            self.module_args      = module_args
            self.timeout          = timeout
            self.remote_user      = remote_user
            self.remote_pass      = remote_pass
            self.remote_port      = remote_port
            self.private_key_file = private_key_file
            self.background       = background
            self.become           = become
            self.become_method    = become_method
            # the requested become user is kept in become_user_var;
            # become_user itself starts out as None
            self.become_user_var  = become_user
            self.become_user      = None
            self.become_pass      = become_pass
            self.become_exe       = become_exe
            self.is_playbook      = is_playbook
            self.environment      = environment
            self.complex_args     = complex_args
            self.error_on_undefined_vars = error_on_undefined_vars
            self.accelerate       = accelerate
            self.accelerate_port  = accelerate_port
            self.accelerate_ipv6  = accelerate_ipv6
            # give the callbacks object a back-reference to this runner
            self.callbacks.runner = self
            self.omit_token       = '__omit_place_holder__%s' % sha1(os.urandom(64)).hexdigest()
            self.vault_pass       = vault_pass
            self.no_log           = no_log
            self.run_once         = run_once

            if self.transport == 'smart':
                # Resolve the 'smart' transport to a concrete one.
                # If the transport is 'smart', check to see if certain conditions
                # would prevent us from using ssh, and fallback to paramiko.
                # 'smart' is the default since 1.2.1/1.3
                self.transport = "ssh"
                # the platform and the presence of an SSH password decide
                # whether "ssh" is kept

                if sys.platform.startswith('darwin') and self.remote_pass:
                    # due to a current bug in sshpass on OSX, which can trigger
                    # a kernel panic even for non-privileged users, we revert to
                    # paramiko on that OS when a SSH password is specified
                    self.transport = "paramiko"
                else:
                    # see if SSH can support ControlPersist if not use paramiko
                    # (probed by running `ssh -o ControlPersist` and inspecting stderr)
                    cmd = subprocess.Popen(['ssh','-o','ControlPersist'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    (out, err) = cmd.communicate()
                    if "Bad configuration option" in err:
                        self.transport = "paramiko"

            # save the original transport, in case it gets
            # changed later via options like accelerate
            self.original_transport = self.transport

            # misc housekeeping
            if subset and self.inventory._subset is None:
                # don't override subset when passed from playbook
                self.inventory.subset(subset)

            # If we get a pre-built list of hosts to run on, from say a playbook, use them.
            # Also where we will store the hosts to run on once discovered
            self.run_hosts = run_hosts

            if self.transport == 'local':
                # for the local transport the "remote" user is the user name of
                # the current effective uid (pwd is a Unix-only module)
                self.remote_user = pwd.getpwuid(os.geteuid())[0]
            # register every directory listed in module_path with the module finder
            if module_path is not None:
                for i in module_path.split(os.pathsep):
                    utils.plugins.module_finder.add_directory(i)

            utils.plugins.push_basedir(self.basedir)

            # ensure we are using unique tmp paths
            # (re-seeds the module-level random number generator)
            random.seed()
        # *****************************************************

    备注:utils.default

    def default(value, function):
        """Return ``value``, or the result of ``function()`` when it is None.

        ``function`` is a zero-argument callable; it is only invoked when the
        fallback is actually needed, so building the default stays lazy.
        """
        return function() if value is None else value

     (2) ansible.inventory.Inventory(host_list)原理剖析(假设都按照最上面的例子来传递,则host_list=ip.txt)

    ansible/inventory/__init__.py

    class Inventory(object):
        """
        Host inventory for ansible.

        Holds the list of groups built from ``host_list`` together with the
        caches and restrictions used when querying hosts.
        """

        __slots__ = [ 'host_list', 'groups', '_restriction', '_also_restriction', '_subset',
                      'parser', '_vars_per_host', '_vars_per_group', '_hosts_cache', '_groups_list',
                      '_pattern_cache', '_vault_password', '_vars_plugins', '_playbook_basedir']

        def __init__(self, host_list=C.DEFAULT_HOST_LIST, vault_password=None):
            """Build the inventory from ``host_list``.

            ``host_list`` may be:

            * ``None`` -- no parser and no groups are created;
            * a list of host names, or a string containing commas (which is
              split into such a list) -- every host is added to a single
              ``all`` group and no inventory file is loaded;
            * the path of an existing directory -- parsed by
              ``InventoryDirectory``;
            * the path of an existing executable file -- run through
              ``InventoryScript``;
            * the path of any other existing file -- parsed by
              ``InventoryParser``.

            Raises ``errors.AnsibleError`` when the path does not exist, or
            when parsing fails in a way the shebang check can explain.
            """

            # the host file file, or script path, or list of hosts
            # if a list, inventory data will NOT be loaded
            self.host_list = host_list
            self._vault_password = vault_password

            # caching to avoid repeated calculations, particularly with
            # external inventory scripts.

            self._vars_per_host  = {}
            self._vars_per_group = {}
            self._hosts_cache    = {}
            self._groups_list    = {}
            self._pattern_cache  = {}

            # to be set by calling set_playbook_basedir by playbook code
            self._playbook_basedir = None

            # the inventory object holds a list of groups
            self.groups = []

            # a list of host(names) to contain current inquiries to
            self._restriction = None
            self._also_restriction = None
            self._subset = None

            # a comma separated string is treated as an inline list of hosts
            if isinstance(host_list, basestring):
                if "," in host_list:
                    host_list = host_list.split(",")
                    host_list = [ h for h in host_list if h and h.strip() ]

            if host_list is None:
                self.parser = None
            elif isinstance(host_list, list):
                self.parser = None
                all_group = Group('all')
                self.groups = [ all_group ]
                # "[ipv6-address]" optionally followed by ":port".  The
                # brackets and \d must be escaped; without the backslashes the
                # pattern does not even compile.
                ipv6_re = re.compile(r'\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?')
                for x in host_list:
                    m = ipv6_re.match(x)
                    if m:
                        all_group.add_host(Host(m.groups()[0], m.groups()[1]))
                    else:
                        if ":" in x:
                            tokens = x.rsplit(":", 1)
                            # if there is ':' in the address, then this is an ipv6
                            if ':' in tokens[0]:
                                all_group.add_host(Host(x))
                            else:
                                all_group.add_host(Host(tokens[0], tokens[1]))
                        else:
                            all_group.add_host(Host(x))
            elif os.path.exists(host_list):
                if os.path.isdir(host_list):
                    # Ensure basedir is inside the directory
                    self.host_list = os.path.join(self.host_list, "")
                    self.parser = InventoryDirectory(filename=host_list)
                    self.groups = self.parser.groups.values()
                else:
                    # check to see if the specified file starts with a
                    # shebang (#!/), so if an error is raised by the parser
                    # class we can show a more apropos error
                    shebang_present = False
                    try:
                        # readline() returns '' for an empty file, and the
                        # with-block closes the handle even if reading fails
                        with open(host_list) as inv_file:
                            first_line = inv_file.readline()
                        if first_line.startswith('#!'):
                            shebang_present = True
                    except (IOError, OSError):
                        # best effort only: an unreadable file is reported by
                        # the parser below
                        pass

                    if utils.is_executable(host_list):
                        try:
                            self.parser = InventoryScript(filename=host_list)
                            self.groups = self.parser.groups.values()
                        except:
                            if not shebang_present:
                                raise errors.AnsibleError(
                                    "The file %s is marked as executable, but failed to execute correctly. " % host_list +
                                    "If this is not supposed to be an executable script, correct this with `chmod -x %s`." % host_list)
                            else:
                                raise
                    else:
                        try:
                            self.parser = InventoryParser(filename=host_list)
                            self.groups = self.parser.groups.values()
                        except:
                            if shebang_present:
                                raise errors.AnsibleError(
                                    "The file %s looks like it should be an executable inventory script, but is not marked executable. " % host_list +
                                    "Perhaps you want to correct this with `chmod +x %s`?" % host_list)
                            else:
                                raise

                utils.plugins.vars_loader.add_directory(self.basedir(), with_subdir=True)
            else:
                raise errors.AnsibleError("Unable to find an inventory file, specify one with -i ?")

            # instantiate the vars plugins
            self._vars_plugins = [ x for x in utils.plugins.vars_loader.all(self) ]

            # get group vars from group_vars/ files and vars plugins
            for group in self.groups:
                group.vars = utils.combine_vars(group.vars, self.get_group_variables(group.name, vault_password=self._vault_password))

            # get host vars from host_vars/ files and vars plugins
            for host in self.get_hosts():
                host.vars = utils.combine_vars(host.vars, self.get_host_variables(host.name, vault_password=self._vault_password))

    (3)ansible/inventory/script.py

    class InventoryScript(object):
        ''' Host inventory parser for ansible using external inventory scripts. '''

        def __init__(self, filename=C.DEFAULT_HOST_LIST):
            ''' Run ``filename --list`` and parse its JSON output into groups.

            Raises ``errors.AnsibleError`` when the script cannot be started
            or exits with a non-zero return code.
            '''

            # Support inventory scripts that are not prefixed with some
            # path information but happen to be in the current working
            # directory when '.' is not in PATH.
            self.filename = os.path.abspath(filename)
            cmd = [ self.filename, "--list" ]
            try:
                # an OSError here means the script could not be executed at all
                sp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except OSError as e:
                raise errors.AnsibleError("problem running %s (%s)" % (' '.join(cmd), e))
            (stdout, stderr) = sp.communicate()

            if sp.returncode != 0:
                raise errors.AnsibleError("Inventory script (%s) had an execution error: %s " % (filename,stderr))

            self.data = stdout
            # see comment about _meta below
            self.host_vars_from_top = None
            # stderr is handed over so _parse can echo it if the script
            # reported a failure
            self.groups = self._parse(stderr)


        def _parse(self, err):
            ''' Turn the script's JSON output (``self.data``) into Group objects.

            ``err`` is the script's stderr; it is written to ``sys.stderr``
            when the parsed data contains a ``failed`` key.  Returns a dict
            mapping group name to Group, always including ``all``.  Raises
            ``errors.AnsibleError`` on a reported failure or on malformed
            ``hosts`` / ``vars`` entries.
            '''

            all_hosts = {}

            # not passing from_remote because data from CMDB is trusted
            self.raw  = utils.parse_json(self.data)
            self.raw  = json_dict_bytes_to_unicode(self.raw)

            all_group = Group('all')
            groups    = dict(all=all_group)

            if 'failed' in self.raw:
                sys.stderr.write(err + "\n")
                raise errors.AnsibleError("failed to parse executable inventory script results: %s" % self.raw)

            for (group_name, data) in self.raw.items():

                # in Ansible 1.3 and later, a "_meta" subelement may contain
                # a variable "hostvars" which contains a hash for each host
                # if this "hostvars" exists at all then do not call --host for each
                # host.  This is for efficiency and scripts should still return data
                # if called with --host for backwards compat with 1.2 and earlier.
                # NOTE: a "_meta" entry without "hostvars" is not skipped here
                # and falls through to be handled like any other group.

                if group_name == '_meta':
                    if 'hostvars' in data:
                        self.host_vars_from_top = data['hostvars']
                        continue

                if group_name != all_group.name:
                    group = groups[group_name] = Group(group_name)
                else:
                    group = all_group

                if not isinstance(data, dict):
                    # bare value: treat it as the group's host list
                    data = {'hosts': data}
                # is not those subkeys, then simplified syntax, host with vars
                elif not any(k in data for k in ('hosts','vars','children')):
                    data = {'hosts': [group_name], 'vars': data}

                if 'hosts' in data:
                    if not isinstance(data['hosts'], list):
                        raise errors.AnsibleError("You defined a group \"%s\" with bad "
                            "data for the host list:\n %s" % (group_name, data))

                    for hostname in data['hosts']:
                        # one Host object per name, shared between groups
                        if hostname not in all_hosts:
                            all_hosts[hostname] = Host(hostname)
                        host = all_hosts[hostname]
                        group.add_host(host)

                if 'vars' in data:
                    if not isinstance(data['vars'], dict):
                        raise errors.AnsibleError("You defined a group \"%s\" with bad "
                            "data for variables:\n %s" % (group_name, data))

                    for k, v in data['vars'].items():
                        if group.name == all_group.name:
                            all_group.set_variable(k, v)
                        else:
                            group.set_variable(k, v)

            # Separate loop to ensure all groups are defined
            for (group_name, data) in self.raw.items():
                if group_name == '_meta':
                    continue
                if isinstance(data, dict) and 'children' in data:
                    for child_name in data['children']:
                        if child_name in groups:
                            groups[group_name].add_child_group(groups[child_name])

            # every top-level group other than 'all' becomes a child of 'all'
            for group in groups.values():
                if group.depth == 0 and group.name != 'all':
                    all_group.add_child_group(group)

            return groups

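    总而言之:InventoryScript 会以 --list 参数执行外部 inventory 脚本,并把脚本输出的 JSON 解析成 Group/Host 对象(即分组后的主机列表)。注意:只有当 host_list 指向可执行文件时才会走 InventoryScript;像上面例子中的 ip.txt 这种普通文件,走的是 InventoryParser。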

    (4)run函数,实际执行

    def run(self):
        ''' xfer & run module on all matched hosts

        Returns the value of ``self._partition_results(results)``, or
        ``dict(contacted={}, dark={})`` when no host matches the pattern.
        Raises ``errors.AnsibleError("interrupted")`` when the parallel run
        fails with a broken pipe.
        '''

        # find hosts that match the pattern, unless a pre-computed list of
        # hosts was supplied
        if not self.run_hosts:
            self.run_hosts = self.inventory.list_hosts(self.pattern)
        hosts = self.run_hosts

        # nothing to do: notify the callbacks and return empty results
        if len(hosts) == 0:
            self.callbacks.on_no_hosts()
            return dict(contacted={}, dark={})

        # publish this instance through the module-level global that the
        # worker processes read
        global multiprocessing_runner
        multiprocessing_runner = self
        results = None

        # Check if this is an action plugin. Some of them are designed
        # to be ran once per group of hosts. Example module: pause,
        # run once per hostgroup, rather than pausing once per each
        # host.
        p = utils.plugins.action_loader.get(self.module_name, self)

        # never use more forks than there are hosts; 0 means "one per host"
        if self.forks == 0 or self.forks > len(hosts):
            self.forks = len(hosts)

        if (p and (getattr(p, 'BYPASS_HOST_LOOP', None)) or self.run_once):

            # Expose the current hostgroup to the bypassing plugins
            self.host_set = hosts
            # We aren't iterating over all the hosts in this
            # group. So, just choose the "delegate_to" host if that is defined and is
            # one of the targeted hosts, otherwise pick the first host in our group to
            # construct the conn object with.
            if self.delegate_to is not None and self.delegate_to in hosts:
                host = self.delegate_to
            else:
                host = hosts[0]

            result_data = self._executor(host, None).result
            # Create a ResultData item for each host in this group
            # using the returned result. If we didn't do this we would
            # get false reports of dark hosts.
            results = [ ReturnData(host=h, result=result_data, comm_ok=True)
                        for h in hosts ]
            del self.host_set

        elif self.forks > 1:
            try:
                # fan the hosts out over self.forks worker processes
                results = self._parallel_exec(hosts)
            except IOError as ie:
                print(ie.errno)
                if ie.errno == 32:
                    # broken pipe from Ctrl+C
                    raise errors.AnsibleError("interrupted")
                raise
        else:
            # single fork: run the hosts one after another in this process
            results = [ self._executor(h, None) for h in hosts ]

        return self._partition_results(results)

     (5)_parallel_exec函数

    def _parallel_exec(self, hosts):
        ''' handles mulitprocessing when more than 1 fork is required

        Puts every host on a job queue, starts ``self.forks`` worker
        processes running ``_executor_hook``, waits for them to finish and
        returns the list of items collected from the result queue.  Raises
        ``errors.AnsibleError("<interrupted>")`` if reading the result queue
        fails with ``socket.error``.
        '''

        manager = multiprocessing.Manager()

        # job queue: one entry per host
        job_queue = manager.Queue()
        for host in hosts:
            job_queue.put(host)

        # result queue: filled by the workers
        result_queue = manager.Queue()

        try:
            fileno = sys.stdin.fileno()
        except ValueError:
            fileno = None

        # start self.forks processes running _executor_hook, each given the
        # job queue, the result queue and its own duplicate of stdin
        workers = []
        for _ in range(self.forks):
            new_stdin = None
            if fileno is not None:
                try:
                    new_stdin = os.fdopen(os.dup(fileno))
                except OSError:
                    # couldn't dupe stdin, most likely because it's
                    # not a valid file descriptor, so we just rely on
                    # using the one that was passed in
                    pass
            prc = multiprocessing.Process(target=_executor_hook,
                args=(job_queue, result_queue, new_stdin))
            prc.start()
            workers.append(prc)

        # wait for every worker to finish; on Ctrl+C terminate them all and
        # still join so that no process is left behind
        try:
            for worker in workers:
                worker.join()
        except KeyboardInterrupt:
            for worker in workers:
                worker.terminate()
                worker.join()

        # drain the result queue into a plain list
        results = []
        try:
            while not result_queue.empty():
                results.append(result_queue.get(block=False))
        except socket.error:
            raise errors.AnsibleError("<interrupted>")
        return results

    (6)_executor_hook函数

    def _executor_hook(job_queue, result_queue, new_stdin):
        ''' Worker loop: take hosts from ``job_queue`` until it is empty, run
        ``multiprocessing_runner._executor(host, new_stdin)`` for each one and
        put the returned value on ``result_queue``.

        Errors raised while handling a host are printed with
        ``traceback.print_exc()`` and the loop moves on; nothing is put on
        ``result_queue`` for that host.
        '''

        # attempt workaround of https://github.com/newsapps/beeswithmachineguns/issues/17
        # this function also not present in CentOS 6
        if HAS_ATFORK:
            atfork()

        # ignore SIGINT in this process
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        while not job_queue.empty():
            try:
                # non-blocking get: the queue may have been emptied since the
                # empty() check above (presumably by another worker)
                host = job_queue.get(block=False)
                return_data = multiprocessing_runner._executor(host, new_stdin)
                result_queue.put(return_data)
            except Queue.Empty:
                pass
            except:
                traceback.print_exc()
  • 相关阅读:
    C++11 lambda表达式(lambda expression)
    win 10 relog.exe 下载地址
    检测闩锁/自旋锁争用
    关于sql 锁和并发的一些记录
    FAST number_rows 意义解释
    网站实施SEO的步骤
    搜索引擎高级搜索指令浅析
    关于遇到高并发时候的一些总结
    Autofac 设置方法拦截器的两种方式
    C# MVC 进入Action 方法之后怎么使用MVC参数验证模型
  • 原文地址:https://www.cnblogs.com/PythonOrg/p/6030415.html
Copyright © 2020-2023  润新知