• Ironic bare metal rescue process


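    1、用户调用Nova的rescue函数(注意:下面摘录的是spawn部署流程的代码,目标状态为ACTIVE;rescue流程同样通过node.set_provision_state进入Ironic,只是目标状态为rescue并携带rescue_password)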

    nova/virt/ironic/driver.py
    # Excerpt of Nova's Ironic virt driver; elided code is marked with "......".
    class IronicDriver(virt_driver.ComputeDriver):
            ......
            ......
        # Import the ironicclient module on first use (only when the global
        # `ironic` is still None) and create the Ironic client wrapper.
        def __init__(self, virtapi, read_only=False):
            super(IronicDriver, self).__init__(virtapi)
            global ironic
            if ironic is None:
                ironic = importutils.import_module('ironicclient')
            ......
            self.ironicclient = client_wrapper.IronicClientWrapper()
         
        # NOTE(review): this excerpt shows the deploy path (target state
        # ironic_states.ACTIVE with a configdrive), not a rescue call.
        def spawn(self, context, instance, image_meta, injected_files,
              admin_password, allocations, network_info=None,
              block_device_info=None):
            ......
            # Call ironicclient.call to trigger deployment of the node.
            try:
                 self.ironicclient.call("node.set_provision_state", node_uuid,
                                        ironic_states.ACTIVE,
                                        configdrive=configdrive_value)
            ......
            try:
                # The virt driver loops while waiting for provision_state to
                # change and updates the Nova state as needed.
                timer.start(interval=CONF.ironic.api_retry_interval).wait()
                LOG.info('Successfully provisioned Ironic node %s',
                 node.uuid, instance=instance)
    View Code
    ironic/api/controllers/v1/node.py
    # The Ironic API receives the set_provision_state call and issues the
    # do_node_rescue RPC call.
    class NodeStatesController(rest.RestController):
        def provision(self, node_ident, target, configdrive=None,
                  clean_steps=None, rescue_password=None):
            .....
            # Branch taken when the requested target is the 'rescue' verb.
            elif (target == ir_states.VERBS['rescue']):
                # Reject a missing or whitespace-only rescue password with a
                # BAD_REQUEST client error.
                if not (rescue_password and rescue_password.strip()):
                    msg = (_('A non-empty "rescue_password" is required when '
                        'setting target provision state to %s') %
                    ir_states.VERBS['rescue'])
                    raise wsme.exc.ClientSideError(
                        msg, status_code=http_client.BAD_REQUEST)
                # Hand the request over to the conductor via RPC.
                # NOTE(review): rpc_node and topic come from elided code.
                pecan.request.rpcapi.do_node_rescue(
                    pecan.request.context, rpc_node.uuid, rescue_password, topic)
    View Code
    ironic/conductor/manager.py
    # Excerpt of the conductor-side rescue handling; elided code is marked
    # with "......".
    # NOTE(review): the indentation of the `try:` bodies and of the
    # process_event branches was lost in this excerpt, and the matching
    # `except` clauses are not shown.
    class ConductorManager(base_manager.BaseConductorManager):
        ......
        def do_node_rescue(self, context, node_id, rescue_password):
            ......
            # Save the node's rescue password.
            instance_info = node.instance_info
            instance_info['rescue_password'] = rescue_password
            node.instance_info = instance_info
            node.save()# The conductor stores the rescue password in instance_info and then notifies the corresponding driver
      
            # Validate the power, rescue and network interfaces.
            try:
            task.driver.power.validate(task)
            task.driver.rescue.validate(task)
            task.driver.network.validate(task)
             
            # Fire the 'rescue' event; _do_node_rescue is passed as the
            # argument of the _spawn_worker callback.
            try:
            task.process_event(
            'rescue',
            callback=self._spawn_worker,
            call_args=(self._do_node_rescue, task),# Internal RPC method that rescues an existing node deployment
            err_handler=utils.spawn_rescue_error_handler)
             
        def _do_node_rescue(self, task):
            ......
            # Ask the rescue interface to perform the rescue; its return value
            # selects the follow-up event.
            try:
                next_state = task.driver.rescue.rescue(task)
      
            # RESCUEWAIT -> fire 'wait'; RESCUE -> fire 'done'.
            if next_state == states.RESCUEWAIT:
            task.process_event('wait')
            elif next_state == states.RESCUE:
                    task.process_event('done')
    View Code
    ironic/drivers/modules/agent.py
    class AgentRescue(base.RescueInterface):
        .....
        # Boot a rescue ramdisk on the node.
        def rescue(self, task):
            # Power the node off.
            manager_utils.node_power_action(task, states.POWER_OFF)
            # Clean up the instance.
            task.driver.boot.clean_up_instance(task)
            # Unconfigure the node's tenant networks.
            task.driver.network.unconfigure_tenant_networks(task)
            # Create a Neutron port for each port so the rescue ramdisk can boot.
            task.driver.network.add_rescuing_network(task)
            # Only prepare the ramdisk boot when the agent boot is managed
            # (CONF.agent.manage_agent_boot).
            if CONF.agent.manage_agent_boot:
                ramdisk_opts = deploy_utils.build_agent_options(task.node)
                # Prepare booting of the Ironic ramdisk (via PXE, per the
                # original author's note).
                task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
            # Power the node back on.
            manager_utils.node_power_action(task, states.POWER_ON)
             
            # Report RESCUEWAIT as the next state to the caller.
            return states.RESCUEWAIT
    View Code
    ironic/drivers/modules/pxe.py
        # Excerpt of the PXE boot interface; elided code is marked with "......".
        class PXEBoot(base.BootInterface):
            ......
            def prepare_ramdisk(self, task, ramdisk_params):
                node = task.node
                # `mode` is used below to build the '<mode>_kernel' and
                # '<mode>_ramdisk' labels.
                mode = deploy_utils.rescue_or_deploy_mode(node)
                 
                if CONF.pxe.ipxe_enabled:
                    # Render the iPXE boot script into the HTTP root directory.
                    pxe_utils.create_ipxe_boot_script()
                dhcp_opts = pxe_utils.dhcp_options_for_instance(task)# Retrieve the DHCP PXE boot options
                provider = dhcp_factory.DHCPFactory()
                provider.update_dhcp(task, dhcp_opts)# Send or update the DHCP BOOT options for this node
                pxe_info = _get_image_info(node, mode=mode)# Generate the TFTP file paths for the rescue image
      
                # NOTE(review): `persistent` is not defined in this excerpt;
                # presumably it is set in elided code - confirm.
                manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                       persistent=persistent)
     
                # With iPXE and Swift both enabled, the kernel and ramdisk
                # entries are dropped from pxe_info before the caching step.
                if CONF.pxe.ipxe_enabled and CONF.pxe.ipxe_use_swift:
                    kernel_label = '%s_kernel' % mode
                    ramdisk_label = '%s_ramdisk' % mode
                    pxe_info.pop(kernel_label, None)
                    pxe_info.pop(ramdisk_label, None)
     
                # Cache whatever images remain in pxe_info.
                if pxe_info:
                    _cache_ramdisk_kernel(task.context, node, pxe_info)
                
    View Code

    IPA与ironic-conductor的交互:Agent ramdisk启动后,先回调/v1/lookup获取节点信息,然后周期性地发送心跳

    ironic/drivers/modules/agent_base_vendor.py
    # Excerpt of the agent heartbeat handling; elided code is marked with
    # "......" / ".....".
    class HeartbeatMixin(object):
        ......
        def heartbeat(self, task, callback_url, agent_version):
            ......
            try:
                .....
                # A heartbeat from a node in RESCUEWAIT triggers finalization
                # of the rescue.
                elif (node.provision_state == states.RESCUEWAIT):
                    # NOTE(review): msg is presumably used by an elided error
                    # handler if finalization fails - confirm.
                    msg = _('Node failed to perform rescue operation.')
                    self._finalize_rescue(task)
      
        def _finalize_rescue(self, task):
            node = task.node
            # Ask the agent client to finalize rescue mode on the node.
            # NOTE(review): the matching `except` clause is not shown here.
            try:
                result = self._client.finalize_rescue(node)
    View Code
    ironic/drivers/modules/agent_client.py
    class AgentClient(object):
        # Instruct the ramdisk to finish entering rescue mode.
        def finalize_rescue(self, node):
            # Call finalize_rescue (RESCUEWAIT -> RESCUING) and pass the
            # rescue_password stored in the node's instance_info to IPA.
            # NOTE(review): the original comment also mentions the config
            # drive, but no config drive is passed in this excerpt.
            rescue_pass = node.instance_info.get('rescue_password')
            params = {'rescue_password': rescue_pass}
            return self._command(node=node,
                         method='rescue.finalize_rescue',
                         params=params)
      
        def _command(self, node, method, params, wait=False):
            # Send a command to IPA.
            url = self._get_command_url(node)
            body = self._get_command_body(method, params)
            # `wait` is sent as a lower-case string ('true' / 'false').
            # NOTE(review): the closing brace of request_params and the
            # `except` clause of the try are missing from this excerpt.
            request_params = {
                'wait': str(wait).lower()
            try:
                response = self.session.post(url, params=request_params, data=body)
    View Code
    ironic_python_agent/extensions/rescue.py
    # File that receives the hashed rescue password.
    PASSWORD_FILE = '/etc/ipa-rescue-config/ipa-rescue-password'
    class RescueExtension(base.BaseAgentExtension):
            # Store the rescue password, then stop serving the agent API.
            def finalize_rescue(self, rescue_password=""):
                self.write_rescue_password(rescue_password)
                self.agent.serve_api = False # Turn off the API interface
                return
      
      
            # Hash the password with a salt from self.make_salt() and write
            # the hash to PASSWORD_FILE.
            # NOTE(review): the indentation of the `with` body and the
            # `except` clause of the try are missing from this excerpt.
            def write_rescue_password(self, rescue_password=""):
                    LOG.debug('Writing hashed rescue password to %s', PASSWORD_FILE)
                    salt = self.make_salt()
                    hashed_password = crypt.crypt(rescue_password, salt)
                    try:
                        with open(PASSWORD_FILE, 'w') as f:
                        f.write(hashed_password)# Write the hashed rescue password to /etc/ipa-rescue-config/ipa-rescue-password
    View Code
    ironic/drivers/modules/agent_base_vendor.py
    class HeartbeatMixin(object):
        # Call the ramdisk to prepare rescue mode and verify the result.
        def _finalize_rescue(self, task):
            node = task.node
            # NOTE(review): the `except` clause and any check of `result` are
            # not shown in this excerpt.
            try:
                result = self._client.finalize_rescue(node)
            task.process_event('resume')# Resume the node's state transition
            task.driver.rescue.clean_up(task)# Clean up this node's deployment environment
            task.driver.network.configure_tenant_networks(task)# Switch networking back to the previous tenant networks
            task.process_event('done')# Fire 'done' to complete the transition
    View Code
  • 相关阅读:
    让数据更精准,神器标配:热图
    运维监控大数据的提取与分析
    IT运营新世界大会:广通软件开启双态运维大时代
    持续交付的Mesos与Docker导入篇
    运算符
    Django 模型层(2)
    Django模型层
    Django的模板层
    Django的视图层
    Django的路由层(URLconf)
  • 原文地址:https://www.cnblogs.com/gushiren/p/9512846.html
Copyright © 2020-2023  润新知