• ElasticSearch recovery过程源码分析





    public class RecoveryState implements ToXContent, Streamable {
        public enum Stage {
            INIT((byte) 0),
            /** recovery of lucene files, either reusing local ones or copying new ones */
            INDEX((byte) 1),
            /** potentially running check index */
            VERIFY_INDEX((byte) 2),
            /** starting up the engine, replaying the translog */
            TRANSLOG((byte) 3),
            /** performing final task after all translog ops have been done */
            FINALIZE((byte) 4),
            DONE((byte) 5);


    public enum ShardRoutingState {
        /** The shard is not assigned to any node. */
        UNASSIGNED((byte) 1),
        /**
         * The shard is initializing (probably recovering from either a peer shard
         * or gateway).
         */
        //分片正在初始化(可能正在从peer shard或者gateway进行恢复)
        INITIALIZING((byte) 2),
        /** The shard is started. */
        STARTED((byte) 3),
        /** The shard is in the process of being relocated. */
        RELOCATING((byte) 4);


    /**
     * The source recovery accepts recovery requests from other peer shards and starts the recovery process from this
     * source shard to the target shard.
     */
    public class PeerRecoverySourceService extends AbstractComponent implements IndexEventListener {
        public static class Actions {
            public static final String START_RECOVERY = "internal:index/shard/recovery/start_recovery";
        private final TransportService transportService;
        private final IndicesService indicesService;
        private final RecoverySettings recoverySettings;
        private final ClusterService clusterService;
        private final OngoingRecoveries ongoingRecoveries = new OngoingRecoveries();
        /**
         * Stores the collaborating services and registers the handler for {@link Actions#START_RECOVERY}
         * requests on the transport service, to be executed on the {@code GENERIC} thread pool.
         *
         * <p>NOTE(review): {@code settings} is not used in the visible body - confirm whether a call that
         * consumed it (for example a superclass constructor call) was dropped from this excerpt.
         *
         * @param settings         node settings (unused in the visible body, see note above)
         * @param transportService transport service the start-recovery handler is registered on
         * @param indicesService   used to look up the index service of the shard to recover from
         * @param recoverySettings recovery settings kept for later use
         * @param clusterService   used to read the routing nodes when validating a request
         */
        public PeerRecoverySourceService(Settings settings, TransportService transportService, IndicesService indicesService,
                                         RecoverySettings recoverySettings, ClusterService clusterService) {
            this.transportService = transportService;
            this.indicesService = indicesService;
            this.clusterService = clusterService;
            this.recoverySettings = recoverySettings;
            transportService.registerRequestHandler(Actions.START_RECOVERY, StartRecoveryRequest::new, ThreadPool.Names.GENERIC,
                new StartRecoveryTransportRequestHandler());
        }
        public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard,
                                           Settings indexSettings) {
            if (indexShard != null) {
                ongoingRecoveries.cancel(indexShard, "shard is closed");
        private RecoveryResponse recover(StartRecoveryRequest request) throws IOException {
            final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
            final IndexShard shard = indexService.getShard(request.shardId().id());
            // starting recovery from that our (the source) shard state is marking the shard to be in recovery mode as well, otherwise
            // the index operations will not be routed to it properly
            RoutingNode node = clusterService.state().getRoutingNodes().node(request.targetNode().getId());
            if (node == null) {
                logger.debug("delaying recovery of {} as source node {} is unknown", request.shardId(), request.targetNode());
                throw new DelayRecoveryException("source node does not have the node [" + request.targetNode() + "] in its state yet..");
            ShardRouting routingEntry = shard.routingEntry();
            // 或者
            if (request.isPrimaryRelocation() && (routingEntry.relocating() == false || routingEntry.relocatingNodeId().equals(request.targetNode().getId()) == false)) {
                logger.debug("delaying recovery of {} as source shard is not marked yet as relocating to {}", request.shardId(), request.targetNode());
                throw new DelayRecoveryException("source shard is not marked yet as relocating to [" + request.targetNode() + "]");
            ShardRouting targetShardRouting = node.getByShardId(request.shardId());
            if (targetShardRouting == null) {
                logger.debug("delaying recovery of {} as it is not listed as assigned to target node {}", request.shardId(), request.targetNode());
                throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
            if (!targetShardRouting.initializing()) {
                logger.debug("delaying recovery of {} as it is not listed as initializing on the target node {}. known shards state is [{}]",
                    request.shardId(), request.targetNode(), targetShardRouting.state());
                throw new DelayRecoveryException("source node has the state of the target shard to be [" + targetShardRouting.state() + "], expecting to be [initializing]");
            if (request.targetAllocationId() == null) {
                // ES versions < 5.4.0 do not send targetAllocationId as part of recovery request, just assume that we have the correct id
                request = new StartRecoveryRequest(request.shardId(), targetShardRouting.allocationId().getId(), request.sourceNode(),
                    request.targetNode(), request.metadataSnapshot(), request.isPrimaryRelocation(), request.recoveryId());
            if (request.targetAllocationId().equals(targetShardRouting.allocationId().getId()) == false) {
                logger.debug("delaying recovery of {} due to target allocation id mismatch (expected: [{}], but was: [{}])",
                    request.shardId(), request.targetAllocationId(), targetShardRouting.allocationId().getId());
                throw new DelayRecoveryException("source node has the state of the target shard to have allocation id [" +
                    targetShardRouting.allocationId().getId() + "], expecting to be [" + request.targetAllocationId() + "]");
            RecoverySourceHandler handler = ongoingRecoveries.addNewRecovery(request, shard);
            logger.trace("[{}][{}] starting recovery to {}", request.shardId().getIndex().getName(), request.shardId().id(), request.targetNode());
            try {
                return handler.recoverToTarget();
            } finally {
                ongoingRecoveries.remove(shard, handler);

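    `StartRecoveryTransportRequestHandler` 在构造函数中通过 `registerRequestHandler` 注册到 `Actions.START_RECOVERY` 这个 action 上;它的 `messageReceived` 方法负责从 transport channel 接收启动 recovery 的请求,并调用上面的 `recover` 方法执行恢复操作。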


  • 相关阅读:
    .net http大文件断点续传上传
    asp.net http大文件断点续传上传
    VUE http大文件断点续传上传
    JavaScript http大文件断点续传上传
    linux 文件系统
    ceph 参数说明<转>
    iptables 配置
    c++ 回调类成员函数实现
  • 原文地址:https://www.cnblogs.com/mister-w/p/9657361.html
Copyright © 2020-2023  润新知