• Kafka – kafka consumer


    ConsumerRecords<String, String> records = consumer.poll(100); 

     

    /**
         * Fetch data for the topics or partitions specified using one of the subscribe/assign APIs. It is an error to not have
         * subscribed to any topics or partitions before polling for data.
         * <p>
         * On each poll, consumer will try to use the last consumed offset as the starting offset and fetch sequentially. The last
         * consumed offset can be manually set through {@link #seek(TopicPartition, long)} or automatically set as the last committed
         * offset for the subscribed list of partitions
         *
         *
         * @param timeout The time, in milliseconds, spent waiting in poll if data is not available in the buffer.
         *            If 0, returns immediately with any records that are available currently in the buffer, else returns empty.
         *            Must not be negative.
         * @return map of topic to records since the last fetch for the subscribed list of topics and partitions
         *
         * @throws org.apache.kafka.clients.consumer.InvalidOffsetException if the offset for a partition or set of
         *             partitions is undefined or out of range and no offset reset policy has been configured
         * @throws org.apache.kafka.common.errors.WakeupException if {@link #wakeup()} is called before or while this
         *             function is called
         * @throws org.apache.kafka.common.errors.InterruptException if the calling thread is interrupted before or while
         *             this function is called
         * @throws org.apache.kafka.common.errors.AuthorizationException if caller lacks Read access to any of the subscribed
         *             topics or to the configured groupId
         * @throws org.apache.kafka.common.KafkaException for any other unrecoverable errors (e.g. invalid groupId or
         *             session timeout, errors deserializing key/value pairs, or any new error cases in future versions)
         * @throws java.lang.IllegalArgumentException if the timeout value is negative
         * @throws java.lang.IllegalStateException if the consumer is not subscribed to any topics or manually assigned any
         *             partitions to consume from
         */
        @Override
        public ConsumerRecords<K, V> poll(long timeout) {
            try {
                if (timeout < 0)
                    throw new IllegalArgumentException("Timeout must not be negative");
    
                if (this.subscriptions.hasNoSubscriptionOrUserAssignment())
                    throw new IllegalStateException("Consumer is not subscribed to any topics or assigned any partitions");
    
                // poll for new data until the timeout expires
                long start = time.milliseconds();
                long remaining = timeout;
                do {
                    Map<TopicPartition, List<ConsumerRecord<K, V>>> records = pollOnce(remaining); //pollOnce
                    if (!records.isEmpty()) {
                        // before returning the fetched records, we can send off the next round of fetches
                        // and avoid block waiting for their responses to enable pipelining while the user
                        // is handling the fetched records.
                        //
                        // NOTE: since the consumed position has already been updated, we must not allow
                        // wakeups or any other errors to be triggered prior to returning the fetched records.
                        if (fetcher.sendFetches() > 0) { //为了省时间,预先放fetch一次
                            client.pollNoWakeup();
                        }
    
                        if (this.interceptors == null)
                            return new ConsumerRecords<>(records);
                        else
                            return this.interceptors.onConsume(new ConsumerRecords<>(records)); //如果有interceptors,先处理一下
                    }
    
                    long elapsed = time.milliseconds() - start;
                    remaining = timeout - elapsed; //在超时内,反复尝试poll
                } while (remaining > 0);
    
                return ConsumerRecords.empty(); //如果数据不ready,返回empty
            } finally {
                release();
            }
        }

     

    pollOnce

    /**
         * Do one round of polling. In addition to checking for new data, this does any needed offset commits
         * (if auto-commit is enabled), and offset resets (if an offset reset policy is defined).
         * @param timeout The maximum time to block in the underlying call to {@link ConsumerNetworkClient#poll(long)}.
         * @return The fetched records (may be empty)
         */
        private Map<TopicPartition, List<ConsumerRecord<K, V>>> pollOnce(long timeout) {
            coordinator.poll(time.milliseconds()); //和ConsuemrCoordinator之间的心跳
    
            // fetch positions if we have partitions we're subscribed to that we
            // don't know the offset for
            if (!subscriptions.hasAllFetchPositions())
                updateFetchPositions(this.subscriptions.missingFetchPositions()); //同步offset
    
            // if data is available already, return it immediately
            Map<TopicPartition, List<ConsumerRecord<K, V>>> records = fetcher.fetchedRecords(); //已经有fetched
            if (!records.isEmpty())
                return records; //直接返回
    
            // send any new fetches (won't resend pending fetches)
            fetcher.sendFetches(); //没有现成的数据,发送fetch命令
    
            long now = time.milliseconds();
            long pollTimeout = Math.min(coordinator.timeToNextPoll(now), timeout);
    
            client.poll(pollTimeout, now, new PollCondition() {
                @Override
                public boolean shouldBlock() {
                    // since a fetch might be completed by the background thread, we need this poll condition
                    // to ensure that we do not block unnecessarily in poll()
                    return !fetcher.hasCompletedFetches();
                }
            });
    
            // after the long poll, we should check whether the group needs to rebalance
            // prior to returning data so that the group can stabilize faster
            if (coordinator.needRejoin())
                return Collections.emptyMap();
    
            return fetcher.fetchedRecords();
        }

     

    看下fetcher

    public Fetcher(ConsumerNetworkClient client,
                       int minBytes,
                       int maxBytes,
                       int maxWaitMs,
                       int fetchSize,
                       int maxPollRecords,
                       boolean checkCrcs,
                       Deserializer<K> keyDeserializer,
                       Deserializer<V> valueDeserializer,
                       Metadata metadata,
                       SubscriptionState subscriptions,
                       Metrics metrics,
                       String metricGrpPrefix,
                       Time time,
                       long retryBackoffMs) {

    创建时,

    this.fetcher = new Fetcher<>(this.client,
                        config.getInt(ConsumerConfig.FETCH_MIN_BYTES_CONFIG),
                        config.getInt(ConsumerConfig.FETCH_MAX_BYTES_CONFIG),
                        config.getInt(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG),
                        config.getInt(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG),
                        config.getInt(ConsumerConfig.MAX_POLL_RECORDS_CONFIG),
                        config.getBoolean(ConsumerConfig.CHECK_CRCS_CONFIG),
                        this.keyDeserializer,
                        this.valueDeserializer,
                        this.metadata,
                        this.subscriptions,
                        metrics,
                        metricGrpPrefix,
                        this.time,
                        this.retryBackoffMs);
    可以看出对应的配置

     

    fetcher.fetchedRecords

    /**
         * Return the fetched records, empty the record buffer and update the consumed position.
         *
         * NOTE: returning empty records guarantees the consumed position are NOT updated.
         *
         * @return The fetched records per partition
         * @throws OffsetOutOfRangeException If there is OffsetOutOfRange error in fetchResponse and
         *         the defaultResetPolicy is NONE
         */
        public Map<TopicPartition, List<ConsumerRecord<K, V>>> fetchedRecords() {
            Map<TopicPartition, List<ConsumerRecord<K, V>>> drained = new HashMap<>();
            int recordsRemaining = maxPollRecords; //最大poll records数
    
            while (recordsRemaining > 0) {
                if (nextInLineRecords == null || nextInLineRecords.isDrained()) { //如果nextInLineRecords是空的,没有records的
                    CompletedFetch completedFetch = completedFetches.poll(); //从completedFetches,fetched队列中取一个fetch
                    if (completedFetch == null)
                        break;
    
                    nextInLineRecords = parseFetchedData(completedFetch); //parse Fetch到nextInLineRecords中
                } else {
                    TopicPartition partition = nextInLineRecords.partition;
    
                    List<ConsumerRecord<K, V>> records = drainRecords(nextInLineRecords, recordsRemaining); //从nextInLineRecords取recordsRemaining个records
                    if (!records.isEmpty()) {
                        List<ConsumerRecord<K, V>> currentRecords = drained.get(partition); //取出partition对应的record list
                        if (currentRecords == null) {
                            drained.put(partition, records); //放入record list
                        } else {
                            // this case shouldn't usually happen because we only send one fetch at a time per partition,
                            // but it might conceivably happen in some rare cases (such as partition leader changes).
                            // we have to copy to a new list because the old one may be immutable
                            List<ConsumerRecord<K, V>> newRecords = new ArrayList<>(records.size() + currentRecords.size());
                            newRecords.addAll(currentRecords);
                            newRecords.addAll(records);
                            drained.put(partition, newRecords);
                        }
                        recordsRemaining -= records.size();
                    }
                }
            }
    
            return drained; //返回
        }

    可以看到fetchedRecords只是从已经完成的fetch中读取数据

     

    fetcher.sendFetches

    先看

    createFetchRequests
    /**
         * Create fetch requests for all nodes for which we have assigned partitions
         * that have no existing requests in flight.
         */
        private Map<Node, FetchRequest> createFetchRequests() {
            // create the fetch info
            Cluster cluster = metadata.fetch();
            Map<Node, LinkedHashMap<TopicPartition, FetchRequest.PartitionData>> fetchable = new LinkedHashMap<>();
            for (TopicPartition partition : fetchablePartitions()) {
                Node node = cluster.leaderFor(partition); //找到partition的leader所在node
                if (node == null) {
                    metadata.requestUpdate();
                } else if (this.client.pendingRequestCount(node) == 0) { //如果没有正在进行的fetch,一个partition同时只能有一个fetch请求
                    // if there is a leader and no in-flight requests, issue a new fetch
                    LinkedHashMap<TopicPartition, FetchRequest.PartitionData> fetch = fetchable.get(node);
                    if (fetch == null) {
                        fetch = new LinkedHashMap<>();
                        fetchable.put(node, fetch);
                    }
    
                    long position = this.subscriptions.position(partition);
                    fetch.put(partition, new FetchRequest.PartitionData(position, this.fetchSize)); //创建FetchRequest,position,从哪儿开始读,fetchSize,读多少
                    log.trace("Added fetch request for partition {} at offset {}", partition, position);
                } else {
                    log.trace("Skipping fetch for partition {} because there is an in-flight request to {}", partition, node);
                }
            }
    
            // create the fetches
            Map<Node, FetchRequest> requests = new HashMap<>();
            for (Map.Entry<Node, LinkedHashMap<TopicPartition, FetchRequest.PartitionData>> entry : fetchable.entrySet()) {
                Node node = entry.getKey();
                FetchRequest fetch = new FetchRequest(this.maxWaitMs, this.minBytes, this.maxBytes, entry.getValue()); //封装成FetchRequest
                requests.put(node, fetch);
            }
            return requests;
        }

     

       /**
         * Set-up a fetch request for any node that we have assigned partitions for which doesn't already have
         * an in-flight fetch or pending fetch data.
         * @return number of fetches sent
         */
        public int sendFetches() {
            Map<Node, FetchRequest> fetchRequestMap = createFetchRequests(); //创建Fetch Request
            for (Map.Entry<Node, FetchRequest> fetchEntry : fetchRequestMap.entrySet()) {
                final FetchRequest request = fetchEntry.getValue();
                final Node fetchTarget = fetchEntry.getKey();
    
                client.send(fetchTarget, ApiKeys.FETCH, request) //send request
                        .addListener(new RequestFutureListener<ClientResponse>() {
                            @Override
                            public void onSuccess(ClientResponse resp) { //如果成功
                                FetchResponse response = (FetchResponse) resp.responseBody();
                                Set<TopicPartition> partitions = new HashSet<>(response.responseData().keySet());
    
                                for (Map.Entry<TopicPartition, FetchResponse.PartitionData> entry : response.responseData().entrySet()) {
                                    TopicPartition partition = entry.getKey();
                                    long fetchOffset = request.fetchData().get(partition).offset;
                                    FetchResponse.PartitionData fetchData = entry.getValue();
                                    completedFetches.add(new CompletedFetch(partition, fetchOffset, fetchData, metricAggregator)); //把fetchData封装成CompletedFetch,加入completedFetcheslist
                                }
    
                                sensors.fetchLatency.record(resp.requestLatencyMs());
                                sensors.fetchThrottleTimeSensor.record(response.getThrottleTime());
                            }
    
                            @Override
                            public void onFailure(RuntimeException e) {
                                log.debug("Fetch request to {} failed", fetchTarget, e);
                            }
                        });
            }
            return fetchRequestMap.size();
        }

     

    client.send

    ConsumerNetworkClient
    /**
         * Send a new request. Note that the request is not actually transmitted on the
         * network until one of the {@link #poll(long)} variants is invoked. At this
         * point the request will either be transmitted successfully or will fail.
         * Use the returned future to obtain the result of the send. Note that there is no
         * need to check for disconnects explicitly on the {@link ClientResponse} object;
         * instead, the future will be failed with a {@link DisconnectException}.
         * @param node The destination of the request
         * @param api The Kafka API call
         * @param request The request payload
         * @return A future which indicates the result of the send.
         */
        public RequestFuture<ClientResponse> send(Node node,
                                                  ApiKeys api,
                                                  AbstractRequest request) {
            return send(node, api, ProtoUtils.latestVersion(api.id), request);
        }
    
        private RequestFuture<ClientResponse> send(Node node,
                                                   ApiKeys api,
                                                   short version,
                                                   AbstractRequest request) {
            long now = time.milliseconds();
            RequestFutureCompletionHandler completionHandler = new RequestFutureCompletionHandler();
            RequestHeader header = client.nextRequestHeader(api, version);
            ClientRequest clientRequest = new ClientRequest(node.idString(), now, true, header, request, completionHandler); //封装成client request
            put(node, clientRequest); //没有真正发出,而是放入list
    
            // wakeup the client in case it is blocking in poll so that we can send the queued request
            client.wakeup();
            return completionHandler.future;
        }
    
        private void put(Node node, ClientRequest request) {
            synchronized (this) {
                List<ClientRequest> nodeUnsent = unsent.get(node);
                if (nodeUnsent == null) {
                    nodeUnsent = new ArrayList<>();
                    unsent.put(node, nodeUnsent);
                }
                nodeUnsent.add(request);
            }
        }

     

    NetworkClient.wakeup

    /**
         * Interrupt the client if it is blocked waiting on I/O.
         */
        @Override
        public void wakeup() {
            this.selector.wakeup();
        }

    wakeup就是让client从selector的block等待中,被唤醒,可以处理其他的请求

     

    这里说了,只有当poll被调用的时候,才会真正的将request发送出去,poll是在哪儿被调用的?

     

    在上面pollOnce的时候,有这样的逻辑

            client.poll(pollTimeout, now, new PollCondition() {
                @Override
                public boolean shouldBlock() {
                    // since a fetch might be completed by the background thread, we need this poll condition
                    // to ensure that we do not block unnecessarily in poll()
                    return !fetcher.hasCompletedFetches();
                }
            });

    意思是调用poll的超时是pollTimeout,
    PollCondition.shouldBlock,意思是何时我们需要block等待,当hasCompletedFetches时,是不需要等数据的,所以只有当没有现成的数据的时候,才需要等

     

    ConsumerNetworkClient.poll

    /**
         * Poll for any network IO.
         * @param timeout timeout in milliseconds
         * @param now current time in milliseconds
         */
        public void poll(long timeout, long now, PollCondition pollCondition) {
            // there may be handlers which need to be invoked if we woke up the previous call to poll
            firePendingCompletedRequests();
    
            synchronized (this) {
                // send all the requests we can send now
                trySend(now);
    
                // check whether the poll is still needed by the caller. Note that if the expected completion
                // condition becomes satisfied after the call to shouldBlock() (because of a fired completion
                // handler), the client will be woken up.
                if (pollCondition == null || pollCondition.shouldBlock()) {
                    // if there are no requests in flight, do not block longer than the retry backoff
                    if (client.inFlightRequestCount() == 0)
                        timeout = Math.min(timeout, retryBackoffMs);
                    client.poll(Math.min(MAX_POLL_TIMEOUT_MS, timeout), now);
                    now = time.milliseconds();
                } else {
                    client.poll(0, now);
                }
    
                // handle any disconnects by failing the active requests. note that disconnects must
                // be checked immediately following poll since any subsequent call to client.ready()
                // will reset the disconnect status
                checkDisconnects(now);
    
                // trigger wakeups after checking for disconnects so that the callbacks will be ready
                // to be fired on the next call to poll()
                maybeTriggerWakeup();
                
                // throw InterruptException if this thread is interrupted
                maybeThrowInterruptException();
    
                // try again to send requests since buffer space may have been
                // cleared or a connect finished in the poll
                trySend(now);
    
                // fail requests that couldn't be sent if they have expired
                failExpiredRequests(now);
            }
    
            // called without the lock to avoid deadlock potential if handlers need to acquire locks
            firePendingCompletedRequests();
        }

     

    trySend

    private boolean trySend(long now) {
            // send any requests that can be sent now
            boolean requestsSent = false;
            for (Map.Entry<Node, List<ClientRequest>> requestEntry: unsent.entrySet()) { // 前面send的时候时候,request放入unsent
                Node node = requestEntry.getKey();
                Iterator<ClientRequest> iterator = requestEntry.getValue().iterator();
                while (iterator.hasNext()) {
                    ClientRequest request = iterator.next();
                    if (client.ready(node, now)) {// Begin connecting to the given node, return true if we are already connected and ready to send to that node 
                        client.send(request, now); // 调用send,发送request
                        iterator.remove();
                        requestsSent = true;
                    }
                }
            }
            return requestsSent;
        }

     

    NetworkClient.send

    /**
         * Queue up the given request for sending. Requests can only be sent out to ready nodes.
         * @param request The request
         * @param now The current timestamp
         */
        @Override
        public void send(ClientRequest request, long now) {
            doSend(request, false, now);
        }

     

    private void doSend(ClientRequest request, boolean isInternalRequest, long now) {
            String nodeId = request.destination();
            if (request.header().apiKey() == ApiKeys.API_VERSIONS.id) {
                if (!canSendApiVersionsRequest(nodeId))
                    throw new IllegalStateException("Attempt to send API Versions request to node " + nodeId + " which is not ready.");
            } else if (!canSendRequest(nodeId))
                throw new IllegalStateException("Attempt to send a request to node " + nodeId + " which is not ready.");
    
            Send send = request.body().toSend(nodeId, request.header());
            InFlightRequest inFlightRequest = new InFlightRequest(
                    request.header(),
                    request.createdTimeMs(),
                    request.destination(),
                    request.callback(),
                    request.expectResponse(),
                    isInternalRequest,
                    send,
                    now);
    
            this.inFlightRequests.add(inFlightRequest); // 加入inFlightRequest
            selector.send(inFlightRequest.send);
        }

    最终用selector.send来发送Send

    /**
         * Queue the given request for sending in the subsequent {@link #poll(long)} calls
         * @param send The request to send
         */
        public void send(Send send) {
            String connectionId = send.destination();
            if (closingChannels.containsKey(connectionId))
                this.failedSends.add(connectionId);
            else {
                KafkaChannel channel = channelOrFail(connectionId, false); // 从Map<String, KafkaChannel> channels中get该connect对应的channel
                try {
                    channel.setSend(send);
                } catch (CancelledKeyException e) {
                    this.failedSends.add(connectionId);
                    close(channel, false);
                }
            }
        }

    KafkaChannel.setSend

    public void setSend(Send send) {
            if (this.send != null)
                throw new IllegalStateException("Attempt to begin a send operation with prior send operation still in progress.");
            this.send = send;
            this.transportLayer.addInterestOps(SelectionKey.OP_WRITE);
        }

    可以看到select.send也只是把send放到channel中,

    真正发送要等到调用NetworkClient.poll

    在ConsumerNetworkClient.poll中,

                if (pollCondition == null || pollCondition.shouldBlock()) {
                    // if there are no requests in flight, do not block longer than the retry backoff
                    if (client.inFlightRequestCount() == 0)
                        timeout = Math.min(timeout, retryBackoffMs);
                    client.poll(Math.min(MAX_POLL_TIMEOUT_MS, timeout), now);
                    now = time.milliseconds();
                } else {
                    client.poll(0, now);
                }

    如果需要block或没有pollCondition,选择block timeout来等待数据

    否则调用client.poll(0, now),意思是没有数据即刻返回

    NetworkClient.poll

    @Override
        public void poll(long timeout) throws IOException {
            if (timeout < 0)
                throw new IllegalArgumentException("timeout should be >= 0");
    
            clear();
    
            if (hasStagedReceives() || !immediatelyConnectedKeys.isEmpty())
                timeout = 0;
    
            /* check ready keys */
            long startSelect = time.nanoseconds();
            int readyKeys = select(timeout);
            long endSelect = time.nanoseconds();
            this.sensors.selectTime.record(endSelect - startSelect, time.milliseconds());
    
            if (readyKeys > 0 || !immediatelyConnectedKeys.isEmpty()) {
                pollSelectionKeys(this.nioSelector.selectedKeys(), false, endSelect);
                pollSelectionKeys(immediatelyConnectedKeys, true, endSelect);
            }
    
            addToCompletedReceives();
    
            long endIo = time.nanoseconds();
            this.sensors.ioTime.record(endIo - endSelect, time.milliseconds());
    
            // we use the time at the end of select to ensure that we don't close any connections that
            // have just been processed in pollSelectionKeys
            maybeCloseOldestConnection(endSelect);
        }

     

    select

        private int select(long ms) throws IOException {
            if (ms < 0L)
                throw new IllegalArgumentException("timeout should be >= 0");
    
            if (ms == 0L)
                return this.nioSelector.selectNow();
            else
                return this.nioSelector.select(ms);
        }

     

    pollSelectionKeys

        private void pollSelectionKeys(Iterable<SelectionKey> selectionKeys,
                                       boolean isImmediatelyConnected,
                                       long currentTimeNanos) {
            Iterator<SelectionKey> iterator = selectionKeys.iterator();
            while (iterator.hasNext()) {
                SelectionKey key = iterator.next();
                iterator.remove();
                KafkaChannel channel = channel(key);
    
                try {
    
                    /* complete any connections that have finished their handshake (either normally or immediately) */
                    if (isImmediatelyConnected || key.isConnectable()) {
                        if (channel.finishConnect()) {
                            this.connected.add(channel.id());
                            this.sensors.connectionCreated.record();
                            SocketChannel socketChannel = (SocketChannel) key.channel();
                            log.debug("Created socket with SO_RCVBUF = {}, SO_SNDBUF = {}, SO_TIMEOUT = {} to node {}",
                                    socketChannel.socket().getReceiveBufferSize(),
                                    socketChannel.socket().getSendBufferSize(),
                                    socketChannel.socket().getSoTimeout(),
                                    channel.id());
                        } else
                            continue;
                    }
    
                    /* if channel is not ready finish prepare */
                    if (channel.isConnected() && !channel.ready())
                        channel.prepare();
    
                    /* if channel is ready read from any connections that have readable data */
                    if (channel.ready() && key.isReadable() && !hasStagedReceive(channel)) {
                        NetworkReceive networkReceive;
                        while ((networkReceive = channel.read()) != null)
                            addToStagedReceives(channel, networkReceive);
                    }
    
                    /* if channel is ready write to any sockets that have space in their buffer and for which we have data */
                    if (channel.ready() && key.isWritable()) {
                        Send send = channel.write(); //真正写出数据
                        if (send != null) {
                            this.completedSends.add(send);
                        }
                    }
    
                    /* cancel any defunct sockets */
                    if (!key.isValid())
                        close(channel, true);
    
                } catch (Exception e) {
    
                }
            }
        }

     

    直接用NIO写应用,是需要勇气的

  • 相关阅读:
    发送邮件封装的方法
    异步编程
    Xaml中string(字符串)常量的定义以及空格的处理
    python中lxml的应用
    Python打包成exe
    利用TaskScheduler处理Queue、Stack等类型的操作队列(生产者消费者场景)
    TreeView的性能问题
    WPF中ItemsControl绑定到Google ProtocolBuffer的结构体时的性能问题
    WPF中ToolTip的自定义
    WPF中ItemsControl绑定到Google ProtocolBuffer的结构体时的性能问题
  • 原文地址:https://www.cnblogs.com/fxjwind/p/6495818.html
Copyright © 2020-2023  润新知