• 并发查询ElasticSearch, 根据分片来实现


    并发查询ES,根据分片的个数来设置并发

    1. 获取索引的分片数量(index.number_of_shards)
    2. 按分片数量设置并发线程数, 每个分片对应一个线程
    3. 每个线程通过 preference(_shards:N) 只查询自己负责的分片, 并使用scroll全量读取该分片的数据.

    直连分片的这种方式有可能会导致ES集群压力增加,只能适用于低频、需要快速导出数据的场景,不能过度依赖.

    所用到的依赖:

        <dependencies>
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-high-level-client</artifactId>
                <version>7.6.2</version>
            </dependency>
        </dependencies>
    

    代码:

    
    import java.io.IOException;
    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.ScheduledFuture;
    import java.util.concurrent.ScheduledThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.atomic.AtomicLong;
    import org.apache.http.HttpHost;
    import org.apache.http.auth.AuthScope;
    import org.apache.http.auth.UsernamePasswordCredentials;
    import org.apache.http.client.CredentialsProvider;
    import org.apache.http.impl.client.BasicCredentialsProvider;
    import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
    import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
    import org.elasticsearch.action.search.ClearScrollRequest;
    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.action.search.SearchScrollRequest;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestClientBuilder;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.common.unit.TimeValue;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.search.Scroll;
    import org.elasticsearch.search.SearchHit;
    import org.elasticsearch.search.builder.SearchSourceBuilder;
    import org.elasticsearch.search.sort.SortOrder;
    
    /**
     * Exports every document of an Elasticsearch index by running one scroll query per shard,
     * each on its own thread.
     *
     * <p>Flow: read {@code index.number_of_shards}, start that many workers, and let each worker
     * scroll through a single shard selected with the {@code _shards:N} search preference.
     *
     * <p>Every worker always counts down the latch and releases its client, even when a request
     * fails, and both executors are shut down at the end so the JVM can exit.
     */
    public class ShardQuery {

        /** Elasticsearch host. Placeholder value: replace with a real address. */
        private static final String ES_HOST = "ip";
        /** Elasticsearch HTTP port. Placeholder value (9200 is the usual default): adjust as needed. */
        private static final int    ES_PORT = 9200;

        private static String                   index   = "index_name";
        /** Number of documents read so far. Test instrumentation only; remove for production. */
        private static AtomicLong               count   = new AtomicLong(0);
        /** Handle of the periodic counter printer. Test instrumentation only; remove for production. */
        private static ScheduledFuture<?>       scheduledFuture;
        private static ScheduledExecutorService service = new ScheduledThreadPoolExecutor(1);

        /**
         * Creates a client without authentication.
         *
         * @return a new client; the caller is responsible for closing it
         */
        private static RestHighLevelClient newEsClient() {
            HttpHost host = new HttpHost(ES_HOST, ES_PORT);
            RestClientBuilder restClientBuilder = RestClient.builder(host);
            return new RestHighLevelClient(restClientBuilder);
        }

        /**
         * Alternative: creates a client with username/password authentication.
         */
    //    private static RestHighLevelClient newEsClient() {
    //        HttpHost host = new HttpHost(ES_HOST, ES_PORT);
    //        RestClientBuilder restClientBuilder = RestClient.builder(host);
    //        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
    //        credentialsProvider.setCredentials(AuthScope.ANY,
    //            new UsernamePasswordCredentials("username", "passwd"));
    //        restClientBuilder.setHttpClientConfigCallback((httpClientBuilder) -> {
    //            httpClientBuilder.disableAuthCaching();
    //            return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
    //        });
    //        return new RestHighLevelClient(restClientBuilder);
    //    }

        /**
         * Starts one export worker per shard, waits for all of them and prints the final count.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            countPrinter();

            ExecutorService exec = null;
            try {
                final int shards = getShardsNum(index);
                exec = Executors.newFixedThreadPool(shards);
                final CountDownLatch countDownLatch = new CountDownLatch(shards);

                for (int i = 0; i < shards; i++) {
                    final int shardId = i;
                    exec.execute(new Runnable() {
                        @Override
                        public void run() {
                            try {
                                exportShard(shardId, shards);
                            } finally {
                                // Must run on every path, otherwise await() below never returns.
                                countDownLatch.countDown();
                            }
                        }
                    });
                }

                countDownLatch.await();
                scheduledFuture.cancel(true);
                System.out.println("最终计数器, 一共读取记录数: " + count.toString());
            } catch (InterruptedException e) {
                // Restore the flag so code further up the stack can see the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } finally {
                // The scheduler's core thread is non-daemon; without this the JVM never exits.
                service.shutdownNow();
                if (exec != null) {
                    exec.shutdown();
                }
            }
        }

        /**
         * Scrolls through all documents of one shard and hands each one to the sink.
         *
         * <p>I/O failures are logged and abort this shard only; the export of that shard is then
         * incomplete. The scroll context and the client are released on every path.
         *
         * @param shardId zero-based shard number this worker is responsible for
         * @param shards  total number of shards of the index
         */
        private static void exportShard(int shardId, int shards) {
            final Scroll scroll = new Scroll(TimeValue.timeValueSeconds(30));

            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            searchSourceBuilder.sort("_doc", SortOrder.ASC);
            searchSourceBuilder.size(5); // test value; 1000 ~ 10000 is suggested for production

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.scroll(scroll);
            searchRequest.source(searchSourceBuilder);
            if (shards > 1) {
                // Shard preference: restrict this request to the worker's own shard.
                searchRequest.preference("_shards:" + shardId);
            }

            System.out.println(String.format("启动线程%s, 编号:%d", Thread.currentThread().getName(), shardId));

            try (RestHighLevelClient client = newEsClient()) {
                String scrollId = null;
                try {
                    SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
                    scrollId = response.getScrollId();
                    SearchHit[] hits = response.getHits().getHits();

                    // An empty page marks the end of the scroll.
                    while (hits.length != 0) {
                        for (SearchHit hit : hits) {
                            String data = hit.getSourceAsString();
                            System.out.println(shardId + " 查询数据: " + data);
                            // TODO: forward the document to the target sink / data source here
                            count.getAndIncrement();
                        }

                        SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
                        searchScrollRequest.scroll(scroll);
                        response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
                        scrollId = response.getScrollId();
                        hits = response.getHits().getHits();
                    }
                } finally {
                    if (scrollId != null) {
                        clearScrollQuietly(client, scrollId);
                    }
                }
            } catch (IOException e) {
                System.err.println("Export of shard " + shardId + " aborted, data is incomplete");
                e.printStackTrace();
            }
        }

        /**
         * Releases a scroll context, logging instead of propagating a failure so that it cannot
         * mask an exception already in flight.
         *
         * @param client   client used to send the request
         * @param scrollId id of the scroll context to release
         */
        private static void clearScrollQuietly(RestHighLevelClient client, String scrollId) {
            ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
            clearScrollRequest.addScrollId(scrollId);
            try {
                client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Looks up the number of shards of an index.
         *
         * @param indexName name of the index to inspect
         * @return the value of {@code index.number_of_shards}, or 1 when the request fails with an
         *         I/O error or the setting is not present in the response for {@code indexName}
         */
        private static int getShardsNum(String indexName) {
            GetSettingsRequest settingsRequest = new GetSettingsRequest().indices(indexName);
            settingsRequest.names("index.number_of_shards");
            int shards = 1;
            try (RestHighLevelClient client = newEsClient()) {
                GetSettingsResponse settingsResponse = client
                    .indices()
                    .getSettings(settingsRequest, RequestOptions.DEFAULT);
                String numberOfShards = settingsResponse
                    .getSetting(indexName, "index.number_of_shards");
                // NOTE(review): presumably null when indexName is an alias rather than a concrete
                // index; falling back to 1 still exports everything, just single-threaded.
                if (numberOfShards != null) {
                    shards = Integer.parseInt(numberOfShards);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return shards;
        }

        /**
         * Schedules a task that prints the current document count once per second, starting
         * immediately. Test instrumentation only.
         */
        private static void countPrinter() {
            scheduledFuture = service.scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    System.out.println(System.currentTimeMillis() + "\t当前count: " + count.get());
                }
            }, 0, 1, TimeUnit.SECONDS);
        }
    }
    
    
  • 相关阅读:
    选择排序遇到的引用和传值问题记录
    The web application [ROOT] appears to have started a thread named [spring.cloud.inetutils] but has failed to stop it. This is very likely to create a memory leak. Stack trace of thread:
    IDEA中实用的插件
    Column 'status' specified twice
    Error updating database. Cause: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Unknown column 'dataType' in 'field list'
    You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'where id = 2' at line 8
    Missing URI template variable 'id' for method parameter of type long
    值传递和引用传递的区别
    SpringBoot项目与数据库交互,访问http://localhost:8888/admin/userInfo时,报org.springframework.dao.EmptyResultDataAccessException: Incorrect result size: expected 1, actual 0
    SpringBoot项目启动时报错:org.apache.catalina.LifecycleException: Protocol handler start failed
  • 原文地址:https://www.cnblogs.com/simuhunluo/p/14600604.html
Copyright © 2020-2023  润新知