并发查询ES,根据分片的个数来设置并发
- 获取所有的分片
- 设置并发
- 每个线程通过 preference("_shards:N") 绑定一个分片, 并使用scroll全量查询该分片的数据.
直连分片的这种方式有可能会导致ES集群压力增加,只能适用于低频、需要快速导出数据的场景,不能过度依赖.
所用到的依赖:
<dependencies>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.2</version>
</dependency>
</dependencies>
代码:
import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.search.ClearScrollRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;
/**
 * Exports every document of one Elasticsearch index by running one scroll query per
 * primary shard in parallel.
 *
 * <p>Flow: read {@code index.number_of_shards}, start one worker thread per shard, pin each
 * worker to its shard with the {@code _shards:N} search preference, and scroll until the
 * shard returns no more hits.
 *
 * <p>The counter and the once-per-second progress printer are test scaffolding and should be
 * removed for production use.
 */
public class ShardQuery {
    /** Elasticsearch host placeholder; replace with a real host name or address. */
    private static final String ES_HOST = "ip";
    /** Elasticsearch HTTP port; 9200 is the usual default, adjust to your cluster. */
    private static final int ES_PORT = 9200;

    private static final String index = "index_name";
    // Number of documents read so far. (Test code, remove in production.)
    private static final AtomicLong count = new AtomicLong(0);
    // Handle of the periodic progress printer. (Test code, remove in production.)
    private static ScheduledFuture<?> scheduledFuture;
    private static final ScheduledExecutorService service = new ScheduledThreadPoolExecutor(1);

    /**
     * Creates a client without authentication.
     *
     * @return a new client; the caller is responsible for closing it
     */
    private static RestHighLevelClient newEsClient() {
        HttpHost host = new HttpHost(ES_HOST, ES_PORT);
        RestClientBuilder restClientBuilder = RestClient.builder(host);
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Alternative: client with username/password authentication.
     */
    // private static RestHighLevelClient newEsClient() {
    //     HttpHost host = new HttpHost(ES_HOST, ES_PORT);
    //     RestClientBuilder restClientBuilder = RestClient.builder(host);
    //     CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
    //     credentialsProvider.setCredentials(AuthScope.ANY,
    //             new UsernamePasswordCredentials("username", "passwd"));
    //     restClientBuilder.setHttpClientConfigCallback((httpClientBuilder) -> {
    //         httpClientBuilder.disableAuthCaching();
    //         return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
    //     });
    //     return new RestHighLevelClient(restClientBuilder);
    // }

    /**
     * Entry point: starts the progress printer, launches one export worker per shard, waits
     * for all of them and prints the final document count.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        countPrinter();
        ExecutorService exec = null;
        try {
            int shards = getShardsNum(index);
            exec = Executors.newFixedThreadPool(shards);
            CountDownLatch countDownLatch = new CountDownLatch(shards);
            for (int i = 0; i < shards; i++) {
                final int shardId = i;
                exec.execute(() -> {
                    try {
                        exportShard(shardId, shards);
                    } catch (IOException | RuntimeException e) {
                        // A failed shard is reported but must not abort the other workers.
                        e.printStackTrace();
                    } finally {
                        // Always count down, otherwise main would wait forever after a failure.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
            scheduledFuture.cancel(true);
            System.out.println("最终计数器, 一共读取记录数: " + count.toString());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            if (scheduledFuture != null) {
                scheduledFuture.cancel(true);
            }
            // The scheduler thread is non-daemon; without this the JVM never exits.
            service.shutdownNow();
            if (exec != null) {
                exec.shutdownNow();
            }
        }
    }

    /**
     * Scrolls through all documents of a single shard and hands each one to the sink.
     *
     * @param shardId     zero-based shard number this worker is pinned to
     * @param totalShards number of shards of the index; the shard preference is only set
     *                    when this is greater than 1
     * @throws IOException if the initial search or a scroll request fails
     */
    private static void exportShard(int shardId, int totalShards) throws IOException {
        try (RestHighLevelClient client = newEsClient()) {
            Scroll scroll = new Scroll(TimeValue.timeValueSeconds(30));
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            searchSourceBuilder.sort("_doc", SortOrder.ASC);
            searchSourceBuilder.size(5); // Test value. 1000 ~ 10000 is suggested for production.

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.scroll(scroll);
            searchRequest.source(searchSourceBuilder);
            if (totalShards > 1) {
                // Shard preference; only applied when the index has more than one shard.
                searchRequest.preference("_shards:" + shardId);
            }
            System.out.println(String.format("启动线程%s, 编号:%d",
                    Thread.currentThread().getName(), shardId));

            String scrollId = null;
            try {
                SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
                scrollId = response.getScrollId();
                SearchHit[] hits = response.getHits().getHits();
                while (hits.length != 0) {
                    for (SearchHit hit : hits) {
                        String data = hit.getSourceAsString();
                        System.out.println(shardId + " 查询数据: " + data);
                        // TODO: send the data to the target sink / datasource here.
                        count.getAndIncrement();
                    }
                    SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
                    searchScrollRequest.scroll(scroll);
                    response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
                    scrollId = response.getScrollId();
                    hits = response.getHits().getHits();
                }
            } finally {
                // Release the scroll context even when the export failed half-way.
                if (scrollId != null) {
                    clearScrollQuietly(client, scrollId);
                }
            }
        }
    }

    /**
     * Clears a scroll context on a best-effort basis; a failure is reported but not rethrown
     * so that it cannot hide the exception that ended the export.
     */
    private static void clearScrollQuietly(RestHighLevelClient client, String scrollId) {
        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(scrollId);
        try {
            client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Looks up the number of shards of an index.
     *
     * @param indexName name of the index to inspect
     * @return the value of {@code index.number_of_shards}, or 1 when the request fails with
     *         an {@link IOException} or the setting is not present in the response (with 1
     *         shard no preference is set, so the export still covers the whole index)
     */
    private static int getShardsNum(String indexName) {
        GetSettingsRequest settingsRequest = new GetSettingsRequest().indices(indexName);
        settingsRequest.names("index.number_of_shards");
        int shards = 1;
        try (RestHighLevelClient client = newEsClient()) {
            GetSettingsResponse settingsResponse = client
                    .indices()
                    .getSettings(settingsRequest, RequestOptions.DEFAULT);
            String numberOfShards = settingsResponse
                    .getSetting(indexName, "index.number_of_shards");
            if (numberOfShards != null) {
                shards = Integer.parseInt(numberOfShards);
            } else {
                System.err.println("index.number_of_shards not found for " + indexName
                        + ", falling back to " + shards);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return shards;
    }

    /**
     * Starts a task that prints the current document count once per second.
     * (Test code, remove in production.)
     */
    private static void countPrinter() {
        scheduledFuture = service.scheduleAtFixedRate(
                () -> System.out.println(System.currentTimeMillis() + " 当前count: " + count.get()),
                0, 1, TimeUnit.SECONDS);
    }
}