• Elasticsearch Sliced Scroll分页检索案例分享


    The best Elasticsearch high-level Java REST API — bboss 

    Elasticsearch Sliced Scroll分页检索案例分享 

    我们在文章《Elasticsearch Scroll分页检索案例分享》中介绍了elasticsearch scroll的基本用法,本文介绍Elasticsearch Sliced Scroll分页检索功能。

    1.准备工作

    参考文档《高性能elasticsearch ORM开发库使用介绍》导入和配置es客户端

    2.定义Sliced Scroll检索dsl

    创建配置文件-在resources目录下定义文件scroll.xml

    esmapper/scroll.xml

    文件内容包含Sliced Scroll检索dsl语句-scrollSliceQuery

    <!--
        Sliced scroll search DSL template named "scrollSliceQuery".
        $id, $max and $size are template variables; the Java examples in this
        article supply them through the params map under the keys "id", "max"
        and "size". "slice.id" selects one slice out of "slice.max" slices, and
        "size" is the number of hits requested per scroll page.
        The query itself is a term match on gc.jvmGcOldCount = 3.
    -->
    <property name="scrollSliceQuery">
            <![CDATA[
             {
               "slice": {
                    "id": $id,
                    "max": $max
                },
                "size":$size,
                "query": {
                    "term" : {
                        "gc.jvmGcOldCount" : 3
                    }
                }
            }
            ]]>
        </property>

    3.串行方式执行slice检索

    /**
     * Runs a sliced scroll search serially: the slices are processed one after
     * another on the calling thread.
     *
     * <p>For every slice the first page is fetched with the "scrollSliceQuery"
     * DSL, then the scroll is followed page by page until a page comes back with
     * fewer hits than the requested page size. Every page, including the last
     * partial one, is added to the running total. All scroll ids seen are
     * collected so the scroll contexts can be cleared on the server afterwards.
     */
    @Test
    public void testSliceScroll() {
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/scroll.xml");
        List<String> scrollIds = new ArrayList<>();
        long starttime = System.currentTimeMillis();
        // Sliced scroll paging search.
        int max = 6;        // number of slices; ideally no more than the number of shards
        int pageSize = 100; // hits requested per scroll page
        long realTotalSize = 0;
        for (int i = 0; i < max; i++) {
            Map<String, Object> params = new HashMap<>();
            params.put("id", i);
            params.put("max", max);
            params.put("size", pageSize);
            ESDatas<Map> sliceResponse = clientUtil.searchList("agentstat-*/_search?scroll=1m",
                    "scrollSliceQuery", params, Map.class);
            List<Map> sliceDatas = sliceResponse.getDatas();
            // getDatas() may be null (the loop condition below already allows for it),
            // so guard before counting.
            if (sliceDatas != null) {
                realTotalSize += sliceDatas.size();
            }
            long totalSize = sliceResponse.getTotalSize();
            String scrollId = sliceResponse.getScrollId();
            if (scrollId != null) {
                scrollIds.add(scrollId);
            }
            System.out.println("totalSize:" + totalSize);
            System.out.println("scrollId:" + scrollId);
            // A full page means there may be more data; keep following the scroll.
            while (scrollId != null && sliceDatas != null && sliceDatas.size() >= pageSize) {
                sliceResponse = clientUtil.searchScroll("1m", scrollId, Map.class);
                String nextScrollId = sliceResponse.getScrollId();
                // Always continue with the most recently returned scroll id, and
                // remember each distinct id once for the final cleanup.
                if (nextScrollId != null && !nextScrollId.equals(scrollId)) {
                    scrollIds.add(nextScrollId);
                    scrollId = nextScrollId;
                }
                sliceDatas = sliceResponse.getDatas();
                // Count the page before the loop condition decides whether it was
                // the last one, so the final partial page is not lost.
                if (sliceDatas != null) {
                    realTotalSize += sliceDatas.size();
                }
            }
        }
        // Print the elapsed time and the number of documents actually retrieved.
        long endtime = System.currentTimeMillis();
        System.out.println("耗时:"+(endtime - starttime)+",realTotalSize:"+realTotalSize);
        // Query the scroll context statistics currently held by the ES server.
        String scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
        System.out.println(scrolls);
        // Clear the scroll contexts once processing is finished.
        if (!scrollIds.isEmpty()) {
            scrolls = clientUtil.deleteScrolls(scrollIds);
            System.out.println(scrolls);
        }
        // Query the scroll context statistics again after the cleanup.
        scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
        System.out.println(scrolls);
    }

    4.并行方式执行slice检索

    // Running total of the records actually retrieved by the slice searches.
    long realTotalSize ;

    /**
     * Helper that adds the number of records returned by one scroll request to
     * {@link #realTotalSize}. The update runs while holding this instance's
     * monitor, so concurrent callers do not lose increments.
     *
     * @param size number of records to add to the running total
     */
    void incrementSize(int size){
        synchronized (this) {
            realTotalSize += size;
        }
    }
    /**
     * 并行方式执行slice scroll操作
     */
    @Test
    public void testParralSliceScroll() {
    	final ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/scroll.xml");
    	final List<String> scrollIds = new ArrayList<>();
    	long starttime = System.currentTimeMillis();
    	//scroll slice分页检索
    	final int max = 6;
    	final CountDownLatch countDownLatch = new CountDownLatch(max);//线程任务完成计数器,每个线程对应一个sclice,每运行完一个slice任务,countDownLatch计数减去1
    
    <span class="hljs-keyword">for</span> (<span class="hljs-keyword">int</span> j = <span class="hljs-number">0</span>; j &lt; max; j++) {<span class="hljs-comment">//启动max个线程,并行处理每个slice任务</span>
    	<span class="hljs-keyword">final</span> <span class="hljs-keyword">int</span> i = j;
    	Thread sliceThread = <span class="hljs-keyword">new</span> Thread(<span class="hljs-keyword">new</span> Runnable() {<span class="hljs-comment">//多线程并行执行scroll操作做,每个线程对应一个sclice</span>
    
    		<span class="hljs-meta">@Override</span>
    		<span class="hljs-function"><span class="hljs-keyword">public</span> <span class="hljs-keyword">void</span> <span class="hljs-title">run</span><span class="hljs-params">()</span> </span>{
    			Map params = <span class="hljs-keyword">new</span> HashMap();
    			params.put(<span class="hljs-string">"id"</span>, i);
    			params.put(<span class="hljs-string">"max"</span>, max);<span class="hljs-comment">//最多6个slice,不能大于share数</span>
    			params.put(<span class="hljs-string">"size"</span>, <span class="hljs-number">100</span>);<span class="hljs-comment">//每页100条记录</span>
    			ESDatas&lt;Map&gt; sliceResponse = clientUtil.searchList(<span class="hljs-string">"agentstat-*/_search?scroll=1m"</span>,
    					<span class="hljs-string">"scrollSliceQuery"</span>, params,Map.class);
    			List&lt;Map&gt; sliceDatas = sliceResponse.getDatas();
    			incrementSize( sliceDatas.size());<span class="hljs-comment">//统计实际处理的文档数量</span>
    			<span class="hljs-keyword">long</span> totalSize = sliceResponse.getTotalSize();
    			String scrollId = sliceResponse.getScrollId();
    			<span class="hljs-keyword">if</span> (scrollId != <span class="hljs-keyword">null</span>)
    				scrollIds.add(scrollId);
    			System.out.println(<span class="hljs-string">"totalSize:"</span> + totalSize);
    			System.out.println(<span class="hljs-string">"scrollId:"</span> + scrollId);
    			<span class="hljs-keyword">if</span> (sliceDatas != <span class="hljs-keyword">null</span> &amp;&amp; sliceDatas.size() &gt;= <span class="hljs-number">100</span>) {<span class="hljs-comment">//每页100条记录,迭代scrollid,遍历scroll分页结果</span>
    				<span class="hljs-keyword">do</span> {
    					sliceResponse = clientUtil.searchScroll(<span class="hljs-string">"1m"</span>, scrollId, Map.class);
    					String sliceScrollId = sliceResponse.getScrollId();
    					<span class="hljs-keyword">if</span> (sliceScrollId != <span class="hljs-keyword">null</span>)
    						scrollIds.add(sliceScrollId);
    					sliceDatas = sliceResponse.getDatas();
    					<span class="hljs-keyword">if</span> (sliceDatas == <span class="hljs-keyword">null</span> || sliceDatas.size() &lt; <span class="hljs-number">100</span>) {
    						<span class="hljs-keyword">break</span>;
    					}
    					incrementSize( sliceDatas.size());<span class="hljs-comment">//统计实际处理的文档数量</span>
    				} <span class="hljs-keyword">while</span> (<span class="hljs-keyword">true</span>);
    			}
    			countDownLatch.countDown();<span class="hljs-comment">//slice检索完毕后计数器减1</span>
    		}
    
    	});
    	sliceThread.start();<span class="hljs-comment">//启动线程</span>
    }
    <span class="hljs-keyword">try</span> {
    	countDownLatch.await();<span class="hljs-comment">//等待所有的线程执行完毕,计数器变成0</span>
    } <span class="hljs-keyword">catch</span> (InterruptedException e) {
    	e.printStackTrace();
    }
      <span class="hljs-comment">//打印处理耗时和实际检索到的数据</span>
    <span class="hljs-keyword">long</span> endtime = System.currentTimeMillis();
    System.out.println(<span class="hljs-string">"耗时:"</span>+(endtime - starttime)+<span class="hljs-string">",realTotalSize:"</span>+realTotalSize);
    <span class="hljs-comment">//查询存在es服务器上的scroll上下文信息</span>
    String scrolls = clientUtil.executeHttp(<span class="hljs-string">"_nodes/stats/indices/search"</span>, ClientUtil.HTTP_GET);
    

    // System.out.println(scrolls);
    //处理完毕后清除scroll上下文信息
    if(scrollIds.size() > 0) {
    scrolls = clientUtil.deleteScrolls(scrollIds);
    // System.out.println(scrolls);
    }
    //清理完毕后查看scroll上下文信息
    scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
    // System.out.println(scrolls);
    }

    通过对串行和并行两种方式的运行结果进行比较可以看出,并行处理的性能要好很多,而两种方式实际检索到的文档数量是一致的。

    5.参考文档

    https://www.elastic.co/guide/en/elasticsearch/reference/6.2/search-request-scroll.html

    6.开发交流

    elasticsearch技术交流群:166471282

  • 相关阅读:
    js上传超大文件解决方案
    java上传超大文件解决方案
    jsp上传超大文件解决方案
    .net上传超大文件解决方案
    asp.net上传超大文件解决方案
    PHP上传超大文件解决方案
    内网大文件传输断点续传源码
    HDU
    Android中makfile的随记
    android 阿拉伯语下,图库中编辑运动轨迹图片,动画中会显示绿色的图片
  • 原文地址:https://www.cnblogs.com/jpfss/p/10813765.html
Copyright © 2020-2023  润新知