• 使用bitset实现毫秒级查询(二)


    上一篇中我们了解了bitset索引的基本用法,本篇开始学习bitset索引更新及一些复杂查询。

    1.bitset索引更新

      因为我们的数据是在系统启动时全部加载进内存,所以当数据库数据发生变化时要实时通知到内存,可以使用消息队列的方式实现:将新增或者修改的数据写入kafka,然后在索引服务中从kafka中读出数据更新索引.
    在UserIndexStore类中增加更新索引方法:

    /**
    	 * 更新索引
    	 * @param user
    	 */
    	public void updateIndex(User user) {
    		String name = user.getName();
    		Integer userIndex = this.nameIndexMap.get(name);
    		if (userIndex != null) {
    			clear(userIndex);//清除bitset对应位置的值
    			update(user, userIndex);
    			this.userMap.put(userIndex, user);
    			this.nameIndexMap.put(user.getName(), userIndex);
    		} else {
    			//新增时会有并发问题
    			synchronized (this) {
    				int index = this.userMap.size() + 1;
    				createIndex(user, index);
    			}
    		}
    
    	}
    
    	private void update(User user, Integer userIndex) {
    		getAddress().update(user.getAddress(), userIndex);
    		getAge().update(user.getAge(), userIndex);
    		getGender().update(user.getGender(), userIndex);
    
    	}
    
    	private void clear(Integer index) {
    		getAddress().clear(index);
    		getAge().clear(index);
    		getGender().clear(index);
    	}
    

    在BitSetIndexModel类中增加clear()方法

        /**
    	 * 对第i位置0
    	 * @param i
    	 */
    	public void clear(Integer i) {
    		for (BitSet bs : bsList) {
    			if (bs != null && i < bs.length()) {
    				bs.clear(i);
    			}
    		}
    	}
    
    2.bitset进阶查询

    '>=','<=',between and
    在BitSetIndexModel类中增加如下方法:

    public List<String> getHigherList(String str) {
    		List<String> newlist = new ArrayList<String>();
    		newlist.add(str);
    		newlist.addAll(list);
    		// 排序
    		Collections.sort(newlist);
    		// 查找str在list中的位置
    		int fromIndex = Collections.binarySearch(newlist, str);
    		if (fromIndex >= 0) {
    			// 如果map中不包含当前值则 index后移一位
    			if (map.get(str) == null) {
    				fromIndex++;
    			}
    			return newlist.subList(fromIndex, newlist.size());
    		} else {
    			return new ArrayList<String>();
    		}
    	}
    
    	public List<String> getLowerList(String str) {
    		List<String> newlist = new ArrayList<String>();
    		newlist.add(str);
    		newlist.addAll(list);
    		// 排序
    		Collections.sort(newlist);
    		// 查找str在list中的位置
    		int endIndex = Collections.binarySearch(newlist, str);
    		if (endIndex >= 0) {
    			return newlist.subList(0, endIndex + 1);
    		} else {
    			return new ArrayList<String>();
    		}
    	}
    
    	@SuppressWarnings("unchecked")
    	public <T extends Comparable<? super T>> List<T> getRange(T min, T max, Comparator<? super T> c) {
    		List<T> newlist = new ArrayList<T>();
    		for (String s : list) {
    			newlist.add((T) (s));
    		}
    		Collections.sort(newlist);
    		// 查找str在list中的位置
    		int fromIndex = minBinarySearch(newlist, min, c);
    		int endIndex = maxBinarySearch(newlist, max, c);
    		if (fromIndex >= 0 && endIndex <= list.size() - 1) {
    			if (fromIndex == endIndex) {
    				return newlist.subList(fromIndex, endIndex + 1);
    			}
    			return newlist.subList(fromIndex, ++endIndex);
    		} else {
    			return new ArrayList<T>();
    		}
    	}
    
    	/**
    	 *
    	 * @param list
    	 * @param key
    	 * @return
    	 */
    	private static <T> int maxBinarySearch(List<T> list, T key, Comparator<? super T> c) {
    		int low = 0;
    		int high = list.size() - 1;
    		int mid = 0;
    		while (low <= high) {
    			mid = (low + high) >>> 1;
    			T midVal = list.get(mid);
    			int cmp = c.compare(midVal, key);
    			if (cmp < 0) {
    				low = mid + 1;
    			} else if (cmp > 0) {
    				high = mid - 1;
    			} else {
    				return mid; // key found
    			}
    		}
    		if (mid == low) {
    			return high;
    		} else {
    			return mid;
    		}
    	}
    
    	private static <T> int minBinarySearch(List<T> list, T key, Comparator<? super T> c) {
    		int low = 0;
    		int high = list.size() - 1;
    		int mid = 0;
    		while (low <= high) {
    			mid = (low + high) >>> 1;
    			T midVal = list.get(mid);
    			int cmp = c.compare(midVal, key);
    			if (cmp < 0) {
    				low = mid + 1;
    			} else if (cmp > 0) {
    				high = mid - 1;
    			} else {
    				return mid; // key found
    			}
    		}
    		if (high == mid) {
    			return low;
    		} else {
    			return mid;
    		}
    	}
    

    在UserIndexStore中增加以下方法

    /**
    	 * 查询年龄大于等于指定值的user索引
    	 * @param age
    	 * @return
    	 */
    	public BitSet findUserByAgeHigher(String age) {
    		BitSetIndexModel indexModel = getAge();
    		List<String> strs = indexModel.getHigherList(age);
    		BitSet bitset = null;
    		for (String str : strs) {
    			bitset = indexModel.or(str, bitset);
    		}
    		return bitset;
    	}
    
    	/**
    	 * 查询age小于等于指定值的user索引
    	 * @param age
    	 * @return
    	 */
    	public BitSet findUserByAgeLower(String age) {
    		BitSetIndexModel indexModel = getAge();
    		List<String> strs = indexModel.getLowerList(age);
    		BitSet bitset = null;
    		for (String str : strs) {
    			bitset = indexModel.or(str, bitset);
    		}
    		return bitset;
    	}
    
    	/**
    	 * 查询age在某两个值区间内的user索引
    	 * @param min
    	 * @param max
    	 * @return
    	 */
    	public BitSet findUserByAgeBetweenAnd(String min, String max) {
    		BitSetIndexModel indexModel = getAge();
    		List<String> strs = indexModel.getRange(min, max, new Comparator<Object>() {
    			@Override
    			public int compare(Object o1, Object o2) {
    
    				return Integer.valueOf(o1 == null ? "0" : o1.toString()).compareTo(Integer.valueOf(o2 == null ? "0" : o2.toString()));
    			}
    		});
    		BitSet bitset = null;
    		for (String str : strs) {
    			bitset = indexModel.or(str, bitset);
    		}
    		return bitset;
    	}
    
    测试,查询年龄在16-17之间的北京女孩。
    public class BitSetTestRange {
    
    	public static void main(String[] args) {
    		List<User> users = buildData();
    		UserIndexStore.getInstance().createIndex(users);
    		ExecutorService executorService = Executors.newFixedThreadPool(50);
    		int num = 2000;
    		long begin1 = System.currentTimeMillis();
    		for (int i = 0; i < num; i++) {
    			Runnable syncRunnable = new Runnable() {
    				@Override
    				public void run() {
    					BitSet bs = UserIndexStore.getInstance().query("北京", "girl", null);
    					BitSet ageBs = UserIndexStore.getInstance().findUserByAgeBetweenAnd("16", "17");
    					bs.and(ageBs);
    					for (Integer index : BitSetIndexModel.getRealIndexs(bs)) {
    						UserIndexStore.getInstance().findUser(index);
    					}
    				}
    			};
    			executorService.execute(syncRunnable);
    		}
    		executorService.shutdown();
    		while (true) {
    			try {
    				if (executorService.awaitTermination(1, TimeUnit.SECONDS)) {
    					System.err.println("单次查询时间为:" + (System.currentTimeMillis() - begin1) / num + "ms");
    					break;
    				}
    			} catch (InterruptedException e) {
    				e.printStackTrace();
    			}
    		}
    	}
    
    	private static List<User> buildData() {
    		String[] addresss = { "北京", "上海", "深圳" };
    		String[] ages = { "16", "17", "18" };
    		List<User> users = new ArrayList<>();
    		for (int i = 0; i < 1000000; i++) {
    			User user = new User();
    			user.setName("name" + i);
    			int rand = ThreadLocalRandom.current().nextInt(3);
    			user.setAddress(addresss[ThreadLocalRandom.current().nextInt(3)]);
    			user.setGender((rand & 1) == 0 ? "girl" : "boy");
    			user.setAge(ages[ThreadLocalRandom.current().nextInt(3)]);
    			users.add(user);
    		}
    		return users;
    	}
    
    }
    
    
    单次查询时间为:22ms
    

    相比"="查询,区间查询速度慢了一些,但还在预期之内。


    总结

      以上就实现了一个bitset索引,支持索引创建,更新,查询。并且因为没有传统的网络传输和磁盘io,所以速度非常快,基本上响应时间都在10ms以内。如果需要我可以在下一篇使用spring cloud搭建一个较完整的demo,供大家参考使用。

  • 相关阅读:
    登琴台石有感
    天台之约
    游小九寨有感
    又见白沫江
    大学生创业 提前要做好三项准备
    游十八里香草沟有感
    观响水滩瀑布有感
    创业十把金钥匙,看了非常有启发~(转)
    2009年春季商务人士最关注的经管图书
    有了你,就有了诗意的人生
  • 原文地址:https://www.cnblogs.com/ncbest/p/7724129.html
Copyright © 2020-2023  润新知