• RocketMQ-过滤器


    布隆过滤器

    可以google一下对布隆算法的理解,总的来说就是可以精确判断某个key不存在,但是存在的判断结果有一定的误差,支持数据的插入和查询,对频繁删除数据的场景不支持。在分布式环境下可以配合redis的bitset命令,实现分布式布隆过滤器。

    package org.apache.rocketmq.filter.util;
    
    import com.google.common.hash.Hashing;
    
    import java.nio.charset.Charset;
    
    /**
     * Simple implement of bloom filter.
     */
    public class BloomFilter {
    
        public static final Charset UTF_8 = Charset.forName("UTF-8");
    
        // as error rate, 10/100 = 0.1
        private int f = 10;
        private int n = 128;
    
        // hash function num, by calculation.
        private int k;
        // bit count, by calculation.
        private int m;
    
        /**
         * Create bloom filter by error rate and mapping num.
         *
         * @param f error rate
         * @param n num will mapping to bit
         */
        public static BloomFilter createByFn(int f, int n) {
            return new BloomFilter(f, n);
        }
    
        /**
         * Constructor.
         *
         * @param f error rate
         * @param n num will mapping to bit
         */
        private BloomFilter(int f, int n) {
            if (f < 1 || f >= 100) {
                throw new IllegalArgumentException("f must be greater or equal than 1 and less than 100");
            }
            if (n < 1) {
                throw new IllegalArgumentException("n must be greater than 0");
            }
    
            this.f = f;
            this.n = n;
    
            // set p = e^(-kn/m)
            // f = (1 - p)^k = e^(kln(1-p))
            // when p = 0.5, k = ln2 * (m/n), f = (1/2)^k = (0.618)^(m/n)
            double errorRate = f / 100.0;
            this.k = (int) Math.ceil(logMN(0.5, errorRate));
    
            if (this.k < 1) {
                throw new IllegalArgumentException("Hash function num is less than 1, maybe you should change the value of error rate or bit num!");
            }
    
            // m >= n*log2(1/f)*log2(e)
            this.m = (int) Math.ceil(this.n * logMN(2, 1 / errorRate) * logMN(2, Math.E));
            // m%8 = 0
            this.m = (int) (Byte.SIZE * Math.ceil(this.m / (Byte.SIZE * 1.0)));
        }
    
        /**
         * Calculate bit positions of {@code str}.
         * <p>
         * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
         * Mitzenmacher.
         * </p>
         * 根据key来计算多个要设置的比特位
         */
        public int[] calcBitPositions(String str) {
            int[] bitPositions = new int[this.k];
    
            long hash64 = Hashing.murmur3_128().hashString(str, UTF_8).asLong();
    
            int hash1 = (int) hash64;
            int hash2 = (int) (hash64 >>> 32);
    
            for (int i = 1; i <= this.k; i++) {
                int combinedHash = hash1 + (i * hash2);
                // Flip all the bits if it's negative (guaranteed positive number)
                if (combinedHash < 0) {
                    combinedHash = ~combinedHash;
                }
                bitPositions[i - 1] = combinedHash % this.m;
            }
    
            return bitPositions;
        }
    
        /**
         * Calculate bit positions of {@code str} to construct {@code BloomFilterData}
         */
        public BloomFilterData generate(String str) {
            int[] bitPositions = calcBitPositions(str);
    
            return new BloomFilterData(bitPositions, this.m);
        }
    
        /**
         * Calculate bit positions of {@code str}, then set the related {@code bits} positions to 1.
         * 该方法是把一个key计算成多个比特位,并在BitsArray中设置成1
         */
        public void hashTo(String str, BitsArray bits) {
            hashTo(calcBitPositions(str), bits);
        }
    
        /**
         * Set the related {@code bits} positions to 1.
         */
        public void hashTo(int[] bitPositions, BitsArray bits) {
            check(bits);
    
            for (int i : bitPositions) {
                bits.setBit(i, true);
            }
        }
    
        /**
         * Extra check:
         * <li>1. check {@code filterData} belong to this bloom filter.</li>
         * <p>
         * Then set the related {@code bits} positions to 1.
         * </p>
         */
        public void hashTo(BloomFilterData filterData, BitsArray bits) {
            if (!isValid(filterData)) {
                throw new IllegalArgumentException(
                    String.format("Bloom filter data may not belong to this filter! %s, %s",
                        filterData, this.toString())
                );
            }
            hashTo(filterData.getBitPos(), bits);
        }
    
        /**
         * Calculate bit positions of {@code str}, then check all the related {@code bits} positions is 1.
         *
         * @return true: all the related {@code bits} positions is 1
         */
        public boolean isHit(String str, BitsArray bits) {
            return isHit(calcBitPositions(str), bits);
        }
    
        /**
         * Check all the related {@code bits} positions is 1.
         * 判断是否命中
         * @return true: all the related {@code bits} positions is 1
         */
        public boolean isHit(int[] bitPositions, BitsArray bits) {
            check(bits);
            boolean ret = bits.getBit(bitPositions[0]);
            for (int i = 1; i < bitPositions.length; i++) {
                ret &= bits.getBit(bitPositions[i]);
            }
            return ret;
        }
    
        /**
         * Check all the related {@code bits} positions is 1.
         *
         * @return true: all the related {@code bits} positions is 1
         */
        public boolean isHit(BloomFilterData filterData, BitsArray bits) {
            if (!isValid(filterData)) {
                throw new IllegalArgumentException(
                    String.format("Bloom filter data may not belong to this filter! %s, %s",
                        filterData, this.toString())
                );
            }
            return isHit(filterData.getBitPos(), bits);
        }
    
        /**
         * Check whether one of {@code bitPositions} has been occupied.
         *
         * @return true: if all positions have been occupied.
         */
        public boolean checkFalseHit(int[] bitPositions, BitsArray bits) {
            for (int j = 0; j < bitPositions.length; j++) {
                int pos = bitPositions[j];
    
                // check position of bits has been set.
                // that mean no one occupy the position.
                if (!bits.getBit(pos)) {
                    return false;
                }
            }
    
            return true;
        }
    
        protected void check(BitsArray bits) {
            if (bits.bitLength() != this.m) {
                throw new IllegalArgumentException(
                    String.format("Length(%d) of bits in BitsArray is not equal to %d!", bits.bitLength(), this.m)
                );
            }
        }
    
        /**
         * Check {@code BloomFilterData} is valid, and belong to this bloom filter.
         * <li>1. not null</li>
         * <li>2. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitNum} must be equal to {@code m} </li>
         * <li>3. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitPos} is not null</li>
         * <li>4. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitPos}'s length is equal to {@code k}</li>
         */
        public boolean isValid(BloomFilterData filterData) {
            if (filterData == null
                || filterData.getBitNum() != this.m
                || filterData.getBitPos() == null
                || filterData.getBitPos().length != this.k) {
                return false;
            }
    
            return true;
        }
    
        /**
         * error rate.
         */
        public int getF() {
            return f;
        }
    
        /**
         * expect mapping num.
         */
        public int getN() {
            return n;
        }
    
        /**
         * hash function num.
         */
        public int getK() {
            return k;
        }
    
        /**
         * total bit num.
         */
        public int getM() {
            return m;
        }
    
        @Override
        public boolean equals(Object o) {
            if (this == o)
                return true;
            if (!(o instanceof BloomFilter))
                return false;
    
            BloomFilter that = (BloomFilter) o;
    
            if (f != that.f)
                return false;
            if (k != that.k)
                return false;
            if (m != that.m)
                return false;
            if (n != that.n)
                return false;
    
            return true;
        }
    
        @Override
        public int hashCode() {
            int result = f;
            result = 31 * result + n;
            result = 31 * result + k;
            result = 31 * result + m;
            return result;
        }
    
        @Override
        public String toString() {
            return String.format("f: %d, n: %d, k: %d, m: %d", f, n, k, m);
        }
    
        protected double logMN(double m, double n) {
            return Math.log(n) / Math.log(m);
        }
    }

    SQL92过滤

    在filter下面的expresion包中,定义了多个Expression,这些Expression是根据sql compile出来的,然后调用evaluate方法,就可以具体判断该数据是否满足条件,具体看一下这个test case。

    @Test
        public void testSQL() {
            try {
                /**
                 * 判断这条数据是否符合sql条件
                 */
                final Map<String, Object> props = new HashMap<>();
                props.put("a",1);
                props.put("b",1);
    
                 Object ret = FilterFactory.INSTANCE.get(ExpressionType.SQL92).compile("a is not null and a > 0")
                        .evaluate(new EvaluationContext(){
                            //EvaluationContext  定义了待判断数据的获取上下文
    
                            @Override
                            public Object get(String name) {
                                return props.get(name);
                            }
    
                            @Override
                            public Map<String, Object> keyValues() {
    
                                return props;
                            }
                        });
                System.out.println(ret);  // True
            } catch (Exception e) {
                e.printStackTrace();
                assertThat(Boolean.FALSE).isTrue();
            }
        }

    自定义过滤器

    1 实现一个具体的过滤表达式 NothingExpression

    static class NothingExpression implements Expression {
    
            @Override
            public Object evaluate(final EvaluationContext context) throws Exception {
                return Boolean.TRUE;
            }
        }

    2 定义该表达式的过滤器,实现FilterSpi接口,其中compile接口就是返回一个初始化过的过滤表达式

    static class NothingFilter implements FilterSpi {
            @Override
            public Expression compile(final String expr) throws MQFilterException {
                return new NothingExpression();
            }
    
            @Override
            public String ofType() {
                return "Nothing";
            }
        }

    3 调用该表达式的evaluate。并传入待过滤的数据,返回过滤结果

    @Test
        public void testRegister() {
            FilterFactory.INSTANCE.register(new NothingFilter());
    
            Expression expr = null;
            try {
                expr = FilterFactory.INSTANCE.get("Nothing").compile("abc");
            } catch (MQFilterException e) {
                e.printStackTrace();
                assertThat(Boolean.FALSE).isTrue();
            }
    
            assertThat(expr).isNotNull();
    
            try {
                assertThat((Boolean) expr.evaluate(new EmptyEvaluationContext())).isTrue();
            } catch (Exception e) {
                e.printStackTrace();
                assertThat(Boolean.FALSE).isTrue();
            }
        }
  • 相关阅读:
    [总结]2020年8月 OI学习/刷题记录
    [总结]2020年7月 OI学习/刷题记录
    [总结]2020年6月 OI学习/刷题记录
    [总结]2020年5月 OI学习/刷题记录
    [Luogu P6474][NOI Online #2 入门组]荆轲刺秦王 题解(BFS)
    [总结]2020年4月 OI学习/刷题记录
    [总结]2020年3月 OI学习/刷题记录
    [Luogu P6059]纯粹容器 senpai
    [Luogu P6044]「ACOI2020」惊吓路径
    ubuntu下查看(改变)本地端口开放情况,开启和关闭防火墙
  • 原文地址:https://www.cnblogs.com/gaojy/p/15077338.html
Copyright © 2020-2023  润新知