• Distinct


    1.topology

    package com.suning.yystorm.business.adunion.uv;
    
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import com.suning.yystorm.business.adunion.uv.bolt.UVAppFilterBolt;
    import com.suning.yystorm.business.adunion.uv.bolt.UVAppStockFilterBolt;
    import com.suning.yystorm.business.adunion.uv.bolt.UVCacheBolt;
    import com.suning.yystorm.business.adunion.uv.bolt.UVWapFilterBolt;
    import com.suning.yystorm.business.adunion.uv.bolt.UVWapStockFilterBolt;
    
    import com.suning.yystorm.utils.PropertyUtils;
    
    import backtype.storm.Config;
    import backtype.storm.StormSubmitter;
    import backtype.storm.spout.SchemeAsMultiScheme;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.tuple.Fields;
    import storm.kafka.BrokerHosts;
    import storm.kafka.KafkaSpout;
    import storm.kafka.SpoutConfig;
    import storm.kafka.StringScheme;
    import storm.kafka.ZkHosts;
    
    /**
     * Entry point that wires up and submits the {@code TweeterUVTopology_v10} Storm topology.
     *
     * <p>Four Kafka spouts (topics "a", "b", "c", "d") each feed their own filter bolt; all four
     * filter bolts are then fields-grouped on {@code "dimension"} into a single
     * {@link UVCacheBolt} stage.
     */
    public class UVTopology {
        private static final Logger LOGGER = LoggerFactory.getLogger(UVTopology.class);

        /**
         * Builds the topology and submits it to the cluster.
         *
         * <p>Any exception raised while building is logged and swallowed, so the process
         * exits normally even when the topology could not be assembled.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            try {
                Config config = new Config();
                config.setNumWorkers(8);
                config.setDebug(false);
                config.setMaxSpoutPending(3000);
                config.setMessageTimeoutSecs(180);

                String kafkaZKNode = PropertyUtils.getInstance().getValueByKey("kafka.zookeeper.nodes");
                String kafkaZKRoot = "/xx";
                String kafkaConsumerId = "tweeter_uv_v10";

                BrokerHosts brokerHosts = new ZkHosts(kafkaZKNode);

                // Kafka ingestion: one spout configuration per source topic, each with its own consumer id suffix.
                SpoutConfig wapSpoutConfig = newSpoutConfig(brokerHosts, "a", kafkaZKRoot, kafkaConsumerId + "_wap");
                SpoutConfig appSpoutConfig = newSpoutConfig(brokerHosts, "b", kafkaZKRoot, kafkaConsumerId + "_app");
                SpoutConfig stockWapSpoutConfig =
                        newSpoutConfig(brokerHosts, "c", kafkaZKRoot, kafkaConsumerId + "_stockWap");
                SpoutConfig stockAppSpoutConfig =
                        newSpoutConfig(brokerHosts, "d", kafkaZKRoot, kafkaConsumerId + "_stockApp");

                // Data processing: spout -> filter bolt pairs, then a shared cache bolt.
                TopologyBuilder builder = new TopologyBuilder();

                builder.setSpout("wapSpout", new KafkaSpout(wapSpoutConfig), 10);
                builder.setBolt("wapFilterBolt", new UVWapFilterBolt(), 40).shuffleGrouping("wapSpout");
                builder.setSpout("appSpout", new KafkaSpout(appSpoutConfig), 36);
                builder.setBolt("appFilterBolt", new UVAppFilterBolt(), 80).shuffleGrouping("appSpout");

                builder.setSpout("stockWapSpout", new KafkaSpout(stockWapSpoutConfig), 10);
                builder.setBolt("stockWapFilterBolt", new UVWapStockFilterBolt(), 20).shuffleGrouping("stockWapSpout");
                builder.setSpout("stockAppSpout", new KafkaSpout(stockAppSpoutConfig), 10);
                builder.setBolt("stockAppFilterBolt", new UVAppStockFilterBolt(), 40).shuffleGrouping("stockAppSpout");

                // Fields grouping on "dimension" routes every tuple of one dimension to the same
                // uvCacheBolt task, which is what lets that bolt keep per-dimension state locally.
                Fields dimension = new Fields("dimension");
                builder.setBolt("uvCacheBolt", new UVCacheBolt(), 60)
                        .fieldsGrouping("appFilterBolt", dimension)
                        .fieldsGrouping("wapFilterBolt", dimension)
                        .fieldsGrouping("stockWapFilterBolt", dimension)
                        .fieldsGrouping("stockAppFilterBolt", dimension);

                runCluster(config, builder);
            } catch (Exception e) {
                // Logged at ERROR (not INFO) so a failed build is visible to error-level monitoring.
                LOGGER.error("error,", e);
            }
        }

        /**
         * Creates a Kafka spout configuration that decodes messages as strings.
         *
         * @param hosts      broker discovery settings shared by all spouts
         * @param topic      Kafka topic to consume
         * @param zkRoot     ZooKeeper root path passed to the spout configuration
         * @param consumerId consumer id passed to the spout configuration
         * @return the configured {@link SpoutConfig}
         */
        private static SpoutConfig newSpoutConfig(BrokerHosts hosts, String topic, String zkRoot, String consumerId) {
            SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerId);
            spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
            spoutConfig.forceFromStart = false;
            spoutConfig.socketTimeoutMs = 3000;
            return spoutConfig;
        }

        /**
         * Submits the built topology under the name {@code TweeterUVTopology_v10}.
         *
         * <p>Submission failures are logged and swallowed.
         *
         * @param config  topology-level Storm configuration
         * @param builder builder holding the assembled spouts and bolts
         */
        private static void runCluster(Config config, TopologyBuilder builder) {
            try {
                StormSubmitter.submitTopology("TweeterUVTopology_v10", config, builder.createTopology());
            } catch (Exception e) {
                // Logged at ERROR (not INFO): a topology that failed to submit is an operational failure.
                LOGGER.error("TweeterUVTopology_v10提交拓扑异常", e);
            }
        }
    }

    2.缓存,10秒记录一次

    package com.suning.yystorm.business.adunion.uv.bolt;
    
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import com.suning.yystorm.business.adunion.AdUnionConsts;
    import com.suning.yystorm.comm.hbase.HbaseClient;
    import com.suning.yystorm.comm.hbase.HbaseClientImpl;
    import com.suning.yystorm.utils.Distinct;
    import com.suning.yystorm.utils.FixedMap;
    import com.suning.yystorm.utils.TupleHelpers;
    
    import backtype.storm.Config;
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Tuple;
    
    /**
     * Storm bolt that tracks a per-dimension distinct count (UV) and periodically
     * writes the counts that changed through {@link HbaseClient}.
     *
     * <p>Regular tuples must carry the string fields {@code "dimension"} and {@code "uvid"}.
     * The bolt asks for a tick tuple every 10 seconds (see {@link #getComponentConfiguration()});
     * each tick flushes the pending counts. The {@link Distinct} instance is created with the
     * pattern {@code "yyyyMMdd"}, so its counters start over when the formatted date changes.
     *
     * @author 15060063
     */
    public class UVCacheBolt extends BaseRichBolt {

        private static final long serialVersionUID = 3384989044692629267L;

        private static final Logger LOGGER = LoggerFactory.getLogger(UVCacheBolt.class);

        private OutputCollector collector;
        /** Counts that changed since the last successful flush, keyed by dimension. */
        private FixedMap<String, Long> map;
        private Distinct distinct;
        private HbaseClient client;
        /** Formats the flush time that is passed along with each batch write. */
        private DateFormat format;

        /**
         * Creates the per-task state: the pending-count map, the distinct counter,
         * the HBase client and the timestamp formatter.
         */
        @Override
        @SuppressWarnings("rawtypes") // the raw Map is dictated by the Storm bolt interface
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
            this.map = new FixedMap<String, Long>(AdUnionConsts.MAP_COUNT_UV_MAX);
            this.distinct = new Distinct(AdUnionConsts.MAP_COUNT_UV_MAX, "yyyyMMdd");
            this.client = new HbaseClientImpl();
            this.format = new SimpleDateFormat("yyyyMMddHHmmss");
        }

        /**
         * Flushes pending counts on a tick tuple; otherwise records the visitor carried by
         * the tuple and acks it.
         *
         * @param tuple either a tick tuple or a data tuple with "dimension" and "uvid" fields
         */
        @Override
        public void execute(Tuple tuple) {
            if (TupleHelpers.isTickTuple(tuple)) {
                flushPendingCounts();
                return;
            }

            try {
                recordVisitor(tuple);
            } catch (Exception e) {
                // Logged at ERROR to match the storage-failure path; INFO hid these failures.
                LOGGER.error("tweeter UVCache异常", e);
            }

            // The tuple is acked whether or not counting succeeded.
            this.collector.ack(tuple);
        }

        /**
         * Writes the pending counts in one batch and clears them.
         * If the write throws, the error is logged and the pending counts are kept,
         * so they are included in the next flush attempt.
         */
        private void flushPendingCounts() {
            if (map.isEmpty()) {
                return;
            }
            try {
                client.insertBatchDataByLongMd5(AdUnionConsts.TWEETER_UV, map, format.format(new Date()), "info", false);
                map.clear();
            } catch (Exception e) {
                LOGGER.error("推客UV数UVCacheBolt存储异常", e);
            }
        }

        /**
         * Offers the tuple's uvid to the dimension's counter and, when the reported
         * cardinality changed, stores the new value for the next flush.
         */
        private void recordVisitor(Tuple tuple) {
            String dimension = tuple.getStringByField("dimension");
            String uvid = tuple.getStringByField("uvid");

            // NOTE(review): "before" is read prior to easyOffer, which may reset the counters when
            // the date rolls over; a dimension whose count is equal before and after that reset
            // would not be queued here - confirm whether that matters.
            long before = distinct.cardinality(dimension);
            distinct.easyOffer(dimension, uvid);
            long after = distinct.cardinality(dimension);

            if (before != after) {
                map.put(dimension, after);
            }
        }

        /** This bolt emits nothing, so no output fields are declared. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // intentionally empty
        }

        /**
         * Requests a tick tuple every 10 seconds for this component.
         *
         * @return component configuration containing only the tick frequency
         */
        @Override
        public Map<String, Object> getComponentConfiguration() {
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
            return conf;
        }
    }

     2.1 按小时去重的缓存(HourUVCacheBolt),同样每10秒记录一次

    package com.suning.yystorm.business.adunion.houruv.bolt;
    
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import com.suning.yystorm.business.adunion.AdUnionConsts;
    import com.suning.yystorm.comm.hbase.HbaseClient;
    import com.suning.yystorm.comm.hbase.HbaseClientImpl;
    import com.suning.yystorm.utils.Distinct;
    import com.suning.yystorm.utils.FixedMap;
    import com.suning.yystorm.utils.TupleHelpers;
    
    import backtype.storm.Config;
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Tuple;
    
    /**
     * Storm bolt that tracks a per-dimension distinct count (UV) per hour and periodically
     * writes the counts that changed through {@link HbaseClient}.
     *
     * <p>Regular tuples must carry the string fields {@code "dimension"} and {@code "uvid"}.
     * The bolt asks for a tick tuple every 10 seconds (see {@link #getComponentConfiguration()});
     * each tick flushes the pending counts. The {@link Distinct} instance is created with the
     * pattern {@code "yyyyMMddHH"}, so its counters start over when the formatted hour changes.
     *
     * @author 15060063
     */
    public class HourUVCacheBolt extends BaseRichBolt {

        private static final long serialVersionUID = 3384989044692629267L;

        private static final Logger LOGGER = LoggerFactory.getLogger(HourUVCacheBolt.class);

        private OutputCollector collector;
        /** Counts that changed since the last successful flush, keyed by dimension. */
        private FixedMap<String, Long> map;
        private Distinct distinct;
        private HbaseClient client;
        /** Formats the flush time that is passed along with each batch write. */
        private DateFormat format;

        /**
         * Creates the per-task state: the pending-count map, the hourly distinct counter,
         * the HBase client and the timestamp formatter.
         */
        @Override
        @SuppressWarnings("rawtypes") // the raw Map is dictated by the Storm bolt interface
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
            this.map = new FixedMap<String, Long>(AdUnionConsts.MAP_COUNT_UV_MAX);
            this.distinct = new Distinct(AdUnionConsts.MAP_COUNT_UV_MAX, "yyyyMMddHH");
            this.client = new HbaseClientImpl();
            this.format = new SimpleDateFormat("yyyyMMddHHmmss");
        }

        /**
         * Flushes pending counts on a tick tuple; otherwise records the visitor carried by
         * the tuple and acks it.
         *
         * @param tuple either a tick tuple or a data tuple with "dimension" and "uvid" fields
         */
        @Override
        public void execute(Tuple tuple) {
            if (TupleHelpers.isTickTuple(tuple)) {
                flushPendingCounts();
                return;
            }

            try {
                recordVisitor(tuple);
            } catch (Exception e) {
                // Logged at ERROR to match the storage-failure path; INFO hid these failures.
                LOGGER.error("tweeter hour UVCache异常", e);
            }

            // The tuple is acked whether or not counting succeeded.
            this.collector.ack(tuple);
        }

        /**
         * Writes the pending counts in one batch and clears them.
         * If the write throws, the error is logged and the pending counts are kept,
         * so they are included in the next flush attempt.
         */
        private void flushPendingCounts() {
            if (map.isEmpty()) {
                return;
            }
            try {
                client.insertBatchDataByLongMd5(AdUnionConsts.TWEETER_HOUR_UV, map, format.format(new Date()), "info", false);
                map.clear();
            } catch (Exception e) {
                LOGGER.error("推客hour UV数HourUVCacheBolt存储异常", e);
            }
        }

        /**
         * Offers the tuple's uvid to the dimension's counter and, when the reported
         * cardinality changed, stores the new value for the next flush.
         */
        private void recordVisitor(Tuple tuple) {
            String dimension = tuple.getStringByField("dimension");
            String uvid = tuple.getStringByField("uvid");

            // NOTE(review): "before" is read prior to easyOffer, which may reset the counters when
            // the hour rolls over; a dimension whose count is equal before and after that reset
            // would not be queued here - confirm whether that matters.
            long before = distinct.cardinality(dimension);
            distinct.easyOffer(dimension, uvid);
            long after = distinct.cardinality(dimension);

            if (before != after) {
                map.put(dimension, after);
            }
        }

        /** This bolt emits nothing, so no output fields are declared. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // intentionally empty
        }

        /**
         * Requests a tick tuple every 10 seconds for this component.
         *
         * @return component configuration containing only the tick frequency
         */
        @Override
        public Map<String, Object> getComponentConfiguration() {
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
            return conf;
        }
    }

    3.去重

    package com.suning.yystorm.utils;
    
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    
    import com.clearspring.analytics.stream.cardinality.AdaptiveCounting;
    import com.clearspring.analytics.stream.cardinality.ICardinality;
    
    /**
     * Per-dimension distinct counter whose state is scoped to a time window.
     *
     * <p>The window is defined by a date pattern: whenever the current time, formatted with
     * that pattern, differs from the value remembered from the previous call, all counters
     * are cleared. For example {@code "yyyyMMdd"} de-duplicates per day and
     * {@code "yyyyMMddHH"} per hour. Each dimension is backed by an {@link AdaptiveCounting}
     * estimator stored in a {@link FixedMap}.
     *
     * <p>Only estimator creation in {@link #offer} and {@link #getCard} is synchronized
     * (on the map); the window rollover and {@link #easyOffer} are not.
     */
    public class Distinct {

        private final FixedMap<String, ICardinality> cardinalityMap;

        private final DateFormat format;
        /** Formatted value of the window the counters currently belong to. */
        private String time;

        /**
         * De-duplicates within a time window, e.g. per day or per hour.
         *
         * @param size    size passed to the backing {@link FixedMap}
         * @param _format date pattern that defines the window, e.g. {@code yyyyMMdd}, {@code yyyyMMddHH}
         */
        public Distinct(int size, String _format) {
            cardinalityMap = new FixedMap<String, ICardinality>(size);

            format = new SimpleDateFormat(_format);
            time = format.format(new Date());
        }

        /**
         * Offers a keyword to the dimension's estimator, creating the estimator under a lock
         * on the map if it does not exist yet.
         *
         * @param dimension key identifying the counter
         * @param keyword   value to count
         * @return the result of {@link ICardinality#offer(Object)}; per the original
         *         documentation, {@code false} for a duplicate and {@code true} otherwise
         */
        public boolean offer(String dimension, String keyword) {
            rollWindowIfExpired();
            ensureEstimatorLocked(dimension);
            return cardinalityMap.get(dimension).offer(keyword);
        }

        /**
         * Same as {@link #offer(String, String)} but creates a missing estimator without
         * taking any lock.
         *
         * @param dimension key identifying the counter
         * @param keyword   value to count
         * @return the result of {@link ICardinality#offer(Object)}
         */
        public boolean easyOffer(String dimension, String keyword) {
            rollWindowIfExpired();

            if (cardinalityMap.get(dimension) == null) {
                cardinalityMap.put(dimension, newEstimator());
            }

            return cardinalityMap.get(dimension).offer(keyword);
        }

        /**
         * Returns the current cardinality for a dimension without touching the window.
         *
         * @param dimension key identifying the counter
         * @return the estimator's cardinality, or {@code 0} when the dimension has no estimator
         */
        public long cardinality(String dimension) {
            ICardinality card = cardinalityMap.get(dimension);
            return card == null ? 0 : card.cardinality();
        }

        /**
         * Returns the estimator for a dimension, rolling the window first and creating the
         * estimator under a lock on the map if it does not exist yet.
         *
         * @param dimension key identifying the counter
         * @return whatever the map holds for the dimension after the creation step
         */
        public ICardinality getCard(String dimension) {
            rollWindowIfExpired();
            ensureEstimatorLocked(dimension);
            return cardinalityMap.get(dimension);
        }

        /**
         * Stores the given estimator for a dimension, replacing any existing one.
         *
         * @param dimension   key identifying the counter
         * @param cardinality estimator to store
         * @return always {@code true}
         */
        public boolean setCard(String dimension, ICardinality cardinality) {
            cardinalityMap.put(dimension, cardinality);
            return true;
        }

        /**
         * Exposes the backing map itself (not a copy).
         *
         * @return the internal dimension-to-estimator map
         */
        public FixedMap<String, ICardinality> getCardinalityMap() {
            return cardinalityMap;
        }

        /** Clears every counter when the formatted current time no longer matches the stored window. */
        private void rollWindowIfExpired() {
            String now = format.format(new Date());
            if (!time.equals(now)) {
                cardinalityMap.clear();
                time = now;
            }
        }

        /**
         * Creates the estimator for a dimension if absent, re-checking the map while holding
         * its lock. The re-check must read the map again: testing a value captured before the
         * lock was taken would always see "absent" and overwrite an estimator that another
         * thread created in the meantime.
         */
        private void ensureEstimatorLocked(String dimension) {
            if (cardinalityMap.get(dimension) != null) {
                return;
            }
            synchronized (cardinalityMap) {
                if (cardinalityMap.get(dimension) == null) {
                    cardinalityMap.put(dimension, newEstimator());
                }
            }
        }

        /** Builds a fresh estimator with the same settings used throughout this class. */
        private static ICardinality newEstimator() {
            return AdaptiveCounting.Builder.obyCount(Integer.MAX_VALUE).build();
        }

    }
  • 相关阅读:
    接口和抽象的区别
    接口
    jquery Ajax提交表单数据
    SQL 检查 是否存在 表 临时表
    ASP.NET MVC 设置Area中 Controller 的方法 默认启动页
    json 序列化为数组
    C# Lamda中类似于SQL 中的 In 功能
    各种webservice调用地址
    ASP.NET获取客户端IP地址
    C#反射机制 Type类型
  • 原文地址:https://www.cnblogs.com/cxhfuujust/p/10883289.html
Copyright © 2020-2023  润新知