• 局部聚合与全局聚合


    1.说明

      在局部聚合的类中必须有一个内部类,用于存储状态值(即聚合过程中的中间结果)

      GroupBy+局部聚合,不需要过多的跨网络传输

      GroupBy+全局聚合,需要大量的网络传输

    一:局部聚合

    1.需求

      另一个流,基于分钟的订单金额总数,局部聚合

    2.驱动类

     1 package com.jun.trident;
     2 
     3 import backtype.storm.Config;
     4 import backtype.storm.LocalCluster;
     5 import backtype.storm.StormSubmitter;
     6 import backtype.storm.generated.AlreadyAliveException;
     7 import backtype.storm.generated.InvalidTopologyException;
     8 import backtype.storm.tuple.Fields;
     9 import backtype.storm.tuple.Values;
    10 import storm.trident.Stream;
    11 import storm.trident.TridentState;
    12 import storm.trident.TridentTopology;
    13 import storm.trident.operation.Function;
    14 import storm.trident.operation.TridentCollector;
    15 import storm.trident.operation.TridentOperationContext;
    16 import storm.trident.operation.builtin.Count;
    17 import storm.trident.operation.builtin.Sum;
    18 import storm.trident.testing.FixedBatchSpout;
    19 import storm.trident.testing.MemoryMapState;
    20 import storm.trident.tuple.TridentTuple;
    21 
    22 import java.util.Map;
    23 
    24 /**
    25  * Trident demo topology with two branches over the same parsed order stream:
    26  * an order count per minute kept as Trident state, and a partition-local sum
    27  * of order amounts per minute that is printed.
    28  *
    29  * <p>ValidLogFilter, LogParserFunction, DateTransFormerFunction and PrintFilter
    30  * are referenced here but defined outside this listing.
    31  */
    32 public class TridentDemo {
    33     /**
    34      * Builds the topology and submits it.
    35      *
    36      * @param args if null or empty the topology is run in a LocalCluster under the
    37      *             name "tridentDemo"; otherwise args[0] is used as the topology name
    38      *             for StormSubmitter and two workers are requested
    39      * @throws AlreadyAliveException    propagated from topology submission
    40      * @throws InvalidTopologyException propagated from topology submission
    41      */
    42     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
    43         TridentTopology tridentTopology=new TridentTopology();
    44         // Simulated access-log lines. The double quotes that belong to the log text
    45         // are escaped (\") so each line is one valid Java string literal.
    46         Fields field=new Fields("log","flag");
    47         FixedBatchSpout spout=new FixedBatchSpout(field,5,
    48             new Values("168.214.187.214 - - [1481953616092] \"GET /view.php HTTP/1.1\" 200 0 \"http://cn.bing.com/search?q=spark mllib\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","A"),
    49             new Values("168.187.202.202 - - [1481953537038] \"GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)\" \"-\"","A"),
    50             new Values("61.30.167.187 - - [1481953539039] \"GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","A"),
    51             new Values("30.29.132.190 - - [1481953544042] \"GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53\" \"-\"","B"),
    52             new Values("222.190.187.201 - - [1481953578068] \"GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","B"),
    53             new Values("72.202.43.53 - - [1481953579069] \"GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","B")
    54         );
    55         // Keep cycling through the sample data
    56         spout.setCycle(true);
    57         // Shared upstream processing
    58         Stream stream=tridentTopology.newStream("orderAnalyse",spout)
    59                 // Filter
    60             .each(new Fields("log"),new ValidLogFilter())
    61                 // Parse
    62             .each(new Fields("log"), new LogParserFunction(),new Fields("orderId","orderTime","orderAmtStr","memberId"))
    63                 // Project
    64             .project(new Fields("orderId","orderTime","orderAmtStr","memberId"))
    65                 // Parse the order time
    66             .each(new Fields("orderTime"),new DateTransFormerFunction(),new Fields("day","hour","minter"))
    67          ;
    68         // Branch the stream
    69         // 1. Order count grouped by "minter"
    70         TridentState state=stream.groupBy(new Fields("minter"))
    71                 // Global aggregation; state is held in memory
    72                 .persistentAggregate(new MemoryMapState.Factory(),new Count(),new Fields("orderNumByMinter"));
    73 //        state.newValuesStream().each(new Fields("minter","orderNumByMinter"),new PrintFilter());
    74 
    75         // 2. Second branch: order amount per minute, local (per-partition) aggregation
    76         Stream partitionStream=stream.each(new Fields("orderAmtStr"),new TransforAmtToDoubleFunction(),new Fields("orderAmt"))
    77             .groupBy(new Fields("minter"))
    78                     // Local aggregation
    79                 .chainedAgg()    // start of the aggregator chain
    80             .partitionAggregate(new Fields("orderAmt"),new LocalSum(),new Fields("orderAmtSumOfLocal"))
    81                 .chainEnd();      // end of the aggregator chain
    82         partitionStream.each(new Fields("minter","orderAmtSumOfLocal"),new PrintFilter());
    83 
    84         // Submit
    85         Config config=new Config();
    86         if(args==null || args.length<=0){
    87             LocalCluster localCluster=new LocalCluster();
    88             localCluster.submitTopology("tridentDemo",config,tridentTopology.build());
    89         }else {
    90             config.setNumWorkers(2);
    91             StormSubmitter.submitTopology(args[0],config,tridentTopology.build());
    92         }
    93     }
    94 }

    3.金额从字符串转为double类型的方法类

     1 package com.jun.trident;
     2 
     3 import backtype.storm.tuple.Values;
     4 import org.slf4j.Logger;
     5 import org.slf4j.LoggerFactory;
     6 import storm.trident.operation.Function;
     7 import storm.trident.operation.TridentCollector;
     8 import storm.trident.operation.TridentOperationContext;
     9 import storm.trident.tuple.TridentTuple;
    10 
    11 import java.util.Map;
    12 
    13 /**
    14  * Trident function that parses the "orderAmtStr" field and emits it as a double.
    15  * Tuples whose amount cannot be parsed are logged and produce no output.
    16  */
    17 public class TransforAmtToDoubleFunction implements Function {
    18     private static final Logger logger= LoggerFactory.getLogger(TransforAmtToDoubleFunction.class);
    19 
    20     /**
    21      * Parses the amount string and emits it as a single-value tuple.
    22      *
    23      * @param tridentTuple     input tuple; its "orderAmtStr" field is read as a String
    24      * @param tridentCollector collector that receives the parsed amount
    25      */
    26     @Override
    27     public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
    28         String orderAmtStr=tridentTuple.getStringByField("orderAmtStr");
    29         if(orderAmtStr==null){
    30             // Double.parseDouble(null) would throw NullPointerException; report it explicitly
    31             logger.error("金额转换错误:"+orderAmtStr);
    32             return;
    33         }
    34         double orderAmt;
    35         try{
    36             orderAmt=Double.parseDouble(orderAmtStr);
    37         }catch (NumberFormatException e){
    38             // Keep the cause in the log so the bad input can be diagnosed
    39             logger.error("金额转换错误:"+orderAmtStr,e);
    40             return;
    41         }
    42         // Emit outside the try block so that a failure while emitting is not
    43         // swallowed and misreported as an amount conversion error
    44         tridentCollector.emit(new Values(orderAmt));
    45     }
    46 
    47     /** No setup is required. */
    48     @Override
    49     public void prepare(Map map, TridentOperationContext tridentOperationContext) {
    50         // intentionally empty
    51     }
    52 
    53     /** No resources to release. */
    54     @Override
    55     public void cleanup() {
    56         // intentionally empty
    57     }
    58 }

    4.局部聚合的类

     1 package com.jun.trident;
     2 
     3 import backtype.storm.tuple.Values;
     4 import storm.trident.operation.Aggregator;
     5 import storm.trident.operation.TridentCollector;
     6 import storm.trident.operation.TridentOperationContext;
     7 import storm.trident.tuple.TridentTuple;
     8 
     9 import java.util.Map;
    10 
    11 
    12 /**
    13  * Trident aggregator that adds up the "orderAmt" field of the tuples it is given
    14  * and emits the total as a single value.
    15  */
    16 public class LocalSum implements Aggregator<LocalSum.InnerState> {
    17 
    18     /** Accumulator that carries the running amount total between calls. */
    19     public static class InnerState{
    20         public double amtSum=0.0;
    21     }
    22 
    23     /** No setup is required. */
    24     @Override
    25     public void prepare(Map map, TridentOperationContext tridentOperationContext) {
    26     }
    27 
    28     /**
    29      * Creates the accumulator for a new aggregation.
    30      *
    31      * @return a fresh state whose sum is 0.0
    32      */
    33     @Override
    34     public InnerState init(Object o, TridentCollector tridentCollector) {
    35         // The field initializer already starts the sum at 0.0
    36         return new InnerState();
    37     }
    38 
    39     /** Adds the tuple's "orderAmt" value to the running total held in the state. */
    40     @Override
    41     public void aggregate(InnerState preState, TridentTuple tridentTuple, TridentCollector tridentCollector) {
    42         preState.amtSum+=tridentTuple.getDoubleByField("orderAmt");
    43     }
    44 
    45     /** Emits the accumulated total as a one-value tuple. */
    46     @Override
    47     public void complete(InnerState innerState, TridentCollector tridentCollector) {
    48         tridentCollector.emit(new Values(innerState.amtSum));
    49     }
    50 
    51     /** No resources to release. */
    52     @Override
    53     public void cleanup() {
    54     }
    55 }

    5.效果

      

    二:全局聚合

    1.说明

      上一篇文档的程序中已经使用过全局聚合,这里把它和局部聚合放在一起,重新说明一次

    2.驱动类

     1 package com.jun.trident;
     2 
     3 import backtype.storm.Config;
     4 import backtype.storm.LocalCluster;
     5 import backtype.storm.StormSubmitter;
     6 import backtype.storm.generated.AlreadyAliveException;
     7 import backtype.storm.generated.InvalidTopologyException;
     8 import backtype.storm.tuple.Fields;
     9 import backtype.storm.tuple.Values;
    10 import storm.trident.Stream;
    11 import storm.trident.TridentState;
    12 import storm.trident.TridentTopology;
    13 import storm.trident.operation.Function;
    14 import storm.trident.operation.TridentCollector;
    15 import storm.trident.operation.TridentOperationContext;
    16 import storm.trident.operation.builtin.Count;
    17 import storm.trident.operation.builtin.Sum;
    18 import storm.trident.testing.FixedBatchSpout;
    19 import storm.trident.testing.MemoryMapState;
    20 import storm.trident.tuple.TridentTuple;
    21 
    22 import java.util.Map;
    23 
    24 /**
    25  * Trident demo topology that counts orders per minute and, on a second branch,
    26  * sums order amounts per minute in two steps: a partition-local sum followed by
    27  * a global sum kept as Trident state and printed.
    28  *
    29  * <p>ValidLogFilter, LogParserFunction, DateTransFormerFunction and PrintFilter
    30  * are referenced here but defined outside this listing.
    31  */
    32 public class TridentDemo {
    33     /**
    34      * Builds the topology and submits it.
    35      *
    36      * @param args if null or empty the topology is run in a LocalCluster under the
    37      *             name "tridentDemo"; otherwise args[0] is used as the topology name
    38      *             for StormSubmitter and two workers are requested
    39      * @throws AlreadyAliveException    propagated from topology submission
    40      * @throws InvalidTopologyException propagated from topology submission
    41      */
    42     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
    43         TridentTopology tridentTopology=new TridentTopology();
    44         // Simulated access-log lines. The double quotes that belong to the log text
    45         // are escaped (\") so each line is one valid Java string literal.
    46         Fields field=new Fields("log","flag");
    47         FixedBatchSpout spout=new FixedBatchSpout(field,5,
    48             new Values("168.214.187.214 - - [1481953616092] \"GET /view.php HTTP/1.1\" 200 0 \"http://cn.bing.com/search?q=spark mllib\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","A"),
    49             new Values("168.187.202.202 - - [1481953537038] \"GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)\" \"-\"","A"),
    50             new Values("61.30.167.187 - - [1481953539039] \"GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","A"),
    51             new Values("30.29.132.190 - - [1481953544042] \"GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53\" \"-\"","B"),
    52             new Values("222.190.187.201 - - [1481953578068] \"GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","B"),
    53             new Values("72.202.43.53 - - [1481953579069] \"GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","B")
    54         );
    55         // Keep cycling through the sample data
    56         spout.setCycle(true);
    57         // Shared upstream processing
    58         Stream stream=tridentTopology.newStream("orderAnalyse",spout)
    59                 // Filter
    60             .each(new Fields("log"),new ValidLogFilter())
    61                 // Parse
    62             .each(new Fields("log"), new LogParserFunction(),new Fields("orderId","orderTime","orderAmtStr","memberId"))
    63                 // Project
    64             .project(new Fields("orderId","orderTime","orderAmtStr","memberId"))
    65                 // Parse the order time
    66             .each(new Fields("orderTime"),new DateTransFormerFunction(),new Fields("day","hour","minter"))
    67          ;
    68         // Branch the stream
    69         // 1. Order count grouped by "minter"
    70         TridentState state=stream.groupBy(new Fields("minter"))
    71                 // Global aggregation; state is held in memory
    72                 .persistentAggregate(new MemoryMapState.Factory(),new Count(),new Fields("orderNumByMinter"));
    73 //        state.newValuesStream().each(new Fields("minter","orderNumByMinter"),new PrintFilter());
    74 
    75         // 2. Second branch: order amount per minute, local (per-partition) aggregation
    76         Stream partitionStream=stream.each(new Fields("orderAmtStr"),new TransforAmtToDoubleFunction(),new Fields("orderAmt"))
    77             .groupBy(new Fields("minter"))
    78                     // Local aggregation
    79                 .chainedAgg()    // start of the aggregator chain
    80             .partitionAggregate(new Fields("orderAmt"),new LocalSum(),new Fields("orderAmtSumOfLocal"))
    81                 .chainEnd();      // end of the aggregator chain
    82 //        partitionStream.each(new Fields("minter","orderAmtSumOfLocal"),new PrintFilter());
    83         // Follow the local sums with one global aggregation
    84         TridentState partitionState=partitionStream.groupBy(new Fields("minter"))
    85                 // Global aggregation: sum the per-partition sums
    86                 .persistentAggregate(new MemoryMapState.Factory(),new Fields("orderAmtSumOfLocal"),new Sum(),new Fields("totalOrderAmt"));
    87         partitionState.newValuesStream().each(new Fields("minter","totalOrderAmt"),new PrintFilter());
    88 
    89         // Submit
    90         Config config=new Config();
    91         if(args==null || args.length<=0){
    92             LocalCluster localCluster=new LocalCluster();
    93             localCluster.submitTopology("tridentDemo",config,tridentTopology.build());
    94         }else {
    95             config.setNumWorkers(2);
    96             StormSubmitter.submitTopology(args[0],config,tridentTopology.build());
    97         }
    98     }
    99 }

    3.效果

      

  • 相关阅读:
    [洛谷2397]yyy loves Maths VI
    subs函数
    JSON 对象和字符串
    JSON添加注释的问题
    给定一个数组求里面数字拼在一起组成的最小数字
    为什么集合类没有实现Cloneable和Serializable接口
    Java 接口
    Unicode(统一码、万国码、单一码)
    meta 中的属性viewport
    meta标签
  • 原文地址:https://www.cnblogs.com/juncaoit/p/9164176.html
Copyright © 2020-2023  润新知