• Trident的过滤操作


    1.过滤操作

      只是判断某个tuple是否保留

      无需跨网络,无需跨分区

      不会改变tuple的结构,只是改变tuple的数量

    2.需求

      过滤掉不是订单的tuple。

      其中，订单日志中包含“IBEIfeng.gif”，不包含该标记的日志不是订单，需要被过滤掉。

    3.主驱动类

      先过滤

      后打印

     1 package com.jun.trident;
     2 
     3 import backtype.storm.Config;
     4 import backtype.storm.LocalCluster;
     5 import backtype.storm.StormSubmitter;
     6 import backtype.storm.generated.AlreadyAliveException;
     7 import backtype.storm.generated.InvalidTopologyException;
     8 import backtype.storm.tuple.Fields;
     9 import backtype.storm.tuple.Values;
    10 import storm.trident.TridentTopology;
    11 import storm.trident.testing.FixedBatchSpout;
    12 
    13 public class TridentDemo {
    14     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
    15         TridentTopology tridentTopology=new TridentTopology();
    16         //模拟数据
    17         Fields field=new Fields("log","flag");
    18         FixedBatchSpout spout=new FixedBatchSpout(field,5,
    19             new Values("168.214.187.214 - - [1481953616092] "GET /view.php HTTP/1.1" 200 0 "http://cn.bing.com/search?q=spark mllib" "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1" "-"","A"),
    20             new Values("168.187.202.202 - - [1481953537038] "GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1" 200 0 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)" "-"","A"),
    21             new Values("61.30.167.187 - - [1481953539039] "GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1" 200 0 "-" "Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" "-"","A"),
    22             new Values("30.29.132.190 - - [1481953544042] "GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1" 200 0 "-" "Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53" "-"","B"),
    23             new Values("222.190.187.201 - - [1481953578068] "GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1" 200 0 "-" "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1" "-"","B"),
    24             new Values("72.202.43.53 - - [1481953579069] "GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1" 200 0 "-" "Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" "-"","B")
    25         );
    26         //多次循环
    27         spout.setCycle(true);
    28         //提交
    29         Config config=new Config();
    30         tridentTopology.newStream("orderAnalyse",spout)
    31 //            .each(new Fields("log","flag"),new PrintFilter());
    32             .each(new Fields("log"),new ValidLogFilter())
    33             .each(new Fields("log","flag"),new PrintFilter());
    34         if(args==null || args.length<=0){
    35             LocalCluster localCluster=new LocalCluster();
    36             localCluster.submitTopology("tridentDemo",config,tridentTopology.build());
    37         }else {
    38             config.setNumWorkers(2);
    39             StormSubmitter.submitTopology(args[0],config,tridentTopology.build());
    40         }
    41     }
    42 }

    4.过滤类

     1 package com.jun.trident;
     2 
     3 import storm.trident.operation.Filter;
     4 import storm.trident.operation.TridentOperationContext;
     5 import storm.trident.tuple.TridentTuple;
     6 
     7 import java.util.Map;
     8 
     9 public class ValidLogFilter implements Filter {
    10     //定义如何过滤的逻辑
    11     @Override
    12     public boolean isKeep(TridentTuple tridentTuple) {
    13         String log=tridentTuple.getStringByField("log");
    14         boolean isKepp=true;
    15         if(log==null || "".equals(log)){
    16             isKepp=false;
    17         }else {
    18             //含有IBEIfeng.gif才是订单
    19             if (log.contains("IBEIfeng.gif")){
    20                 isKepp=true;
    21             }else {
    22                 isKepp=false;
    23             }
    24         }
    25         return isKepp;
    26     }
    27 
    28     @Override
    29     public void prepare(Map map, TridentOperationContext tridentOperationContext) {
    30 
    31     }
    32 
    33     @Override
    34     public void cleanup() {
    35 
    36     }
    37 }

    5.效果

      

  • 相关阅读:
    BZOJ 1823: [JSOI2010]满汉全席 [2-SAT]
    POJ 3683 Priest John's Busiest Day[2-SAT 构造解]
    Centos出现-bash: unzip: command not found的解决办法
    物理cpu与逻辑cpu概述
    如何正确查看Linux机器内存使用情况
    connections java.net.BindException: Address already in use_解决方案
    Linux查看端口、进程情况及kill进程
    linux如何查看端口被哪个进程占用?
    如何正确查看Linux机器内存使用情况
    TPS和事务响应时间的关系
  • 原文地址:https://www.cnblogs.com/juncaoit/p/9161561.html
Copyright © 2020-2023  润新知