分组策略
shuffle 随机分组
field分组
按照指定field的key进行hash处理,
相同的field,一定进入到同一bolt.
该分组容易产生数据倾斜问题,通过使用二次聚合避免此类问题。
使用二次聚合避免倾斜。
App入口类
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 public class { public static void main (String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("wcspout" , new WordCountSpout()).setNumTasks(2 ); builder.setBolt("split-bolt" , new SplitBolt(),3 ).shuffleGrouping("wcspout" ).setNumTasks(3 ); builder.setBolt("counter-1" , new CountBolt(),3 ).shuffleGrouping("split-bolt" ).setNumTasks(3 ); builder.setBolt("counter-2" , new CountBolt(),2 ).fieldsGrouping("counter-1" ,new Fields("word" )).setNumTasks(2 ); Config conf = new Config(); conf.setNumWorkers(2 ); conf.setDebug(true ); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("wc" , conf, builder.createTopology()); } }
聚合bolt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 public class CountBolt implements IRichBolt { private Map<String,Integer> map ; private TopologyContext context; private OutputCollector collector; private long lastEmitTime = 0 ; private long duration = 5000 ; public void prepare (Map stormConf, TopologyContext context, OutputCollector collector) { this .context = context; this .collector = collector; map = new HashMap<String, Integer>(); map = Collections.synchronizedMap(map); Thread t = new Thread(){ public void run () { while (true ){ emitData(); } } }; t.setDaemon(true ); t.start(); } private void emitData () { synchronized (map){ for (Map.Entry<String, Integer> entry : map.entrySet()) { collector.emit(new Values(entry.getKey(), entry.getValue())); } map.clear(); } try { Thread.sleep(5000 ); } catch (InterruptedException e) { e.printStackTrace(); } } public void execute (Tuple tuple) { String word = tuple.getString(0 ); Util.sendToLocalhost(this , word); Integer count = tuple.getInteger(1 ); if (!map.containsKey(word)){ map.put(word, count); } else { map.put(word,map.get(word) + count); } } public void cleanup () { for (Map.Entry<String,Integer> entry : map.entrySet()){ System.out.println(entry.getKey() + " : " + entry.getValue()); } } public void declareOutputFields (OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word" ,"count" )); } public Map<String, Object> getComponentConfiguration () { return null ; } }
all分组
使用广播分组。
builder.setBolt("split-bolt", new SplitBolt(), 2).allGrouping("wcspout").setNumTasks(2);
direct(特供)
只发送给指定的一个bolt.
collector.emitDirect(taskId, new Values(line));
builder.setBolt("split-bolt", new SplitBolt(), 2).directGrouping("wcspout").setNumTasks(2);
global分组
对目标target task进行排序,选择最小的taskId号进行发送tuple
类似于direct,可以是特殊的direct分组。
自定义分组
自定义CustomStreamGrouping类
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 public class MyGrouping implements CustomStreamGrouping { private List<Integer> targetTasks ; public void prepare (WorkerTopologyContext context, GlobalStreamId stream, List<Integer> targetTasks) { this .targetTasks = targetTasks ; } public List<Integer> chooseTasks (int taskId, List<Object> values) { List<Integer> subTaskIds = new ArrayList<Integer>(); for (int i = 0 ; i <= targetTasks.size() / 2 ; i ++){ subTaskIds.add(targetTasks.get(i)); } return subTaskIds; } }
设置分组策略
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 public class { public 大专栏 Storm的分组策略和确保消息送达机制 · 十年饮冰,难凉热血 > static void main (String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("wcspout" , new WordCountSpout()).setNumTasks(2 ); builder.setBolt("split-bolt" , new SplitBolt(),4 ).customGrouping("wcspout" ,new MyGrouping()).setNumTasks(4 ); Config conf = new Config(); conf.setNumWorkers(2 ); conf.setDebug(true ); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("wc" , conf, builder.createTopology()); System.out.println("hello world" ); } }
storm确保消息如何被完全处理 WordCountSpout:通过回调函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 public class WordCountSpout implements IRichSpout { private TopologyContext context ; private SpoutOutputCollector collector ; private List<String> states ; private Random r = new Random(); private int index = 0 ; private Map<Long,String> messages = new HashMap<Long, String>(); private Map<Long,Integer> failMessages = new HashMap<Long, Integer>(); public void open (Map conf, TopologyContext context, SpoutOutputCollector collector) { this .context = context ; this .collector = collector ; states = new ArrayList<String>(); states.add("hello world tom" ); states.add("hello world tomas" ); states.add("hello world tomasLee" ); states.add("hello world tomson" ); } public void close () { } public void activate () { } public void deactivate () { } public void nextTuple () { if (index < 3 ){ String line = states.get(r.nextInt(4 )); long ts = System.currentTimeMillis() ; messages.put(ts,line); collector.emit(new Values(line),ts); System.out.println(this + "nextTuple() : " + line + " : " + ts); index ++ ; } } public void ack (Object msgId) { Long ts = (Long)msgId ; failMessages.remove(ts) ; messages.remove(ts) ; } public void fail (Object msgId) { Long ts = (Long)msgId; Integer retryCount = failMessages.get(ts); retryCount = (retryCount == null ? 0 : retryCount) ; if (retryCount >= 3 ){ failMessages.remove(ts) ; messages.remove(ts) ; } else { collector.emit(new Values(messages.get(ts)),ts); System.out.println(this + "fail() : " + messages.get(ts) + " : " + ts); retryCount ++ ; failMessages.put(ts,retryCount); } } public void declareOutputFields (OutputFieldsDeclarer declarer) { declarer.declare(new Fields("line" )); } public Map<String, Object> getComponentConfiguration () { return null ; } }
SplitBolt:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 public class SplitBolt implements IRichBolt { private TopologyContext context ; private OutputCollector collector ; public void prepare (Map stormConf, TopologyContext context, OutputCollector collector) { this .context = context ; this .collector = collector ; } public void execute (Tuple tuple) { String line = tuple.getString(0 ); if (new Random().nextBoolean()){ collector.ack(tuple); System.out.println(this + " : ack() : " + line + " : " + tuple.getMessageId().toString()); } else { collector.fail(tuple); System.out.println(this + " : fail() : " + line + " : " + tuple.getMessageId().toString()); } } public void cleanup () { } public void declareOutputFields (OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word" ,"count" )); } public Map<String, Object> getComponentConfiguration () { return null ; } }
App:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 public class { public static void main (String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("wcspout" , new WordCountSpout()).setNumTasks(1 ); builder.setBolt("split-bolt" , new SplitBolt(),2 ).shuffleGrouping("wcspout" ).setNumTasks(2 ); Config conf = new Config(); conf.setNumWorkers(2 ); conf.setDebug(true ); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("wc" , conf, builder.createTopology()); System.out.println("hello world llll" ); } }
测试结果: