• Hand-Writing WordCount in Storm


    Create a Maven project and add the following configuration to pom.xml:

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>cn.darrenchan</groupId>
        <artifactId>StormDemo</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>StormDemo</name>
    
        <dependencies>
            <dependency>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
                <version>0.9.5</version>
                <!--<scope>provided</scope> -->
            </dependency>
        </dependencies>
        <build>
            <plugins>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                        <archive>
                            <manifest>
                                <mainClass>cn.darrenchan.storm.WordCountTopoloyMain</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <configuration>
                        <source>1.7</source>
                        <target>1.7</target>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    
    </project>
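
    With this assembly configuration, running mvn package should produce StormDemo-0.0.1-SNAPSHOT-jar-with-dependencies.jar under target/, with the topology's main class recorded in the manifest. When the topology is later submitted to a real cluster, the commented-out <scope>provided</scope> on storm-core should be re-enabled, since the cluster already supplies storm-core; for local-mode runs it must stay on the compile classpath as it is here.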

    The project layout is:
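
    A rough sketch of the layout, reconstructed from the package and class names in the listings below:

    StormDemo
    ├── pom.xml
    └── src/main/java/cn/darrenchan/storm
        ├── MySpout.java
        ├── MySplitBolt.java
        ├── MyCountBolt.java
        └── WordCountTopoloyMain.java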

    MySpout.java:

    package cn.darrenchan.storm;
    
    import java.util.Map;
    
    import backtype.storm.spout.SpoutOutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichSpout;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Values;
    
    public class MySpout extends BaseRichSpout {
        
        private SpoutOutputCollector collector;
    
        //The Storm framework calls nextTuple() over and over again
        //Values extends ArrayList
        @Override
        public void nextTuple() {
            collector.emit(new Values("i am lilei love hanmeimei"));
        }
    
        //Initialization method
        @Override
        public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }
    
        //Declare the field names of the data in the tuples emitted by this spout
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("love"));
        }
    }
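
    As written, nextTuple() will be called in a tight loop and emit the same sentence as fast as possible. A minimal sketch (an addition, not part of the original code) that paces the spout with Storm's backtype.storm.utils.Utils helper:

    @Override
    public void nextTuple() {
        collector.emit(new Values("i am lilei love hanmeimei"));
        //pause roughly one second between emits so the console output stays readable
        Utils.sleep(1000);
    }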

    MySplitBolt.java:

    package cn.darrenchan.storm;
    
    import java.util.Map;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    
    public class MySplitBolt extends BaseRichBolt {
        
        private OutputCollector collector;
    
        //Called by the Storm framework over and over; the argument passed in is a tuple
        @Override
        public void execute(Tuple input) {
            String line = input.getString(0);
            String[] words = line.split(" ");
            for (String word : words) {
                //Values carries two entries here, matching the two Fields declared below
                collector.emit(new Values(word, 1));
            }
        }
    
        //Initialization method
        @Override
        public void prepare(Map config, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }
    
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            //Fields declares two names here, matching the two Values emitted above
            declarer.declare(new Fields("word", "num"));
        }
    }
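
    The bolt above ignores Storm's reliability API: the emitted tuples are not anchored and the input is never acked. A minimal sketch of an anchored, acked execute() (an assumption, not the original code; replay also requires the spout to emit tuples with message ids):

    @Override
    public void execute(Tuple input) {
        String line = input.getString(0);
        for (String word : line.split(" ")) {
            //anchor the new tuple to the input so a downstream failure can be replayed from the spout
            collector.emit(input, new Values(word, 1));
        }
        //acknowledge the input once all derived tuples have been emitted
        collector.ack(input);
    }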

    MyCountBolt.java:

    package cn.darrenchan.storm;
    
    import java.util.HashMap;
    import java.util.Map;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Tuple;
    
    public class MyCountBolt extends BaseRichBolt {
        
        private OutputCollector collector;
        private Map<String, Integer> map;
    
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
            map = new HashMap<String, Integer>();
        }
    
        @Override
        public void execute(Tuple input) {
            String word = input.getString(0);
            Integer num = input.getInteger(1);
            if(map.containsKey(word)){
                map.put(word, map.get(word) + num);
            } else {
                map.put(word, num);
            }
            
            System.out.println(map);
        }
    
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
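            //this is the last bolt in the topology: it emits nothing downstream, so there are no output fields to declare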
            
        }
    
    }
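
    MyCountBolt only prints the running map for every incoming tuple. A small sketch (an addition, not in the original) that also dumps the final counts when the topology is shut down in local mode, by overriding cleanup():

    @Override
    public void cleanup() {
        //called when the bolt is being shut down; only reliably invoked in local mode
        System.out.println("final counts: " + map);
    }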

    WordCountTopoloyMain.java:

    package cn.darrenchan.storm;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.tuple.Fields;
    
    public class WordCountTopoloyMain {
        public static void main(String[] args) throws Exception {
            //1. Set up a TopologyBuilder
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout("mySpout", new MySpout(), 1);
            builder.setBolt("mySplitBolt", new MySplitBolt(), 2).shuffleGrouping("mySpout");
            builder.setBolt("myCountBolt", new MyCountBolt(), 2).fieldsGrouping("mySplitBolt", new Fields("word"));
            
            //2. Create a Config to specify how many workers the topology needs
            Config config = new Config();
            config.setNumWorkers(4);
            
            //3. Submit the topology; there are two modes: local mode and cluster mode
            //Cluster mode:
            //StormSubmitter.submitTopology("myWordCount", config, builder.createTopology());
            //Local mode:
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("myWordCount", config, builder.createTopology());
        }
    }
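
    In local mode the program above starts an in-process cluster and then simply keeps running. A minimal sketch (an addition, not in the original) of a clean shutdown that could be appended to the end of main():

    //let the topology run for a while, then stop it and tear down the in-process cluster
    Thread.sleep(60000);
    localCluster.killTopology("myWordCount");
    localCluster.shutdown();

    For cluster mode, uncomment the StormSubmitter line (plus its import), switch the storm-core dependency to provided scope, build the jar-with-dependencies with mvn package, and submit it with the storm jar command, giving cn.darrenchan.storm.WordCountTopoloyMain as the main class.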

    Comparison of the three ways of implementing WordCount:

    Overall runtime architecture diagram:

  • Original article: https://www.cnblogs.com/DarrenChan/p/7125687.html