• 大数据学习——Storm学习单词计数案例


    需求:计算单词在文档中出现的次数,每出现一次就累加一次

     

    遇到的问题

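    第一个问题是 storm-core 依赖的 <scope>provided</scope> 作用域问题:本地模式(LocalCluster)运行时如果把 storm-core 设为 provided,类路径中会找不到 Storm 的类;只有打包提交到集群运行时才适合使用 provided。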

    https://www.cnblogs.com/biehongli/p/8316885.html

    第二个问题是 collector.emit 接收的是 List<Object>,因此需要先把从文件中读取的每一行内容放入 list,再发送出去

    代码如下

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven build descriptor for the TestStorm word-count example. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <!-- Project coordinates. -->
        <groupId>com.cyf</groupId>
        <artifactId>TestStorm</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <!-- Extra repository used to resolve artifacts: release versions are enabled, snapshots are disabled. -->
        <repositories>
            <repository>
                <id>alimaven</id>
                <name>aliyun maven</name>
                <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
                <releases>
                    <enabled>true</enabled>
                </releases>
                <snapshots>
                    <enabled>false</enabled>
                </snapshots>
            </repository>
        </repositories>
    
    
        <dependencies>
    
            <!-- Storm core API. No <scope> is declared, so Maven's default (compile) scope applies
                 and the Storm classes are on the classpath when the topology is run locally.
                 NOTE(review): for a cluster deployment the scope is usually switched to provided;
                 confirm against the target environment. -->
            <dependency>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
                <version>0.9.5</version>
            </dependency>
    
        </dependencies>
        <build>
            <plugins>
                <!-- Builds the jar and writes a manifest with a Class-Path (entries prefixed with lib/)
                     and com.cyf.StormTopologyDriver as the Main-Class. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <version>2.4</version>
                    <configuration>
                        <archive>
                            <manifest>
                                <addClasspath>true</addClasspath>
                                <classpathPrefix>lib/</classpathPrefix>
                                <mainClass>com.cyf.StormTopologyDriver</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </project>
    MyLocalFileSpout

    package com.cyf;
    
    import backtype.storm.spout.SpoutOutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichSpout;
    import backtype.storm.tuple.Fields;
    import org.apache.commons.lang.StringUtils;
    
    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    
    /**
     * Created by Administrator on 2019/2/19.
     *
     * Spout that reads a local text file line by line and emits every non-blank
     * line as a single-field tuple named "juzi".
     */
    public class MyLocalFileSpout extends BaseRichSpout {
        private SpoutOutputCollector collector;
        private BufferedReader bufferedReader;

        /**
         * Initialization method: remembers the collector and opens the input file.
         *
         * @param map                  topology configuration (unused here)
         * @param topologyContext      topology context (unused here)
         * @param spoutOutputCollector collector used later by {@link #nextTuple()} to emit tuples
         * @throws RuntimeException if the input file does not exist; failing here avoids a
         *                          NullPointerException on the first nextTuple() call
         */
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.collector = spoutOutputCollector;
            // When running on a Linux node, point this at a file that exists there, e.g. "/root/1.log".
            // The backslash has to be escaped: an unescaped backslash followed by 1 is an octal
            // escape (character U+0001), not a path separator.
            String path = "D:\\1.log";
            try {
                this.bufferedReader = new BufferedReader(new FileReader(path));
            } catch (FileNotFoundException e) {
                throw new RuntimeException("Input file not found: " + path, e);
            }
        }

        /**
         * Called in a loop; each call reads one line and, if it is not blank,
         * sends it out as one tuple. Blank lines and end-of-file emit nothing.
         */
        public void nextTuple() {
            try {
                // readLine() returns null at end of file; isNotBlank() rejects null as well.
                String line = bufferedReader.readLine();

                if (StringUtils.isNotBlank(line)) {
                    // emit() takes a List<Object>, so the line is wrapped in a one-element list.
                    List<Object> arrayList = new ArrayList<Object>();
                    arrayList.add(line);
                    collector.emit(arrayList);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Releases the file handle opened in open().
         */
        public void close() {
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        /**
         * Declares the single output field "juzi" carrying the line text.
         */
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("juzi"));
        }
    }
    MySplitBolt
    package com.cyf;
    
    import backtype.storm.topology.BasicOutputCollector;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseBasicBolt;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    
    /**
     * Created by Administrator on 2019/2/19.
     *
     * Bolt that splits each incoming sentence (field "juzi") into words and
     * emits one ("word", 1) tuple per word.
     */
    public class MySplitBolt extends BaseBasicBolt {
        /**
         * Splits the sentence on runs of whitespace and emits every non-empty word
         * with a count of 1.
         *
         * @param tuple                input tuple carrying the sentence in field "juzi"
         * @param basicOutputCollector collector the (word, 1) pairs are emitted to
         */
        public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {

            // 1. Fetch the sentence from the tuple.
            String juzi = (String) tuple.getValueByField("juzi");
            if (juzi == null) {
                return;
            }
            // 2. Split on any run of whitespace so that consecutive spaces or tabs
            //    do not yield empty "words".
            String[] strings = juzi.split("\\s+");
            // 3. Emit the data.
            for (String word : strings) {
                // Leading whitespace still produces one empty first token; skip it.
                if (word.length() > 0) {
                    basicOutputCollector.emit(new Values(word, 1));
                }
            }
        }

        /**
         * Declares the two output fields: the word and its count.
         */
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word", "num"));
        }
    }
    MyWordCountAndPrintBolt
    package com.cyf;
    
    import backtype.storm.topology.BasicOutputCollector;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseBasicBolt;
    import backtype.storm.tuple.Tuple;
    
    import java.util.HashMap;
    import java.util.Map;
    
    /**
     * Created by Administrator on 2019/2/19.
     *
     * Terminal bolt that keeps a running total per word in an in-memory map
     * and prints the whole map after every tuple.
     */
    public class MyWordCountAndPrintBolt extends BaseBasicBolt {

        private Map<String, Integer> wordCountMap = new HashMap<String, Integer>();

        /**
         * Adds the incoming count (field "num") to the total stored for the
         * incoming word (field "word"), then prints the current totals.
         */
        public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
            String word = (String) tuple.getValueByField("word");
            Integer num = (Integer) tuple.getValueByField("num");

            // 1. Work out the new total: accumulate onto an existing non-zero
            //    count, otherwise start from the incoming value.
            Integer previous = wordCountMap.get(word);
            Integer updated;
            if (previous != null && previous.intValue() != 0) {
                updated = previous.intValue() + num;
            } else {
                updated = num;
            }
            wordCountMap.put(word, updated);

            // 2. Print the data.
            System.out.println(wordCountMap);
        }

        /**
         * Declares nothing: this bolt does not emit tuples.
         */
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

        }
    }
    StormTopologyDriver
    package com.cyf;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.StormSubmitter;
    import backtype.storm.generated.AlreadyAliveException;
    import backtype.storm.generated.InvalidTopologyException;
    import backtype.storm.generated.StormTopology;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.tuple.Fields;
    
    /**
     * Created by Administrator on 2019/2/21.
     *
     * Entry point that wires the word-count topology
     * (file spout -> split bolt -> count/print bolt) and submits it.
     */
    public class StormTopologyDriver {
        /**
         * Builds the topology and runs it in local mode.
         *
         * @param args command-line arguments (unused)
         * @throws AlreadyAliveException    declared for the cluster-mode submit call
         * @throws InvalidTopologyException declared for the cluster-mode submit call
         */
        public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
            // 1. Prepare the job description.
            TopologyBuilder topologyBuilder = new TopologyBuilder();

            topologyBuilder.setSpout("mySpout", new MyLocalFileSpout());
            topologyBuilder.setBolt("bolt1", new MySplitBolt()).shuffleGrouping("mySpout");
            // Group by the "word" field so that every occurrence of a word reaches the same
            // counting task; with shuffle grouping the totals would be split across tasks
            // as soon as the bolt runs with parallelism greater than 1.
            topologyBuilder.setBolt("bolt2", new MyWordCountAndPrintBolt())
                    .fieldsGrouping("bolt1", new Fields("word"));

            // 2. Submit the job: decide where to submit and what to submit.
            Config config = new Config();
            StormTopology stormTopology = topologyBuilder.createTopology();

            // Local mode.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("wordcount", config, stormTopology);

            // Cluster mode: replace the two local-mode lines above with the line below.
            // StormSubmitter.submitTopology("wordcount", config, stormTopology);
        }
    }

    本地运行结果:

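     在集群上运行(需先把 StormTopologyDriver 中的本地模式改为 StormSubmitter.submitTopology 提交,并把 MyLocalFileSpout 读取的文件路径改为集群节点上的路径,如 /root/1.log,然后重新打包)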

    运行命令:

    storm jar TestStorm.jar  com.cyf.StormTopologyDriver

  • 相关阅读:
    dwr中的部分问题和总结
    UltraEdit常用快捷键
    Spring定时器StopWatch
    mybatis 教程(mybatis in action)
    Java中的session详解
    关于OutOfMemoryError的处理
    安卓版本和Api Level
    android通过adb wireless的使用
    adb端口被占用情况下如何杀掉进程
    解决Install failed uid changed
  • 原文地址:https://www.cnblogs.com/feifeicui/p/10398274.html
Copyright © 2020-2023  润新知