• Hadoop basics


    Set environment variables

    vim ~/.profile
    
    export HADOOP_HOME=/home/mmc/hadoop
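
    To make the variable visible in the current shell, it can be sourced and checked right away (a small verification step, not part of the original notes):

    source ~/.profile
    echo $HADOOP_HOME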
    

    Hadoop configuration

    vim etc/hadoop/hadoop-env.sh

    export JAVA_HOME=/opt/java/jdk1.8.0_151
    

    vim etc/hadoop/core-site.xml

    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://0.0.0.0:9000</value>
            <description>HDFS access (NameNode RPC) address</description>
        </property>
        <property>
          <name>dfs.permissions</name>
          <value>false</value>
        </property>
        <property>
            <name>hadoop.tmp.dir</name>
            <value>file:/home/mmc/hadoop/tmp</value>
            <description>Base directory for Hadoop data</description>
        </property>
    </configuration>
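
    To double-check that the file is being picked up, the effective values can be printed with hdfs getconf (a simple sanity check, assuming the commands are run from $HADOOP_HOME):

    ./bin/hdfs getconf -confKey fs.defaultFS
    ./bin/hdfs getconf -confKey hadoop.tmp.dir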
    

    vim etc/hadoop/hdfs-site.xml

    <configuration>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:/home/mmc/hadoop/hdfs/name</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:/home/mmc/hadoop/hdfs/data</value>
        </property>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
    </configuration>
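
    The directories referenced above can be created up front; this is optional (the format step and the DataNode create them as well) and simply mirrors the paths configured here:

    mkdir -p /home/mmc/hadoop/hdfs/name
    mkdir -p /home/mmc/hadoop/hdfs/data
    mkdir -p /home/mmc/hadoop/tmp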
    

    Passwordless SSH to localhost

    ssh-keygen
    cd ~/.ssh
    touch authorized_keys
    chmod 600 authorized_keys
    cat id_rsa.pub >> authorized_keys
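
    A quick check that key-based login works; it should drop into a shell without asking for a password (accept the host key on the first connection):

    ssh localhost
    exit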
    

    Format the NameNode

    ./bin/hdfs namenode -format
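
    If the format succeeds, the NameNode metadata directory configured in hdfs-site.xml is populated; a quick look confirms it (the path follows the configuration above):

    ls /home/mmc/hadoop/hdfs/name/current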
    

    Start the services with console debug logging

    export HADOOP_ROOT_LOGGER=DEBUG,console
    ./sbin/start-all.sh
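
    Once start-all.sh returns, jps should list the HDFS daemons (NameNode, DataNode, SecondaryNameNode) and, for YARN, ResourceManager and NodeManager; on a 3.x release the NameNode web UI is normally reachable on port 9870:

    jps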
    

    Verify

    ./bin/hadoop fs -ls /
    ./bin/hadoop fs -mkdir -p /user/hadoop/input
    ./bin/hadoop fs -ls /user/hadoop/input
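
    To give the WordCount job below something to process, a small tab-separated sample file can be uploaded first (the file name and contents are only an illustration):

    echo -e "hello\tworld\thello\thadoop" > /tmp/words.txt
    ./bin/hadoop fs -put /tmp/words.txt /user/hadoop/input/
    ./bin/hadoop fs -cat /user/hadoop/input/words.txt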
    

    pom.xml

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <version>3.3.0</version>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>3.3.0</version>
          <scope>test</scope>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>3.3.0</version>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-common</artifactId>
          <version>3.3.0</version>
        </dependency>
    
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-api</artifactId>
          <version>3.3.0</version>
        </dependency>
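
    A quick build confirms that these dependencies resolve (just a sanity check, assuming Maven is installed and the rest of the pom is in place):

    mvn clean package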
    

    Logging configuration

    src/main/resources/log4j.properties

    # Set root logger level to DEBUG and its only appender to A1.
    log4j.rootLogger=DEBUG, A1
    
    # A1 is set to be a ConsoleAppender.
    log4j.appender.A1=org.apache.log4j.ConsoleAppender
    
    # A1 uses PatternLayout.
    log4j.appender.A1.layout=org.apache.log4j.PatternLayout
    log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
    

    Java code

    WordCount.java

    package org.example;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class WordCount {
        static class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
    
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                //Take one line of input and turn the serialized value into a String
                String line = value.toString();
                //Split the line on the delimiter (a tab character here)
                String[] words = line.split("\t");
                //Emit <word, 1> for every word on the line
                for(String word:words){
                    //Keys and values are written as Hadoop Writable types
                    context.write(new Text(word),new IntWritable(1));
                }
            }
        }
        static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
            //reduce() is called once per group: one key together with all of its values (k: v1, v2, v3)
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
                //Counter for this key
                int count = 0;
                //Walk the group of values and add up how many times the key appeared
                for(IntWritable value : values){
                    count += value.get();
                }
                context.write(key,new IntWritable(count));
    
            }
        }
    }
    

    App.java

    package org.example;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class App
    {
        public static void main( String[] args ) throws Exception{
            String jobName = "word count";
    //        String inputPath = "hdfs://192.168.56.200:9000/user/hadoop/input/";
            String inputPath = "hdfs://192.168.0.24:9000/user/hadoop/input/";
    //        String inputPath = "/user/hadoop/input/";
    
    //        String outputPath = "hdfs://192.168.56.200:9000/user/hadoop/output/";
            String outputPath = "/home/mmc/downloads/hadoop/output";
    
            Configuration conf = new Configuration();
    
    //        conf.set("fs.defaultFS", "hdfs://192.168.56.200:9000");
            conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
            conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
            conf.set("dfs.client.use.datanode.hostname", "true");
    
            Job job = Job.getInstance(conf);
            job.setJobName(jobName);
    
            job.setJarByClass(WordCount.class);
    
            job.setMapperClass(WordCount.WordCountMapper.class);
            job.setReducerClass(WordCount.WordCountReducer.class);
    
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            FileInputFormat.setInputPaths(job,new Path(inputPath));
            FileOutputFormat.setOutputPath(job,new Path(outputPath));
            System.exit(job.waitForCompletion(true)?0:1);
        }
    }
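
    One way to run the driver is straight from Maven via the exec plugin on a machine that can reach the NameNode; since the code above leaves fs.defaultFS unset, the output path resolves to the local filesystem, and part-r-00000 is the standard reducer output file name. Note that the output directory must not exist before the job starts:

    mvn clean package
    mvn exec:java -Dexec.mainClass=org.example.App
    cat /home/mmc/downloads/hadoop/output/part-r-00000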
    