Cloud Computing Lab 1: HDFS and MapReduce Operations


    1. Building a virtual machine cluster and deploying Hadoop

    Use VMware, CentOS 7, and Xshell (or SecureCRT) to build a virtual machine cluster and deploy Hadoop on it.

     

     

    Xshell is used as the remote connection tool.

    2. HDFS File Operations
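    Day-to-day file operations can also be done from the shell with the hdfs dfs commands, for example hdfs dfs -mkdir /data, hdfs dfs -put local.txt /data, hdfs dfs -ls /data, and hdfs dfs -cat /data/local.txt (the paths here are only placeholders).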

     

     

    2.1 HDFS Interface Programming

    Call the HDFS file API to access files in the distributed file system, for example to create, modify, and delete them; a minimal sketch using the Java FileSystem API is shown below.
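    The following sketch assumes the NameNode address hdfs://localhost:9000 that is also used by the jobs later in this lab; the class name HdfsDemo, the paths under /demo, and the file contents are placeholders.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HdfsDemo {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Connect to the NameNode (address assumed, matching the jobs below)
            FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);

            Path file = new Path("/demo/hello.txt");

            // Create a file and write some text to it
            FSDataOutputStream out = fs.create(file, true);
            out.writeUTF("hello hdfs");
            out.close();

            // Modify: rename the file
            fs.rename(file, new Path("/demo/hello-renamed.txt"));

            // Delete the directory recursively
            fs.delete(new Path("/demo"), true);

            fs.close();
        }
    }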

     

     

    3. MapReduce Parallel Program Development

    3.1 Maximum temperature per year

    In this experiment, after the code is written, the project is packaged as a jar, uploaded to CentOS, and run with the hadoop command. Each input line is expected to look like 2000010115 (see the sample printouts in the mapper below): the first four characters are the year and the characters from index 8 onward are the temperature.

    import java.io.IOException;
     
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    public class Temperature {
        /**
         * The four generic type parameters are:
         * KeyIn    - key of the Mapper input: the starting offset of each line (0, 11, ...)
         * ValueIn  - value of the Mapper input: the text of the line
         * KeyOut   - key of the Mapper output: the "year" parsed from the line
         * ValueOut - value of the Mapper output: the "temperature" parsed from the line
         */
        static class TempMapper extends
                Mapper<LongWritable, Text, Text, IntWritable> {
            @Override
            public void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                // Sample output: Before Mapper: 0, 2000010115
                System.out.print("Before Mapper: " + key + ", " + value);
                String line = value.toString();
                String year = line.substring(0, 4);
                int temperature = Integer.parseInt(line.substring(8));
                context.write(new Text(year), new IntWritable(temperature));
                // Sample output: After Mapper:2000, 15
                System.out.println(
                        "======" +
                        "After Mapper:" + new Text(year) + ", " + new IntWritable(temperature));
            }
        }
     
       
           static class TempReducer extends
                Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            public void reduce(Text key, Iterable<IntWritable> values,
                    Context context) throws IOException, InterruptedException {
                int maxValue = Integer.MIN_VALUE;
                StringBuffer sb = new StringBuffer();
                //take the maximum of the values
                for (IntWritable value : values) {
                    maxValue = Math.max(maxValue, value.get());
                    sb.append(value).append(", ");
                }
                // Sample output: Before Reduce: 2000, 15, 23, 99, 12, 22, 
                System.out.print("Before Reduce: " + key + ", " + sb.toString());
                context.write(key, new IntWritable(maxValue));
                // Sample output: After Reduce: 2000, 99
                System.out.println(
                        "======" +
                        "After Reduce: " + key + ", " + maxValue);
            }
        }
     
        public static void main(String[] args) throws Exception {
            //input path
            String dst = "hdfs://localhost:9000/input.txt";
            //output path; it must not exist yet, not even as an empty directory
            String dstOut = "hdfs://localhost:9000/output";
            Configuration hadoopConfig = new Configuration();
             
            hadoopConfig.set("fs.hdfs.impl", 
                org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()
            );
            hadoopConfig.set("fs.file.impl",
                org.apache.hadoop.fs.LocalFileSystem.class.getName()
            );
            Job job = new Job(hadoopConfig);
             
            //this line is required when the job is packaged as a jar and run with the hadoop command
            job.setJarByClass(Temperature.class);
     
            //input and output file paths for the job
            FileInputFormat.addInputPath(job, new Path(dst));
            FileOutputFormat.setOutputPath(job, new Path(dstOut));
     
            //set the custom Mapper and Reducer classes for the two stages
            job.setMapperClass(TempMapper.class);
            job.setReducerClass(TempReducer.class);
             
            //set the key and value types of the final output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);       
            //run the job and wait for it to finish
            job.waitForCompletion(true);
            System.out.println("Finished");
        }
    }
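    With the input and output paths hardcoded as above, the packaged jar can be run with a command of the form hadoop jar temperature.jar Temperature, where the jar name is only a placeholder. Before rerunning the job, the /output directory must be removed, for example with hdfs dfs -rm -r /output, because the output path must not already exist.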

    3.2 Word Count

     

     

    mapper:
    package cn.edu.bupt.wcy.wordcount;
     
    import java.io.IOException;
     
    import org.apache.commons.lang.StringUtils;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
     
     
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
     
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on spaces and emit (word, 1) for every word
            String[] words = StringUtils.split(value.toString(), " ");
            for (String word : words) {
                context.write(new Text(word), new LongWritable(1));
            }
        }
    }
    
    
    
    
    reducer:
    package cn.edu.bupt.wcy.wordcount;
     
    import java.io.IOException;
     
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
     
    public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {
            // Sum up the 1s emitted by the mapper for this word
            long sum = 0;
            for (LongWritable num : values) {
                sum += num.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }
    
    
    runner:
    package cn.edu.bupt.wcy.wordcount;
     
    import java.io.IOException;
     
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
     
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
     
    public class WordCountRunner {
     
        public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();  
            Job job = new Job(conf);  
            job.setJarByClass(WordCountRunner.class);  
            job.setJobName("wordcount");  
            job.setOutputKeyClass(Text.class);  
            job.setOutputValueClass(LongWritable.class);  
            job.setMapperClass(WordCountMapper.class);  
            job.setReducerClass(WordCountReducer.class);  
            job.setInputFormatClass(TextInputFormat.class);  
            job.setOutputFormatClass(TextOutputFormat.class);  
            // HDFS input and output paths taken from the command line
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            job.waitForCompletion(true);  
        }
        
    }
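    A typical invocation, assuming the three classes above are packaged into a jar whose name is only a placeholder here, is hadoop jar wordcount.jar cn.edu.bupt.wcy.wordcount.WordCountRunner /input /output, where the first argument is the HDFS input path and the second is an output path that does not exist yet.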