Experiment 1: Hadoop - HDFS and MapReduce Operations
I. Experiment Objectives
1. Set up a cluster on virtual machines and deploy Hadoop
2. Perform HDFS file operations and file API programming;
3. Develop, deploy, and invoke MapReduce parallel programs.
II. Experiment Content
1. Setting up a virtual machine cluster and deploying Hadoop
Build the cluster and deploy Hadoop using VMware, CentOS 7, and Xshell (or SecureCRT); for the detailed steps, refer to
https://www.bilibili.com/video/BV1Kf4y1z7Nw?p=1
2. HDFS file operations
Verify the HDFS file commands on the distributed file system.
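For reference, the basic commands can be exercised from any cluster node roughly as follows; the /wmd directory and input.txt file are only placeholders used throughout this experiment:
hdfs dfs -mkdir -p /wmd
hdfs dfs -put input.txt /wmd
hdfs dfs -ls /wmd
hdfs dfs -cat /wmd/input.txt
hdfs dfs -get /wmd/input.txt ./input_copy.txt
hdfs dfs -rm -r /wmd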
2.1 HDFS API programming
Call the HDFS file API to access files in the distributed file system, e.g. to create, modify, and delete them.
Code:
package mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

public class HdfsClient {

    private FileSystem fs;

    @After
    public void close() throws IOException {
        // Release the FileSystem handle after each test
        fs.close();
    }

    @Test
    public void testMkdir() throws URISyntaxException, IOException, InterruptedException {
        // Address of the cluster to connect to
        URI uri = new URI("hdfs://node01:8020");
        // Configuration object
        Configuration configuration = new Configuration();
        // User to act as
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // Create a directory
        fs.mkdirs(new Path("/std/wmd"));
        System.out.println("Directory created");
    }

    // Upload a local file to HDFS
    @Test
    public void testPut() throws IOException, URISyntaxException, InterruptedException {
        URI uri = new URI("hdfs://node01:8020");
        Configuration configuration = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // Arguments: delete the source? allow overwriting? source path, destination path
        fs.copyFromLocalFile(false, false, new Path("E:\\input.txt"), new Path("/wmd/input.txt"));
        System.out.println("Upload finished");
    }

    // Download a file from HDFS
    @Test
    public void testGet() throws IOException, URISyntaxException, InterruptedException {
        URI uri = new URI("hdfs://node01:8020");
        Configuration configuration = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // Arguments: delete the source? source path, local destination, use the raw local file system (skip .crc files)?
        fs.copyToLocalFile(false, new Path("hdfs://node01/wmd/input.txt"), new Path("D:\\"), true);
        System.out.println("Download finished");
    }

    // Delete files or directories
    @Test
    public void testRm() throws IOException, URISyntaxException, InterruptedException {
        URI uri = new URI("hdfs://node01:8020");
        Configuration configuration = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // Second argument: whether to delete recursively (required for non-empty directories)
        fs.delete(new Path("hdfs://node01/std"), true);
        System.out.println("Delete finished");
    }

    // Rename and move files
    @Test
    public void testMv() throws IOException, URISyntaxException, InterruptedException {
        URI uri = new URI("hdfs://node01:8020");
        Configuration configuration = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // Rename the file and move it to the root directory
        fs.rename(new Path("/wmd/wmdym.txt"), new Path("/wmd.txt"));
        System.out.println("Rename finished");
        // Renaming a directory works the same way:
        // fs.rename(new Path("/tiansui"), new Path("/dym"));
    }

    // Print detailed information for every file in HDFS
    @Test
    public void fileDetail() throws IOException, URISyntaxException, InterruptedException {
        URI uri = new URI("hdfs://node01:8020");
        Configuration configuration = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, configuration, user);
        // List all files under the root directory, recursively
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            System.out.println("=============" + fileStatus.getPath() + "==============");
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            System.out.println(Arrays.toString(blockLocations));
        }
    }
}
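The tests above cover creating directories, uploading, downloading, deleting, renaming/moving, and listing file metadata. Reading a file's contents goes through the same API; the following is a minimal sketch, assuming the same hdfs://node01:8020 address, the hadoop user, and the /wmd/input.txt file uploaded earlier:

package mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

public class HdfsReadExample {
    public static void main(String[] args) throws Exception {
        // Same connection parameters as the tests above (assumed: node01:8020, user "hadoop")
        FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration(), "hadoop");
        try (FSDataInputStream in = fs.open(new Path("/wmd/input.txt"))) {
            // Copy the file's bytes to standard output; 4096 is the buffer size and
            // false tells copyBytes not to close the streams (try-with-resources closes the input)
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            fs.close();
        }
    }
}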
3. MapReduce parallel program development
3.1 Finding the highest temperature of each year
Code:
package mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Temperature {

    // Mapper: extract (year, temperature) from each input line
    static class TempMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.print("Before Mapper: " + key + ", " + value);
            String line = value.toString();
            // The first four characters of the line are the year
            String year = line.substring(0, 4);
            // Everything from index 8 onward is the temperature
            int temperature = Integer.parseInt(line.substring(8));
            context.write(new Text(year), new IntWritable(temperature));
            System.out.println("======" + "After Mapper:" + new Text(year) + ", " + new IntWritable(temperature));
        }
    }

    // Reducer: keep the maximum temperature seen for each year
    static class TempReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int maxValue = Integer.MIN_VALUE;
            StringBuffer sb = new StringBuffer();
            for (IntWritable value : values) {
                maxValue = Math.max(maxValue, value.get());
                sb.append(value).append(", ");
            }
            System.out.print("Before Reduce: " + key + ", " + sb.toString());
            context.write(key, new IntWritable(maxValue));
            System.out.println("======" + "After Reduce: " + key + ", " + maxValue);
        }
    }

    public static void main(String[] args) throws Exception {
        // Input file and output directory on HDFS
        String dst = "hdfs://node01:8020/wmd/input.txt";
        String dstOut = "hdfs://node01:8020/wmd/output";

        Configuration hadoopConfig = new Configuration();
        hadoopConfig.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        hadoopConfig.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        Job job = Job.getInstance(hadoopConfig);
        // job.setJarByClass(Temperature.class);

        FileInputFormat.addInputPath(job, new Path(dst));
        FileOutputFormat.setOutputPath(job, new Path(dstOut));

        job.setMapperClass(TempMapper.class);
        job.setReducerClass(TempReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.waitForCompletion(true);
        System.out.println("Finished");
    }
}
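Note that the Mapper assumes a fixed-width line format: the first four characters of each line are the year, and everything from index 8 onward is the temperature. As an illustration only (not the actual experiment data), an input file containing the lines
2014010114
2014010216
2015010308
would produce the reducer output 2014 16 and 2015 8. If your data set uses a different layout, adjust the substring offsets in TempMapper accordingly. After packaging the class into a jar, the job can be submitted on the cluster with the usual hadoop jar command, and the result can be inspected in the part-r-00000 file under /wmd/output.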