hadoop2.x学习笔记
df -hl 查看磁盘剩余空间
hdfs 数据默认存储路径
data/tmp/dfs
data/
└── tmp
├── dfs
│ ├── data
│ ├── name
│ └── namesecondary
文件切分成块(默认为128M)
默认块的副本数是3
bin/hdfs dfsadmin
安装maven环境
tar -zxf apache-maven-3.5.0-bin.tar.gz -C /opt/modules/
tar xzvf apache-maven-3.5.0-bin.tar.gz
设置环境变量
/opt/modules/apache-maven-3.5.0
vim /etc/profile
MAVEN_HOME=/opt/modules/apache-maven-3.5.0
PATH=$PATH:$MAVEN_HOME/bin
保存退出
source /etc/profile
查看maven 版本
mvn -version
设置快捷方式
/usr/share/applications/eclipse.desktop
查看默认启动字符界面还是图形化界面
systemctl get-default
设置默认启动模式:
systemctl set-default graphical.target   # 图形化界面
systemctl set-default multi-user.target  # 字符界面
=================================================================================
MapReduce 数据类型
Long -> LongWritable
Int -> IntWritable
=================================================================================
过程
* step 1:
Input
InputFormat
* 读取数据
* 转换成<key,value>
读取文件路径
sudo bin/hdfs dfs -text /user/hadoop/mapreduce/wordcount/input/wc.input
/**
 * Minimal HDFS client demo: opens a file on HDFS and streams its
 * contents to standard output.
 */
public class HdfsApp {

    /** Default file to read when no path is supplied on the command line. */
    private static final String DEFAULT_FILE =
            "/user/hadoop/mapreduce/wordcount/input/wc.input";

    /**
     * Builds a {@link FileSystem} from the default (classpath) Hadoop
     * configuration.
     *
     * @return the configured file system (HDFS vs. local is decided by
     *         {@code fs.defaultFS} in the loaded configuration)
     * @throws Exception if the file system cannot be created
     */
    public static FileSystem getFileSystem() throws Exception {
        Configuration conf = new Configuration();
        return FileSystem.get(conf);
    }

    /**
     * Reads an HDFS file and copies its bytes to stdout.
     *
     * @param args optional; {@code args[0]} is the HDFS path to read.
     *             Falls back to {@link #DEFAULT_FILE} when absent.
     * @throws Exception if the file system cannot be obtained or the
     *                   file cannot be opened
     */
    public static void main(String[] args) throws Exception {
        // FIX: path was hard-coded and args ignored; allow overriding
        // via the first CLI argument (backward-compatible default kept).
        String fileName = (args.length > 0) ? args[0] : DEFAULT_FILE;
        FileSystem fileSystem = getFileSystem();
        try {
            FSDataInputStream inStream = fileSystem.open(new Path(fileName));
            try {
                // false: keep System.out open after the copy finishes.
                IOUtils.copyBytes(inStream, System.out, 4096, false);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeStream(inStream);
            }
        } finally {
            // FIX: the FileSystem handle was previously leaked.
            fileSystem.close();
        }
    }
}