学习日志---7

1.复习Linux hadoop hdfs MapReduce基础知识

1，列举linux常用命令
shutdown now
reboot
mkdir
mkdir -p
touch filename
rm -r filename
rm -rf filename
vi filename
i--->可编辑状态
esc --> : --->wq 保存退出
q!
wq!
cat
grep
find
ifconfig
ping
useradd
chmod 777 filename
more
cp srcfile destfile
mv
tar -zxvf filename -C destpath
ls
ls -a
pwd 查看当前目录的绝对路径
firewall:
service iptables start/stop/status/restart
centos7 : systemctl stop firewalld.service
firewall-cmd --state
修改网络配置文件：
/etc/sysconfig/network-scripts/ifcfg-eth1
配置环境变量
/etc/profile
~/.bash_profile
/etc/profile.d/x.sh
JAVA
export JAVA_HOME=/root/app/jdk1.8.0_161
export PATH=$JAVA_HOME/bin:$PATH
配置文件生效：
source ~/.bash_profile
查看java安装目录
echo $JAVA_HOME
2，hash函数特点是什么
代码论道
字符串----->固定位数的Hash值。
1，确定性
str1--->hash1值
str1---->hash1值（同一输入总是得到同一个hash值）
2，单向性
str----hash()----->hash值
3，防篡改
str1--->bit--->hash()--->hash值
4，防碰撞
str1 ---> hash1
str2---->hash2（不同输入极难得到相同的hash值）
mapreduce分区默认实现：
hash(key) % reduce task数量
Hash实现：
SHA
MD5
区块链：不可篡改
链表+hash(key)
3，hdfs如何实现文件的存储
NameNode:
处理客户端的请求
操作元数据(修改文件名、打开文件、关闭文件、DataNode info)
DataNode
存储文件数据block,blocksize=128MB
发送心跳信息，将自身所有存储的block信息、健康状况发送给 NameNode。
SecondaryNameNode:hadoop2.x可有可无
130MB--> 128MB block + 2MB block
4，hdfs副本存放机制？
block 默认3份。
5，hdfs主要进程、yarn的主要进程分别是？
NameNode
DataNode
SecondaryNameNode
yarn:资源调度框架
ResourceManager
NodeManager
6，简述mapreduce计算流程
7,搭建伪分布式步骤
centos6.5
1，前置准备
java
tar -zxvf
配置环境变量
ssh
1,安装ssh服务器
openssh-server
2, ssh-keygen -t rsa
cd ~/.ssh
id_rsa id_rsa.pub
cp id_rsa.pub authorized_keys
3,ssh-copy-id
2,安装hadoop
2.1解压hadoop tar -zxvf hadoop-xx.tar.gz
2.2 配置文件
$HADOOP_HOME/etc/hadoop
hadoop-env.sh
JAVA_HOME
core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://Master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/root/hadoop/app/tmp</value>
</property>
hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
HDFS Shell操作：
1,查看Hdfs上的文件
hdfs dfs -ls /
hadoop fs -ls /
2,将本地文件上传到hdfs
hdfs dfs -put hello.txt /
3,查看hdfs文件内容
hdfs dfs -cat /hello.txt
hdfs dfs -text /hello.txt
4，在hdfs创建目录
hdfs dfs -mkdir /hello
5，hdfs递归创建目录
hdfs dfs -mkdir -p /test/a/b
6，递归查询目录
hdfs dfs -ls -R /test
7,将hdfs上的文件下载到本地
hdfs dfs -get /hello.txt
8,将本地文件拷贝到hdfs上 /test/a/b
hdfs dfs -copyFromLocal hello.txt /test/a/b/h.txt
9,删除hdfs上目录
hdfs dfs -rm -R /hello
10,删除hdfs上的文件
hdfs dfs -rm /hello.txt

2.java操作HDFS文件

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

 11 public class HDFSTest {
 12 
 13     public static void main(String[] args) {
 14         // TODO 自动生成的方法存根
 15         System.setProperty("hadoop.home.dir", "E:\hadoop");
 16         
 17         String str="";
 18         //createDir("/new/abc.txt");
 19         //delete("/new");
 20         //createFile("/new/abc.txt");
 21         //write(str);
 22         append(str);
 23         read();
 24 
 25     }
 26     
 27     //向文件中写入
 28     public static void write(String str){
 29         if(str==null||str.equals("")){
 30             str="Hello World!
";
 31         }
 32         Configuration conf=new Configuration();
 33         conf.set("fs.default.name", "hdfs://192.168.137.11:9000");
 34         try {
 35             FileSystem fs=FileSystem.get(conf);
 36             FSDataOutputStream outpustream = fs.create(new Path("/hadoop/a.txt"));
 37             outpustream.writeBytes(str);
 38             outpustream.close();
 39         } catch (IOException e) {
 40             // TODO Auto-generated catch block
 41             System.out.println(e.getMessage());
 42         }
 43     }
 44     //向文件中添加
 45     public static void append(String str){
 46         if(str==null||str.equals("")){
 47             str="Hello World!
";
 48         }
 49         Configuration conf=new Configuration();
 50         conf.set("fs.default.name", "hdfs://192.168.137.11:9000");
 51         conf.set("dfs.support.append", "true");
 52         conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
 53         conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
 54         try {
 55             FileSystem fs=FileSystem.get(conf);
 56             FSDataOutputStream outpustream = fs.append(new Path("/hadoop/a.txt"));
 57             outpustream.writeBytes(str);
 58             outpustream.close();
 59         } catch (IOException e) {
 60             // TODO Auto-generated catch block
 61             System.out.println(e.getMessage());
 62         }
 63     }
 64     //读取文件，并打印在控制台
 65     public static void read(){
 66         Configuration conf=new Configuration();
 67         conf.set("fs.default.name", "hdfs://192.168.137.11:9000");
 68         try {
 69             FileSystem fs=FileSystem.get(conf);
 70             //创建输入流
 71             FSDataInputStream inputstream = fs.open(new Path("/hadoop/a.txt"));
 72             InputStreamReader isr=new InputStreamReader(inputstream);
 73             BufferedReader br=new BufferedReader(isr);
 74             String str=br.readLine();
 75             
 76             while(str!=null){
 77                 System.out.println(str);
 78                 str=br.readLine();
 79             }
 80             br.close();
 81             isr.close();
 82             inputstream.close();
 83             
 84         } catch (IOException e) {
 85             // TODO 自动生成的 catch 块
 86             e.printStackTrace();
 87         }
 88         
 89     }
 90     //创建目录
 91     public static void createDir(String path) {
 92         Configuration configuration=new Configuration();
 93         configuration.set("fs.default.name", "hdfs://192.168.137.11:9000");
 94         try {
 95             FileSystem fs=FileSystem.newInstance(configuration);
 96             fs.mkdirs(new Path(path));
 97         } catch (IOException e) {
 98             // TODO 自动生成的 catch 块
 99             e.printStackTrace();
100         }
101         
102     }
103     //删除文件
104     public static void delete(String path) {
105         Configuration configuration=new Configuration();
106         configuration.set("fs.default.name", "hdfs://192.168.137.11:9000");
107         try {
108             FileSystem fs=FileSystem.newInstance(configuration);
109             fs.delete(new Path(path),true);
110         } catch (IOException e) {
111             // TODO 自动生成的 catch 块
112             e.printStackTrace();
113         }
114     }
115     
116     //创建文件
117     public static void createFile(String path) {
118         Configuration configuration=new Configuration();
119         configuration.set("fs.default.name", "hdfs://192.168.137.11:9000");
120         try {
121             FileSystem fileSystem=FileSystem.newInstance(configuration);
122             fileSystem.createNewFile(new Path(path));
123         } catch (IOException e) {
124             // TODO 自动生成的 catch 块
125             e.printStackTrace();
126         }
127         
128     }
129 }

相关阅读:
flask-scripts
mysql相关
 day9:函数
 day8:文件操作
 day7:set和深浅copy
day6:前两小节补充
 day5:字典dict
day4:数据结构list
piano class 13
day3:数据类型 str
原文地址：https://www.cnblogs.com/yifengyifeng/p/9319099.html