之前调试程序都是在本机用的单机模式,体系结构老师要求实验要用完全分布模式的环境运行程序。
不想用虚拟机,完全是因为不习惯它的界面,于是改用了 Docker。
下面的脚本与之前那篇 Hadoop 环境搭建的内容会有部分重叠。
参考了博客:http://tashan10.com/yong-dockerda-jian-hadoopwei-fen-bu-shi-ji-qun/ ,但步骤并不完全一致,因为我希望 Java 和 Hadoop 的版本与本机保持一致。
# Reference: http://tashan10.com/yong-dockerda-jian-hadoopwei-fen-bu-shi-ji-qun/
#
# Ubuntu 14.04 LTS + Hadoop 2.7.1, fully distributed, on Docker.
#
# This is a step-by-step walkthrough, not an unattended script: the commands
# are typed by hand, some on the host and some inside a container. Each stage
# is tagged [host] or [container]; the interactive steps (vi, docker run -ti,
# exit) mark where one shell ends and the next begins.

#-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-#
# [host] Install docker.
# The package is "apt-transport-https" (one name); written with a space,
# apt-get looks for two packages that do not exist.
sudo apt-get install apt-transport-https
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9
sudo bash -c "echo deb https://get.docker.io/ubuntu docker main > /etc/apt/sources.list.d/docker.list"
sudo apt-get update
sudo apt-get install lxc-docker

# [host] Run docker without "sudo": add the current user to the docker group.
# (The original notes hard-coded the author's username, kirai.)
sudo groupadd docker
sudo gpasswd -a "${USER}" docker
sudo reboot

# [host] Pull the ubuntu image.
docker pull ubuntu:14.04

# [host] Start a container from the ubuntu image.
docker run -ti ubuntu:14.04

# [container] Optional: change the package mirror.
# The container runs Ubuntu 14.04, so the suite must be "trusty"; "xenial"
# (16.04) packages do not belong on this release. Plain http is used here
# because https sources require apt-transport-https inside the container.
# mv /etc/apt/sources.list /etc/apt/sources.list.bk
# echo "deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty main restricted universe multiverse" > /etc/apt/sources.list
# echo "deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-updates main restricted universe multiverse" >> /etc/apt/sources.list
# echo "deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list
# echo "deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-security main restricted universe multiverse" >> /etc/apt/sources.list

# [container] Install java in the container.
# Refresh the package index first; a freshly started container may not have
# one, in which case the install below cannot find its packages.
apt-get update
apt-get install software-properties-common python-software-properties
add-apt-repository ppa:webupd8team/java
apt-get update
apt-get install oracle-java7-installer
exit

# [host] Persistence (4631f498dec7 was the id of my container with java;
# look up your own with `docker ps -a`).
docker commit -m "java installed" 4631f498dec7 ubuntu:java

# [host] Restart the container with java.
docker run -ti ubuntu:java

# [container] Install hadoop.
# Reference: ubuntu 14.04 LTS hadoop installation tutorial by me.
## get & install hadoop 2.7.1 (under user: hadoop)
cd ~
mkdir hadoop
sudo wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz  # get hadoop 2.7.1
sudo tar xzf hadoop-2.7.1.tar.gz         # unpack the hadoop-*.*.*.tar.gz
sudo rm hadoop-2.7.1.tar.gz              # remove the archive
sudo mv hadoop-2.7.1 /usr/local/         # install hadoop under /usr/local
# The tree lives at /usr/local/hadoop-2.7.1 (see the mv above and every later
# step); /usr/local/hadoop does not exist.
sudo chmod 774 /usr/local/hadoop-2.7.1   # grant permissions to users (r&w)

## configure ~/.bashrc
# Get java's path: take the starred entry, e.g.
# '/usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java'; only the shorter prefix
# '/usr/lib/jvm/java-7-openjdk-amd64' is needed for JAVA_HOME.
update-alternatives --config java
sudo vi ~/.bashrc                        # edit the bashrc file

## add the content below to the end of bashrc
#--------------------------------------------------------------#

export JAVA_HOME=/usr/lib/jvm/java-7-oracle
export HADOOP_INSTALL=/usr/local/hadoop-2.7.1
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib"
#HADOOP VARIABLES END

#--------------------------------------------------------------#
source ~/.bashrc                         # make the environment variables take effect

## configure hadoop
sudo vi /usr/local/hadoop-2.7.1/etc/hadoop/hadoop-env.sh  # edit hadoop-env.sh
# (in vim) type ?JAVA_HOME to locate JAVA_HOME, then change
# 'export JAVA_HOME=${JAVA_HOME}' into 'export JAVA_HOME=/usr/lib/jvm/java-7-oracle'
source /usr/local/hadoop-2.7.1/etc/hadoop/hadoop-env.sh   # update

## test
cd /usr/local/hadoop-2.7.1/
sudo mkdir input
sudo cp README.txt input
# Run the compiled examples jar. The *-sources.jar under sources/ holds only
# .java files, so WordCount cannot be loaded from it.
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount input output
exit                                     # leave the container, as in the other stages

#----------------- FINISHED INSTALLATION ---------------------------#
# [host] Persistence (8d9a50a0ee10 was the id of my container with hadoop).
docker commit -m "hadoop installed" 8d9a50a0ee10 ubuntu:hadoop

# [host] Start a container from the image with hadoop.
docker run -ti ubuntu:hadoop

# [container] Configure hadoop.
# Mainly core-site.xml, hdfs-site.xml and mapred-site.xml.
cd /usr/local/hadoop-2.7.1
mkdir tmp
mkdir namenode
mkdir datanode

cd etc/hadoop
cp mapred-site.xml.template mapred-site.xml

# Each `cat <<'XML'` below only prints a snippet; paste the printed lines
# between <configuration> and </configuration> in the file opened by vi.

vi core-site.xml
#--add contents between <configuration></configuration>--#
# fs.defaultFS is the Hadoop 2 name of the deprecated fs.default.name key.
cat <<'XML'
<property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop-2.7.1/tmp</value>
    <description>A base for other temporary directories.</description>
</property>

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
    <final>true</final>
    <description>The name of the default file system. A URI whose
    scheme and authority determine the FileSystem implementation. The
    uri's scheme determines the config property (fs.SCHEME.impl) naming
    the FileSystem implementation class. The uri's authority is used to
    determine the host, port, etc. for a filesystem.</description>
</property>
XML
#----#

vi hdfs-site.xml
#--add contents between <configuration></configuration> (one master, two slaves)--#
cat <<'XML'
<property>
    <name>dfs.replication</name>
    <value>2</value>
    <final>true</final>
    <description>Default block replication.
    The actual number of replications can be specified when the file is created.
    The default is used if replication is not specified in create time.
    </description>
</property>

<property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/local/hadoop-2.7.1/namenode</value>
    <final>true</final>
</property>

<property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/hadoop-2.7.1/datanode</value>
    <final>true</final>
</property>
XML
#----#

vi mapred-site.xml
#--add contents between <configuration></configuration>--#
# NOTE(review): mapred.job.tracker is a Hadoop 1 (JobTracker) setting. On
# Hadoop 2.x, jobs are submitted to YARN only when mapreduce.framework.name is
# "yarn" -- confirm whether that is configured in a later step.
cat <<'XML'
<property>
    <name>mapred.job.tracker</name>
    <value>master:9001</value>
    <description>
    The host and port that the MapReduce job tracker runs
    at. If "local", then jobs are run in-process as a single map
    and reduce task.
    </description>
</property>
XML
#----#

# [container] Format namenode.
# `hdfs namenode -format` is the Hadoop 2 spelling of the deprecated
# `hadoop namenode -format`.
hdfs namenode -format

# [container] Install ssh and set up password-less login.
apt-get install ssh
cd ~
# The key is RSA, so it is stored under the matching default name id_rsa
# (the original notes wrote it to a file called id_dsa).
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys

vi ~/.bashrc
#--append--#
#autorun
/usr/sbin/sshd
#----#

# [container] ifconfig enable.
apt-get install net-tools
exit

# [host] Persistence (342b9f9e1893 was the id of my container with
# configured hadoop).
docker commit -m "configured hadoop" 342b9f9e1893 ubuntu:chadoop


#-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-#
# Build distributed environment.

# [host] Open three terminals and run one command in each:
docker run -ti -h slave1 ubuntu:chadoop
docker run -ti -h slave2 ubuntu:chadoop
docker run -ti -h master ubuntu:chadoop

# Write down the master and slaves' IP (check with ifconfig in each container):
# slave1:172.17.0.2
# slave2:172.17.0.3
# master:172.17.0.4

# [container] For each container:
vi /etc/hosts

#--write down--#
172.17.0.2 slave1
172.17.0.3 slave2
172.17.0.4 master
#----#

# [container] For master:
vi /usr/local/hadoop-2.7.1/etc/hadoop/slaves

#--write down--#
slave1
slave2
#----#

#-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-#