Hadoop Part 1: Installation and Testing


    I. Hadoop installation (local mode and pseudo-distributed)

    Hadoop release archive (all historical versions): http://archive.apache.org/dist/
    Run modes:
        local (standalone) mode
        YARN mode (pseudo-distributed, as set up below)

    Hadoop components:
        common: base libraries and commands
        hdfs: distributed file system; data safety via block replication (replica count is configurable)
        yarn: the "data operating system" of the cluster (roughly what a Linux OS is to a single machine)
        mapreduce: distributed computing framework
            input -> map -> shuffle -> reduce -> output
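
    The map -> shuffle -> reduce flow can be mimicked with ordinary shell tools. This is a rough single-machine analogy only, not how Hadoop implements it: tr plays the map (emit one word per line), sort plays the shuffle (group identical keys together), and uniq -c plays the reduce (count each group).

    # wordcount analogy: map = split into words, shuffle = sort, reduce = count
    cat wc.input | tr -s ' ' '\n' | sort | uniq -c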

    1. Install and configure the JDK
    [root@db01 mnt]#tar -zxvf jdk-7u67-linux-x64.tar.gz
    [root@db01 mnt]#mkdir /usr/java
    [root@db01 mnt]#mv jdk1.7.0_67/ /usr/java/
    [root@db01 mnt]#vim /etc/profile
    export JAVA_HOME=/usr/java/jdk1.7.0_67
    export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
    export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$CLASSPATH
    [root@db01 mnt]#source /etc/profile
    [root@db01 mnt]# java -version
    java version "1.7.0_67"
    Java(TM) SE Runtime Environment (build 1.7.0_67-b01)
    Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)

    ----- JDK configured successfully -----

    2. Install the Hadoop software (run as root; the later steps assume a hadoop user and group already exist)

    [root@db01 mnt]#tar -zxvf hadoop-2.5.0.tar.gz
    [root@db01 mnt]#mv /mnt/hadoop-2.5.0 /usr/local/hadoop-2.5.0/
    [root@db01 mnt]#chown -R hadoop:hadoop /usr/local/hadoop-2.5.0/

    3. Test (local / standalone mode)

    [hadoop@db01 hadoop-2.5.0]$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar grep input output 'dfs[a-z.]+'
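
    Note: the grep example above assumes a local input directory already exists. The usual preparation (a sketch, following the upstream single-node quickstart) is:

    [hadoop@db01 hadoop-2.5.0]$mkdir input
    [hadoop@db01 hadoop-2.5.0]$cp etc/hadoop/*.xml input/
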
    [hadoop@db01 hadoop-2.5.0]$mkdir wcinput
    [hadoop@db01 hadoop-2.5.0]$cd wcinput/
    [hadoop@db01 hadoop-2.5.0]$touch wc.input
    [hadoop@db01 hadoop-2.5.0]$vim wc.input

    hadoop yarn
    hadoop mapreduce
    hadoop hdfs
    yarn nodemanager
    hadoop resourcemanager


    [hadoop@db01 hadoop-2.5.0]$cd ../
    [hadoop@db01 hadoop-2.5.0]$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount wcinput wcoutput

    4. Edit the configuration files to set up HDFS

    [hadoop@db01 hadoop-2.5.0]$vim etc/hadoop/hadoop-env.sh
    export JAVA_HOME=/usr/java/jdk1.7.0_67

    [hadoop@db01 hadoop-2.5.0]$mkdir -p data/tmp
    [hadoop@db01 hadoop-2.5.0]$vim etc/hadoop/core-site.xml

    <configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://db01:9000</value>
        </property>

            <property>
                    <name>hadoop.tmp.dir</name>
                    <value>/usr/local/hadoop-2.5.0/data/tmp</value>
            </property>
    </configuration>


    [hadoop@db01 hadoop-2.5.0]$vim etc/hadoop/hdfs-site.xml

    <configuration>

        <property>
                <name>dfs.replication</name>
                    <value>1</value>
        </property>

    </configuration>
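
    A quick sanity check that the settings were picked up (hdfs getconf prints the effective value for a key):

    [hadoop@db01 hadoop-2.5.0]$bin/hdfs getconf -confKey fs.defaultFS
    hdfs://db01:9000
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs getconf -confKey dfs.replication
    1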

    5. Format the HDFS filesystem
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs namenode -format

    6. Start the NameNode and the DataNode

    [hadoop@db01 hadoop-2.5.0]$sbin/hadoop-daemon.sh start namenode
    [hadoop@db01 hadoop-2.5.0]$sbin/hadoop-daemon.sh start datanode
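
    If startup succeeded, jps should list both daemons; if one is missing, check the logs/ directory (log files are named hadoop-<user>-<daemon>-<host>.log):

    [hadoop@db01 hadoop-2.5.0]$jps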

    7. Access the HDFS web UI in a browser

    URL: http://db01:50070/

    8. Create a working directory on HDFS and test wordcount against HDFS

    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -mkdir -p /user/hadoop/
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -ls -R /
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -mkdir -p /user/hadoop/mapreduce/wordcount/input
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -put wcinput/wc.input /user/hadoop/mapreduce/wordcount/input/
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/input/wc.input
    [hadoop@db01 hadoop-2.5.0]$bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output/
    [hadoop@db01 hadoop-2.5.0]$bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output/part-r-00000

    9. Configure YARN

    [hadoop@db01 hadoop-2.5.0]$ vim etc/hadoop/yarn-env.sh
    export JAVA_HOME=/usr/java/jdk1.7.0_67

    [hadoop@db01 hadoop-2.5.0]$ vim etc/hadoop/yarn-site.xml

    <configuration>
            <property>
                    <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
            </property>

            <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>db01</value>
            </property>
    </configuration>

    [hadoop@db01 hadoop-2.5.0]$ vim etc/hadoop/slaves
    db01

    10. Start YARN

    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh start resourcemanager
    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh start nodemanager

    [hadoop@db01 hadoop-2.5.0]$ jps
    14573 NodeManager
    13490 DataNode
    13400 NameNode
    14685 Jps
    14315 ResourceManager

    11. Open the YARN web UI in a browser

    http://db01:8088

    12. Configure MapReduce

    [hadoop@db01 hadoop-2.5.0]$ vim etc/hadoop/mapred-env.sh
    export JAVA_HOME=/usr/java/jdk1.7.0_67

    [hadoop@db01 hadoop-2.5.0]$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
    [hadoop@db01 hadoop-2.5.0]$ vim etc/hadoop/mapred-site.xml
    <configuration>

            <property>
                    <name>mapreduce.framework.name</name>
                    <value>yarn</value>
            </property>

    </configuration>

    13. Test wordcount (now running on YARN)

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -rm -R /user/hadoop/mapreduce/wordcount/output/
    17/03/01 17:16:03 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    17/03/01 17:16:04 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
    Deleted /user/hadoop/mapreduce/wordcount/output
    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -ls -R /
    17/03/01 17:16:28 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:04 /user
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 /user/hadoop
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 /user/hadoop/mapreduce
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 17:16 /user/hadoop/mapreduce/wordcount
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:08 /user/hadoop/mapreduce/wordcount/input
    -rw-r--r--   1 hadoop supergroup         81 2017-03-01 16:08 /user/hadoop/mapreduce/wordcount/input/wc.input

    [hadoop@db01 hadoop-2.5.0]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output/
    17/03/01 17:18:08 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    17/03/01 17:18:09 INFO client.RMProxy: Connecting to ResourceManager at db01/192.168.100.231:8032
    17/03/01 17:18:10 INFO input.FileInputFormat: Total input paths to process : 1
    17/03/01 17:18:10 INFO mapreduce.JobSubmitter: number of splits:1
    17/03/01 17:18:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1488358618376_0001
    17/03/01 17:18:11 INFO impl.YarnClientImpl: Submitted application application_1488358618376_0001
    17/03/01 17:18:11 INFO mapreduce.Job: The url to track the job: http://db01:8088/proxy/application_1488358618376_0001/
    17/03/01 17:18:11 INFO mapreduce.Job: Running job: job_1488358618376_0001
    17/03/01 17:18:19 INFO mapreduce.Job: Job job_1488358618376_0001 running in uber mode : false
    17/03/01 17:18:19 INFO mapreduce.Job:  map 0% reduce 0%
    17/03/01 17:18:25 INFO mapreduce.Job:  map 100% reduce 0%
    17/03/01 17:18:31 INFO mapreduce.Job:  map 100% reduce 100%
    17/03/01 17:18:31 INFO mapreduce.Job: Job job_1488358618376_0001 completed successfully
    17/03/01 17:18:31 INFO mapreduce.Job: Counters: 49
        File System Counters
            FILE: Number of bytes read=97
            FILE: Number of bytes written=194147
            FILE: Number of read operations=0
            FILE: Number of large read operations=0
            FILE: Number of write operations=0
            HDFS: Number of bytes read=209
            HDFS: Number of bytes written=67
            HDFS: Number of read operations=6
            HDFS: Number of large read operations=0
            HDFS: Number of write operations=2
        Job Counters
            Launched map tasks=1
            Launched reduce tasks=1
            Data-local map tasks=1
            Total time spent by all maps in occupied slots (ms)=3516
            Total time spent by all reduces in occupied slots (ms)=3823
            Total time spent by all map tasks (ms)=3516
            Total time spent by all reduce tasks (ms)=3823
            Total vcore-seconds taken by all map tasks=3516
            Total vcore-seconds taken by all reduce tasks=3823
            Total megabyte-seconds taken by all map tasks=3600384
            Total megabyte-seconds taken by all reduce tasks=3914752
        Map-Reduce Framework
            Map input records=5
            Map output records=10
            Map output bytes=121
            Map output materialized bytes=97
            Input split bytes=128
            Combine input records=10
            Combine output records=6
            Reduce input groups=6
            Reduce shuffle bytes=97
            Reduce input records=6
            Reduce output records=6
            Spilled Records=12
            Shuffled Maps =1
            Failed Shuffles=0
            Merged Map outputs=1
            GC time elapsed (ms)=47
            CPU time spent (ms)=1690
            Physical memory (bytes) snapshot=411054080
            Virtual memory (bytes) snapshot=1784795136
            Total committed heap usage (bytes)=275251200
        Shuffle Errors
            BAD_ID=0
            CONNECTION=0
            IO_ERROR=0
            WRONG_LENGTH=0
            WRONG_MAP=0
            WRONG_REDUCE=0
        File Input Format Counters
            Bytes Read=81
        File Output Format Counters
            Bytes Written=67

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -ls -R /user/hadoop/mapreduce/wordcount/output/
    17/03/01 17:19:42 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    -rw-r--r--   1 hadoop supergroup          0 2017-03-01 17:18 /user/hadoop/mapreduce/wordcount/output/_SUCCESS
    -rw-r--r--   1 hadoop supergroup         67 2017-03-01 17:18 /user/hadoop/mapreduce/wordcount/output/part-r-00000

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output/part-r-00000
    17/03/01 17:20:58 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    hadoop    4
    hdfs    1
    mapreduce    1
    nodemanager    1
    resourcemanager    1
    yarn    2

    14. Test wordcount via bin/yarn (the output directory must not already exist, otherwise the job fails)
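
    Before resubmitting a job, a stale output directory can be removed defensively (a sketch; hdfs dfs -test -d exits 0 when the path is an existing directory):

    out=/user/hadoop/mapreduce/wordcount/output2
    bin/hdfs dfs -test -d $out && bin/hdfs dfs -rm -r $out   # drop leftover output first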

    [hadoop@db01 hadoop-2.5.0]$ bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output2/
    17/03/01 17:43:08 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    17/03/01 17:43:09 INFO client.RMProxy: Connecting to ResourceManager at db01/192.168.100.231:8032
    17/03/01 17:43:10 INFO input.FileInputFormat: Total input paths to process : 1
    17/03/01 17:43:10 INFO mapreduce.JobSubmitter: number of splits:1
    17/03/01 17:43:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1488358618376_0002
    17/03/01 17:43:11 INFO impl.YarnClientImpl: Submitted application application_1488358618376_0002
    17/03/01 17:43:11 INFO mapreduce.Job: The url to track the job: http://db01:8088/proxy/application_1488358618376_0002/
    17/03/01 17:43:11 INFO mapreduce.Job: Running job: job_1488358618376_0002
    17/03/01 17:43:18 INFO mapreduce.Job: Job job_1488358618376_0002 running in uber mode : false
    17/03/01 17:43:18 INFO mapreduce.Job:  map 0% reduce 0%
    17/03/01 17:43:23 INFO mapreduce.Job:  map 100% reduce 0%
    17/03/01 17:43:29 INFO mapreduce.Job:  map 100% reduce 100%
    17/03/01 17:43:30 INFO mapreduce.Job: Job job_1488358618376_0002 completed successfully
    17/03/01 17:43:30 INFO mapreduce.Job: Counters: 49
        File System Counters
            FILE: Number of bytes read=97
            FILE: Number of bytes written=194149
            FILE: Number of read operations=0
            FILE: Number of large read operations=0
            FILE: Number of write operations=0
            HDFS: Number of bytes read=209
            HDFS: Number of bytes written=67
            HDFS: Number of read operations=6
            HDFS: Number of large read operations=0
            HDFS: Number of write operations=2
        Job Counters
            Launched map tasks=1
            Launched reduce tasks=1
            Data-local map tasks=1
            Total time spent by all maps in occupied slots (ms)=3315
            Total time spent by all reduces in occupied slots (ms)=3460
            Total time spent by all map tasks (ms)=3315
            Total time spent by all reduce tasks (ms)=3460
            Total vcore-seconds taken by all map tasks=3315
            Total vcore-seconds taken by all reduce tasks=3460
            Total megabyte-seconds taken by all map tasks=3394560
            Total megabyte-seconds taken by all reduce tasks=3543040
        Map-Reduce Framework
            Map input records=5
            Map output records=10
            Map output bytes=121
            Map output materialized bytes=97
            Input split bytes=128
            Combine input records=10
            Combine output records=6
            Reduce input groups=6
            Reduce shuffle bytes=97
            Reduce input records=6
            Reduce output records=6
            Spilled Records=12
            Shuffled Maps =1
            Failed Shuffles=0
            Merged Map outputs=1
            GC time elapsed (ms)=38
            CPU time spent (ms)=1690
            Physical memory (bytes) snapshot=400715776
            Virtual memory (bytes) snapshot=1776209920
            Total committed heap usage (bytes)=274202624
        Shuffle Errors
            BAD_ID=0
            CONNECTION=0
            IO_ERROR=0
            WRONG_LENGTH=0
            WRONG_MAP=0
            WRONG_REDUCE=0
        File Input Format Counters
            Bytes Read=81
        File Output Format Counters
            Bytes Written=67
    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output2/
    17/03/01 17:44:11 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    cat: `/user/hadoop/mapreduce/wordcount/output2': Is a directory
    [hadoop@db01 hadoop-2.5.0]$
    [hadoop@db01 hadoop-2.5.0]$
    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -cat /user/hadoop/mapreduce/wordcount/output2/part*
    17/03/01 17:44:40 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    hadoop    4
    hdfs    1
    mapreduce    1
    nodemanager    1
    resourcemanager    1
    yarn    2

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -text /user/hadoop/mapreduce/wordcount/output2/part*
    17/03/01 17:47:38 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    hadoop    4
    hdfs    1
    mapreduce    1
    nodemanager    1
    resourcemanager    1
    yarn    2

    Note: MapReduce sorts the results by key by default.


    15. Start the MapReduce job history server

    [hadoop@db01 hadoop-2.5.0]$ sbin/mr-jobhistory-daemon.sh start historyserver
    starting historyserver, logging to /usr/local/hadoop-2.5.0/logs/mapred-hadoop-historyserver-db01.out

    [hadoop@db01 hadoop-2.5.0]$ jps
    14573 NodeManager
    13490 DataNode
    13400 NameNode
    14315 ResourceManager
    16366 Jps
    16296 JobHistoryServer

    16. Enable YARN log aggregation
    Aggregation: after a MapReduce job completes, its container logs are uploaded to HDFS.

    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/yarn-site.xml

    <configuration>

        <property>
                <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
        </property>

            <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>db01</value>
            </property>

    <!-- Enable log aggregation -->
            <property>
                    <name>yarn.log-aggregation-enable</name>
                    <value>true</value>
            </property>
    <!-- Retain aggregated logs for about 7 days (unit: seconds; 600000 s is ~6.9 days, exactly 7 days would be 604800) -->
            <property>
                    <name>yarn.log-aggregation.retain-seconds</name>
                    <value>600000</value>
            </property>

    </configuration>

    ---------- Restart the YARN services and the history server:

    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh stop resourcemanager
    stopping resourcemanager
    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh stop nodemanager
    stopping nodemanager
    nodemanager did not stop gracefully after 5 seconds: killing with kill -9
    [hadoop@db01 hadoop-2.5.0]$ jps
    13490 DataNode
    13400 NameNode
    16511 Jps
    16296 JobHistoryServer
    [hadoop@db01 hadoop-2.5.0]$ sbin/mr-jobhistory-daemon.sh stop historyserver
    stopping historyserver
    [hadoop@db01 hadoop-2.5.0]$ jps
    13490 DataNode
    13400 NameNode
    16548 Jps
    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh start resourcemanager
    starting resourcemanager, logging to /usr/local/hadoop-2.5.0/logs/yarn-hadoop-resourcemanager-db01.out
    [hadoop@db01 hadoop-2.5.0]$ sbin/yarn-daemon.sh start nodemanager
    starting nodemanager, logging to /usr/local/hadoop-2.5.0/logs/yarn-hadoop-nodemanager-db01.out
    [hadoop@db01 hadoop-2.5.0]$ sbin/mr-jobhistory-daemon.sh start historyserver
    starting historyserver, logging to /usr/local/hadoop-2.5.0/logs/mapred-hadoop-historyserver-db01.out
    [hadoop@db01 hadoop-2.5.0]$ jps
    16584 ResourceManager
    13490 DataNode
    13400 NameNode
    16834 NodeManager
    16991 JobHistoryServer
    17028 Jps
    [hadoop@db01 hadoop-2.5.0]$

    17. Re-run the wordcount job to test YARN log aggregation:
    [hadoop@db01 hadoop-2.5.0]$ bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/hadoop/mapreduce/wordcount/input/ /user/hadoop/mapreduce/wordcount/output3/
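
    With aggregation on, the logs can also be fetched from the command line once the job finishes (the application ID below is the one that appears in the syslog excerpt further down):

    [hadoop@db01 hadoop-2.5.0]$ bin/yarn logs -applicationId application_1488364479714_0001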

    View the logs in the browser (http://db01:8088/):

    Log Type: stderr
    Log Length: 0

    Log Type: stdout
    Log Length: 0

    Log Type: syslog
    Log Length: 3816
    2017-03-01 18:36:45,873 WARN [main] org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
    2017-03-01 18:36:45,911 WARN [main] org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
    2017-03-01 18:36:46,130 WARN [main] org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    2017-03-01 18:36:46,239 INFO [main] org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties
    2017-03-01 18:36:46,319 INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s).
    2017-03-01 18:36:46,319 INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask metrics system started
    2017-03-01 18:36:46,335 INFO [main] org.apache.hadoop.mapred.YarnChild: Executing with tokens:
    2017-03-01 18:36:46,335 INFO [main] org.apache.hadoop.mapred.YarnChild: Kind: mapreduce.job, Service: job_1488364479714_0001, Ident: (org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier@55c290b4)
    2017-03-01 18:36:46,427 INFO [main] org.apache.hadoop.mapred.YarnChild: Sleeping for 0ms before retrying again. Got null now.
    2017-03-01 18:36:46,732 INFO [main] org.apache.hadoop.mapred.YarnChild: mapreduce.cluster.local.dir for child: /usr/local/hadoop-2.5.0/data/tmp/nm-local-dir/usercache/hadoop/appcache/application_1488364479714_0001
    2017-03-01 18:36:46,863 WARN [main] org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
    2017-03-01 18:36:46,878 WARN [main] org.apache.hadoop.conf.Configuration: job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
    2017-03-01 18:36:47,202 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
    2017-03-01 18:36:47,668 INFO [main] org.apache.hadoop.mapred.Task:  Using ResourceCalculatorProcessTree : [ ]
    2017-03-01 18:36:47,873 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: hdfs://db01:9000/user/hadoop/mapreduce/wordcount/input/wc.input:0+81
    2017-03-01 18:36:47,887 INFO [main] org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
    2017-03-01 18:36:47,953 INFO [main] org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
    2017-03-01 18:36:47,953 INFO [main] org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
    2017-03-01 18:36:47,953 INFO [main] org.apache.hadoop.mapred.MapTask: soft limit at 83886080
    2017-03-01 18:36:47,953 INFO [main] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
    2017-03-01 18:36:47,953 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
    2017-03-01 18:36:47,989 INFO [main] org.apache.hadoop.mapred.MapTask: Starting flush of map output
    2017-03-01 18:36:47,989 INFO [main] org.apache.hadoop.mapred.MapTask: Spilling map output
    2017-03-01 18:36:47,990 INFO [main] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufend = 121; bufvoid = 104857600
    2017-03-01 18:36:47,990 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214360(104857440); length = 37/6553600
    2017-03-01 18:36:48,002 INFO [main] org.apache.hadoop.mapred.MapTask: Finished spill 0
    2017-03-01 18:36:48,008 INFO [main] org.apache.hadoop.mapred.Task: Task:attempt_1488364479714_0001_m_000000_0 is done. And is in the process of committing
    2017-03-01 18:36:48,106 INFO [main] org.apache.hadoop.mapred.Task: Task 'attempt_1488364479714_0001_m_000000_0' done.

    18. Hadoop configuration files

        Default configuration files: packaged inside the jar of each of the four modules
            *core-default.xml
            *hdfs-default.xml
            *yarn-default.xml
            *mapred-default.xml
        User-defined (site) configuration files: $HADOOP_HOME/etc/hadoop/
            *core-site.xml
            *hdfs-site.xml
            *yarn-site.xml
            *mapred-site.xml
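
    To inspect a default file, it can be extracted straight from its jar (a sketch; the jar path assumes the stock 2.5.0 layout):

    [hadoop@db01 hadoop-2.5.0]$ unzip -p share/hadoop/common/hadoop-common-2.5.0.jar core-default.xml | less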

    19. Enable the HDFS trash (recycle bin)

    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/core-site.xml


    <configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://db01:9000</value>
        </property>

            <property>
                    <name>hadoop.tmp.dir</name>
                    <value>/usr/local/hadoop-2.5.0/data/tmp</value>
            </property>
    <!-- Enable the trash; keep deleted files for 7 days (unit: minutes; 7 x 24 x 60 = 10080).
         The value must be a literal number - an arithmetic expression like "7 * 24 * 60" will not parse. -->
            <property>
                    <name>fs.trash.interval</name>
                    <value>10080</value>
            </property>
    </configuration>

    Restart HDFS for the change to take effect.
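
    With the trash enabled, a delete moves the file under the user's .Trash directory instead of removing it at once (a sketch; paths follow the standard .Trash/Current layout):

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -rm /user/hadoop/mapreduce/wordcount/input/wc.input
    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -ls /user/hadoop/.Trash/Current/user/hadoop/mapreduce/wordcount/input/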

    20. Three ways to start/stop Hadoop

        * Start each daemon individually (the most common approach; easy to wrap in a shell script - see the sketch after this list)
            hdfs:
                sbin/hadoop-daemon.sh start|stop namenode
                sbin/hadoop-daemon.sh start|stop datanode
                sbin/hadoop-daemon.sh start|stop secondarynamenode
            yarn:
                sbin/yarn-daemon.sh start|stop resourcemanager
                sbin/yarn-daemon.sh start|stop nodemanager
            mapreduce:
                sbin/mr-jobhistory-daemon.sh start|stop historyserver

        * Start each module as a whole: requires passwordless SSH and must be run on the NameNode
            hdfs:
                sbin/start-dfs.sh
                sbin/stop-dfs.sh
            yarn:
                sbin/start-yarn.sh
                sbin/stop-yarn.sh
        * Start everything at once: not recommended; must be run on the NameNode and also starts the SecondaryNameNode on the NameNode host
                sbin/start-all.sh
                sbin/stop-all.sh
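
    A minimal wrapper for the daemon-by-daemon style (a hypothetical start-pseudo.sh, assumed to be run from $HADOOP_HOME):

    #!/bin/bash
    # start-pseudo.sh -- start or stop every daemon of this single-node setup, in order
    # usage: ./start-pseudo.sh start|stop
    action=${1:-start}
    sbin/hadoop-daemon.sh $action namenode
    sbin/hadoop-daemon.sh $action datanode
    sbin/yarn-daemon.sh $action resourcemanager
    sbin/yarn-daemon.sh $action nodemanager
    sbin/mr-jobhistory-daemon.sh $action historyserver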

    Appendix: configure passwordless SSH

    [hadoop@db01 ~]$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
    [hadoop@db01 ~]$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys   # ssh-keygen alone does not create authorized_keys
    [hadoop@db01 ~]$ chmod 600 ~/.ssh/authorized_keys

    [hadoop@db01 ~]$ scp ~/.ssh/authorized_keys db02:/home/hadoop/.ssh/authorized_keys
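
    If the keys are in place, an ssh command to the peer runs without a password prompt:

    [hadoop@db01 ~]$ ssh db02 hostname
    db02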

    21. Hadoop roles (what determines which host runs each daemon)
        namenode: determined by the value of fs.defaultFS (hdfs://db01:9000) in core-site.xml
    <!-- the following property determines the NameNode host -->
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://db01:9000</value>
        </property>

        datanode: determined by the contents of the slaves file
    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/slaves
    db01
        secondarynamenode: determined by the parameter dfs.namenode.secondary.http-address

        hdfs-site.xml

         <property>
                    <name>dfs.namenode.secondary.http-address</name>
                    <value>db01:50090</value>
            </property>

        resourcemanager:
        yarn-site.xml

         <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>db01</value>
            </property>


        nodemanager:

    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/slaves
    db01

        jobhistoryserver:
        mapred-site.xml

         <property>
                    <name>mapreduce.jobhistory.address</name>
                    <value>db01:10020</value>
            </property>

            <property>
                    <name>mapreduce.jobhistory.webapp.address</name>
                    <value>db01:19888</value>
            </property>
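
    The effective role hosts can also be read back from the configuration (a quick check using hdfs getconf):

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs getconf -namenodes
    db01
    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs getconf -secondaryNameNodes
    db01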

    22. Known issue

    [hadoop@db01 hadoop-2.5.0]$ bin/hdfs dfs -ls
    17/03/01 21:50:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    Found 1 items  --------------------------------------------------------------> this warning only disappears after rebuilding the native library from source and replacing lib/native
    drwxr-xr-x   - hadoop supergroup          0 2017-03-01 16:07 mapreduce
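
    To see which native libraries actually load, Hadoop 2.x ships a diagnostic command (a check, not a fix; assumed available in this release):

    [hadoop@db01 hadoop-2.5.0]$ bin/hadoop checknative -a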

    23. Appendix: full configuration files

    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/core-site.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->

    <!-- Put site-specific property overrides in this file. -->

    <configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://db01:9000</value>
        </property>

            <property>
                    <name>hadoop.tmp.dir</name>
                    <value>/usr/local/hadoop-2.5.0/data/tmp</value>
            </property>

            <property>
                    <name>fs.trash.interval</name>
                    <value>7000</value>
            </property>
    </configuration>
    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/hdfs-site.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->

    <!-- Put site-specific property overrides in this file. -->

    <configuration>

        <property>
                <name>dfs.replication</name>
                    <value>1</value>
        </property>

            <property>
                    <name>dfs.namenode.secondary.http-address</name>
                    <value>db01:50090</value>
            </property>

    </configuration>
    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/yarn-site.xml
    <?xml version="1.0"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    <configuration>

        <property>
                <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
        </property>

            <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>db01</value>
            </property>

        <property>
                    <name>yarn.log-aggregation-enable</name>
                    <value>true</value>
            </property>

            <property>
                    <name>yarn.log-aggregation.retain-seconds</name>
                    <value>600000</value>
            </property>
    </configuration>
    [hadoop@db01 hadoop-2.5.0]$ cat etc/hadoop/mapred-site.xml
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->

    <!-- Put site-specific property overrides in this file. -->

    <configuration>

        <property>
                <name>mapreduce.framework.name</name>
                    <value>yarn</value>
        </property>

            <property>
                    <name>mapreduce.jobhistory.address</name>
                    <value>db01:10020</value>
            </property>

            <property>
                    <name>mapreduce.jobhistory.webapp.address</name>
                    <value>db01:19888</value>
            </property>

    </configuration>

    Also note: JAVA_HOME must be set explicitly in each of the env scripts (hadoop-env.sh, yarn-env.sh, mapred-env.sh).
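
    A one-liner to confirm that all env scripts point at the same JDK:

    [hadoop@db01 hadoop-2.5.0]$ grep -H '^export JAVA_HOME' etc/hadoop/*-env.sh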
