# Word-count example: stage input on HDFS, smoke-test the mapper/reducer
# locally, then run them on the cluster through Hadoop Streaming.

# Remove the directories left over from a previous run.
# The job refuses to start if the output directory already exists.
# `-rm -r` replaces the deprecated `-rmr`; `-f` keeps the first run quiet
# when the directories are not there yet.
hadoop fs -rm -r -f /sxydata/input/example_1
hadoop fs -rm -r -f /sxydata/output/example_1

# Create the input directory (`-p` also creates missing parent directories).
hadoop fs -mkdir -p /sxydata/input/example_1

# Upload every local file whose name starts with "text" as job input.
hadoop fs -put text* /sxydata/input/example_1

# Check that the input files were uploaded.
hadoop fs -ls /sxydata/input/example_1

# Test the mapper and reducer locally on a small sample; `sort` stands in
# for the shuffle/sort phase between map and reduce.
head -20 text1.txt | python count_mapper.py | sort | python count_reducer.py

# Run the job on the cluster.
#   -file     ship the local script to the cluster nodes
#   -mapper   script executed in the map phase
#   -reducer  script executed in the reduce phase
#   -input    every file under this directory is fed to the mapper line by line
#   -output   results are written to this directory
# NOTE(review): the scripts are invoked by name here, so they presumably need
# a `#!` interpreter line; otherwise use -mapper "python count_mapper.py".
# The options must stay on one logical command line, hence the trailing
# backslashes (no comments are allowed after them).
hadoop jar /usr/lib/hadoop-current/share/hadoop/tools/lib/hadoop-streaming-2.3.2.jar \
    -file count_mapper.py \
    -mapper count_mapper.py \
    -file count_reducer.py \
    -reducer count_reducer.py \
    -input /sxydata/input/example_1 \
    -output /sxydata/output/example_1