1 Looping over lists in bash
#!/bin/bash
list1="1 2 3 4 5" # just a space-separated string
list2="5 4 3 2 1"
list1_x=($list1) # word-split the string into an array (bash arrays, hence the bash shebang rather than /bin/sh)
list2_x=($list2)
length=${#list1_x[@]} # number of elements in the array
for ((i=0; i<length; i++));
do
    echo "${list1_x[$i]}"
    echo "${list2_x[$i]}"
done
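If an element could contain glob characters, the unquoted `($list1)` conversion is risky (a `*` in the string would expand to filenames in the current directory). A minimal sketch of a safer variant using `read -ra`, which splits on whitespace without glob expansion; it prints the two lists interleaved (`1 5`, `2 4`, ...):

#!/bin/bash
list1="1 2 3 4 5"
list2="5 4 3 2 1"
read -ra list1_x <<< "$list1" # split into an array, no glob expansion
read -ra list2_x <<< "$list2"
for ((i=0; i<${#list1_x[@]}; i++)); do
    echo "${list1_x[$i]} ${list2_x[$i]}"
done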
2 Looping over a task list to run a Hadoop streaming job
#!/usr/bin/env bash
# cd into the directory this script lives in
path=$(cd "$(dirname "$0")"; pwd)
cd "$path"
# common
# AFS_BASE="path/user/xxxx/diaoyan/xxxxx"
AFS_BASE="path/user/fmflow/yunfan/job_output"
USER_BASE="path/user/xxxx/diaoyan/name"
LOCAL_HADOOP_BIN="/home/vca/xxxx/bin/hadoop"
# tools
HADOOP_LIB_PATH="path/user/xxxx/diaoyan/xxxx/lib/"
PYTHON_PATH=${HADOOP_LIB_PATH}"python.tar.gz"
USER_ENV=${USER_BASE}"/image_data/proto_trans.tar.gz"
# all the path you need
task_list="3153 3152 3115 3112" # 任务列表
input_list="3195 3187 3182 3173"
output_list="se_0059 se_0104 se_0304 se_0204"
task_list=($task_list) # 转换成列表
input_list=($input_list)
output_list=($output_list)
task_len=${#task_list[@]}
echo $task_len
for ((i=0; i<task_len; i++)); # loop over every task
do
INPUT_DIR1=${AFS_BASE}"/task_"${task_list[$i]}"/"${task_list[$i]}
INPUT_DIR2=${AFS_BASE}"/task_"${task_list[$i]}"/"${input_list[$i]}
OUTPUT_PATH=${USER_BASE}"/txxxxx/"${output_list[$i]}
# show the resolved paths for this task
echo "input paths: ${INPUT_DIR1}, ${INPUT_DIR2}"
echo "output path: ${OUTPUT_PATH}"
# remove any previous output so the job can write
${LOCAL_HADOOP_BIN} fs -rmr ${OUTPUT_PATH}
# run the routine
${LOCAL_HADOOP_BIN} streaming \
    -D fs.default.name="hadoop_path" \
    -D mapred.job.groups="group_name" \
    -D dfs.client.auth.method=4 \
    -D mapred.job.priority=VERY_HIGH \
    -D mapred.job.name="search_development_name_vxxxxxc_image_extract" \
    -D abaci.split.optimize.enable=false \
    -D mapred.job.map.capacity=500 \
    -D mapred.map.tasks=500 \
    -D mapred.job.reduce.capacity=50 \
    -D mapred.reduce.tasks=50 \
    -D stream.memory.limit=800 \
    -D mapred.reduce.tasks.speculative.execution=false \
    -D mapred.map.tasks.speculative.execution=false \
    -D mapred.max.map.failures.percent=20 \
    -D mapred.max.reduce.failures.percent=20 \
    -D stream.num.map.output.key.fields=1 \
    -D num.key.fields.for.partition=1 \
    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
    -mapper "./python/bin/python base64_decode.py" \
    -reducer "cat" \
    -input ${INPUT_DIR1},${INPUT_DIR2} \
    -output ${OUTPUT_PATH} \
    -file ./base64_decode.py \
    -cacheArchive ${PYTHON_PATH} \
    -cacheArchive ${USER_ENV}
done
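As written, the loop moves on to the next task even if a job fails. A minimal sketch of a guard that could follow the streaming call inside the loop; it checks the exit status, aborts on the first failure, and verifies the output path with `hadoop fs -test -e` (the `...` stands for the same streaming arguments as above):

${LOCAL_HADOOP_BIN} streaming ... # same arguments as in the loop above
if [ $? -ne 0 ]; then
    echo "task ${task_list[$i]} failed, aborting" >&2
    exit 1
fi
# double-check that the output directory was actually created
${LOCAL_HADOOP_BIN} fs -test -e ${OUTPUT_PATH} || echo "warning: ${OUTPUT_PATH} missing" >&2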