Shell: multiple processes work through a single shared task list.
Each process writes to its own log file, and failed samples also go to its own per-process file.
### config start ###
run_parts_start=0                 # first part index to process
run_parts_end=199                 # last part index to process (inclusive)
hadoop_prefix=hdfs://xxx/part-00  # HDFS path prefix; the 3-digit index is appended
part_list_paths=./my_part_list    # shared task list: one HDFS part path per line
thread_num=10                     # number of worker processes
### config end ###
# init env: (re)create output dirs and clear artifacts from the previous run
mkdir -p failed_samples multi_thread_logs
rm -f failed_samples/* multi_thread_logs/*
rm -f $part_list_paths tc_err.log tc_stat.log tmp
# Build the shared task list: one HDFS part path per line.
function Get_part_list_paths() {
    for idx in $(seq -f "%03g" $run_parts_start $run_parts_end)
    do
        #echo "process path:" ${hadoop_prefix}${idx}
        #hadoop fs -cat ${hadoop_prefix}${idx} | ./tdbank_insert
        echo ${hadoop_prefix}${idx} >> $part_list_paths
    done
}
# rand MIN MAX: print a pseudo-random integer in [MIN, MAX]
function rand() {
    min=$1
    max=$(($2 - $min + 1))
    echo $(($RANDOM % $max + $min))
}
# Sub_thread THREAD_ID: worker loop that pops one part path at a time from the
# shared list (protected by flock), processes it, and re-queues it on failure.
function Sub_thread() {
    log_path=multi_thread_logs/sub_thread_${1}.log
    while true
    do
        # random delay staggers the workers so they do not hit the list at the same moment
        sleep $(rand 2 15)
        if [ $(wc -l < $part_list_paths) -le 0 ]; then
            echo -e "\nsub_thread_${1} complete!" >> $log_path
            break
        fi
        # atomically pop the first line: head and sed both run inside the locked
        # command, so no other worker can grab the same part in between
        my_part_name=$(flock -x -n $part_list_paths -c "head -1 $part_list_paths; sed -i '1d' $part_list_paths")
        if [ -z "$my_part_name" ]; then
            echo $1 "part_name is empty, try again..."
            continue
        fi
        echo $1 $my_part_name
        # main processing: stream the part from HDFS into the worker binary;
        # failed samples go to this worker's own file
        echo -e "\n$(date '+[%Y/%m/%d %T]')" >> $log_path
        hadoop fs -cat ${my_part_name} | ./your_bin_file failed_samples/failed_sample_thread_${1} >> $log_path 2>&1
        my_part_idx=$(echo ${my_part_name} | awk -F '/' '{print $NF}')
        echo -e "************* " $my_part_idx " Finished! **************\n" >> $log_path
        # success check: the line right above the last "part-" marker line should
        # be the binary's TOTAL summary if the run finished cleanly
        line_num=$(( $(awk '/part-/{print NR}' $log_path | tail -1) - 1 ))
        success_flag_line=$(sed -n "${line_num}p" $log_path)
        if [[ ${success_flag_line} =~ "TOTAL" ]]; then
            echo -e "$my_part_idx Succeed!\n" >> $log_path
        else
            # failed: push the part back onto the list; block for the lock so the retry is never dropped
            echo -e "$my_part_idx Failed! Append to part list..\n" >> $log_path
            flock -x $part_list_paths -c "echo ${my_part_name} >> ${part_list_paths}"
        fi
    done
}
# Launch thread_num background workers and wait for all of them to finish.
function Multi_thread() {
    for thread_idx in $(seq 1 $thread_num)
    do
        Sub_thread ${thread_idx} &
    done
    wait
}
# exec
Get_part_list_paths
Multi_thread
echo -e "All threads complete!\n"
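A quick way to launch the script and keep an eye on progress (run_parts.sh is only an assumed name for this script):
#nohup bash run_parts.sh > run_parts.out 2>&1 &   # run detached so it survives the terminal
#tail -f multi_thread_logs/sub_thread_1.log       # follow one worker's log
#wc -l my_part_list                               # parts still waiting (or re-queued after failure)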
Killing all processes of a multi-process job
#ps -efww | grep -w 'program_name' | grep -v grep | cut -c 9-15 | xargs kill -9
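The cut -c 9-15 slice depends on the exact column widths of ps -efww output, so it can grab the wrong characters on some systems. Two possibly more robust variants (program_name is still just a placeholder):
#ps -efww | grep -w 'program_name' | grep -v grep | awk '{print $2}' | xargs kill -9   # PID is the second column of ps -ef
#pkill -9 -x 'program_name'   # let pkill match the exact process name and send the signal itself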