shell多进程共享文件

用 shell 启动多个后台进程并行处理任务,所有进程共享同一个任务列表文件(通过 flock 加锁访问)。

每个进程的日志写到各自的 log 文件(multi_thread_logs/ 目录),失败样本也写到各自的文件(failed_samples/ 目录)。

### config start ###
# First and last part index (inclusive). Get_part_list_paths renders each
# index as a zero-padded 3-digit suffix appended to hadoop_prefix.
run_parts_start='0'
run_parts_end='199'
# Common prefix of every input path; the 3-digit suffix is appended directly.
hadoop_prefix='hdfs://xxx/part-00'
# Task-list file shared by all workers: one input path per line.
part_list_paths='./my_part_list'
# Number of background workers started by Multi_thread.
thread_num='10'
### config end ###

# init env
#######################################
# Reset the working directory for a fresh run: make sure the output
# directories exist, then delete the outputs of any previous run.
# Globals:   part_list_paths (read) - task-list file to delete
# Arguments: none
#######################################
function Init_env() {
    # -p: no error when the directory already exists.
    mkdir -p failed_samples multi_thread_logs

    rm -f failed_samples/* multi_thread_logs/*
    # Quoted and terminated with -- so a path containing spaces, globs or a
    # leading dash removes exactly that one file and nothing else.
    rm -f -- "$part_list_paths"
    # Leftovers that are not written by this script itself; presumably
    # produced by ./your_bin_file or an earlier tool - TODO confirm.
    rm -f tc_err.log tc_stat.log tmp
}

Init_env


#######################################
# Append one input path per part index to the shared task list.
# Each path is hadoop_prefix followed by the index zero-padded to 3 digits.
# Globals:   run_parts_start, run_parts_end (read) - inclusive index range
#            hadoop_prefix (read)                  - common path prefix
#            part_list_paths (read)                - list file appended to
# Arguments: none
#######################################
function Get_part_list_paths() {
    local idx
    for idx in $(seq -f "%03g" "$run_parts_start" "$run_parts_end")
    do
        printf '%s\n' "${hadoop_prefix}${idx}"
    # One redirect for the whole loop: the file is opened once, and it is
    # created (empty) even when the range yields no indices.
    done >> "$part_list_paths"
}

#######################################
# Print a pseudo-random integer in the inclusive range [$1, $2].
# Arguments: $1 - lower bound
#            $2 - upper bound (expected to be >= $1)
# Outputs:   the chosen integer on stdout
#######################################
function rand() {
    # All working variables are local so callers' min/max/num are untouched.
    local lower=$1
    local span=$(( $2 - lower + 1 ))
    # The constant offset only shifts which residue a given RANDOM maps to.
    local seed=$(( RANDOM + 1000000000 ))
    echo $(( seed % span + lower ))
}

#######################################
# Worker loop: repeatedly take the first path off the shared task list,
# stream it through ./your_bin_file, and put it back on the list when the
# run did not report success. Returns once the list is empty.
# Globals:   part_list_paths (read) - shared task-list file (modified in place)
# Arguments: $1 - worker index, used in the log and failed-sample file names
# Outputs:   one progress line per attempt on stdout; details are appended to
#            multi_thread_logs/sub_thread_<index>.log
#######################################
function Sub_thread() {
    local worker_id=$1
    local log_path="multi_thread_logs/sub_thread_${worker_id}.log"
    # Lock a dedicated file instead of the list itself: `sed -i` replaces the
    # list with a new file, so a lock held on the list would not follow it.
    local lock_path="${part_list_paths}.lock"
    local my_part_name my_part_idx success_flag_line

    while true
    do
        # Wait a random 2..15 seconds before each attempt (presumably to
        # stagger the workers).
        sleep "$(rand 2 15)"
        if [ "$(wc -l < "$part_list_paths")" -le 0 ]; then
            echo -e "\nsub_thread_${worker_id} complete!" >> "$log_path"
            break
        fi

        # get top part name
        # Read AND delete the first line while holding the exclusive lock, so
        # two workers can never take the same part. The lock is released when
        # fd 9 is closed at the end of the group.
        my_part_name=$(
            {
                flock -x 9 || exit 1
                head -n 1 "$part_list_paths"
                sed -i '1d' "$part_list_paths"
            } 9>> "$lock_path"
        )
        if [ -z "$my_part_name" ]; then
            # Another worker took the last entry between the check and the pop.
            echo "$worker_id" "part_name is empty, try again..."
            continue
        fi
        echo "$worker_id" "$my_part_name"

        # main process part
        printf '%s\n\n' "$(date '+[%Y/%m/%d %T]')" >> "$log_path"
        hadoop fs -cat "${my_part_name}" \
            | ./your_bin_file "failed_samples/failed_sample_thread_${worker_id}" >> "$log_path" 2>&1
        # The last log line at this point is the final line the binary wrote;
        # capture it before the "Finished" marker is appended.
        success_flag_line=$(tail -n 1 "$log_path")
        my_part_idx=${my_part_name##*/}
        printf '*************  %s  Finished! **************\n\n' "$my_part_idx" >> "$log_path"

        # check success status: a successful run ends with a line containing TOTAL
        if [[ "$success_flag_line" == *TOTAL* ]]; then
            printf '%s Succeed!\n\n' "$my_part_idx" >> "$log_path"
        else
            printf '%s Failed! Append to part list..\n\n' "$my_part_idx" >> "$log_path"
            # Blocking lock: the failed part must never be dropped just
            # because another worker happens to hold the lock right now.
            {
                flock -x 9 && printf '%s\n' "$my_part_name" >> "$part_list_paths"
            } 9>> "$lock_path"
        fi
    done
}

#######################################
# Start thread_num workers in the background and wait for all of them.
# Globals:   thread_num (read) - number of workers to start
# Arguments: none
#######################################
function Multi_thread() {
    local thread_idx
    # Workers are numbered 1..thread_num; the number is passed to Sub_thread.
    for (( thread_idx = 1; thread_idx <= thread_num; thread_idx++ ))
    do
        Sub_thread "${thread_idx}" &
    done
    # Block until every background worker has exited.
    wait
}

# exec
# Build the shared task list, then run the workers until they all return.
Get_part_list_paths
Multi_thread
printf 'All threads complete!\n\n'

批量终止(kill)多个进程

#ps -efww | grep -w 'program_name' | grep -v grep | cut -c 9-15 | xargs kill -9

打赏一个呗

取消

感谢您的支持,我会继续努力的!

扫码支持
扫码支持
扫码打赏,你说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦