hadoop删除n天前的数据

hadoop滚动更新近n天的数据

#!/bin/bash
# Rolling cleanup of HDFS data: entries older than the retention window are removed.

# Load the site environment.
# NOTE(review): presumably this puts the hadoop client on PATH — confirm.
source ~/kbemr_env.sh

# HDFS directory whose entries are checked for expiry.
hadoop_path="/your/hadoop/path"
# Retention window, in days.
data_keep_days="30"
# Remove every entry listed under $hadoop_path whose modification time is at
# least $data_keep_days days in the past.
#
# Globals:
#   hadoop_path     (read) HDFS directory to list
#   data_keep_days  (read) retention window in days
# Outputs:
#   Writes the raw listing to ./temp.txt (left in place for the caller to
#   delete), prints "removing <path>" on stdout for each expired entry and
#   failure messages on stderr.
# Returns:
#   1 when the listing cannot be obtained.
removeOutDate(){
    local today_timestamp file_timestamp max_age
    local quanxian temp user group size day hour filepath

    if ! hadoop fs -ls "$hadoop_path" > temp.txt; then
        echo "failed to list $hadoop_path" >&2
        return 1
    fi

    # Current time truncated to the minute, so it is compared at the same
    # resolution as the "date hour" columns of the listing.
    today_timestamp=$(date -d "$(date +"%Y-%m-%d %H:%M")" +%s)
    max_age=$((data_keep_days*24*3600))

    # Columns: permissions, replicas, user, group, size, date, time, path.
    # The path is the last field, so it keeps any embedded spaces.
    while read -r quanxian temp user group size day hour filepath
    do
        # Skip rows that are not file entries (e.g. the "Found N items"
        # summary line), and rows whose date cannot be parsed.
        [[ -n $day && -n $hour && -n $filepath ]] || continue
        file_timestamp=$(date -d "$day $hour" +%s 2>/dev/null) || continue

        # compare date
        if [ $((today_timestamp-file_timestamp)) -ge "$max_age" ];then
            echo "removing $filepath"
            # stdin comes from /dev/null so the command cannot consume the
            # listing the loop is still reading.
            hadoop fs -rm -r "$filepath" < /dev/null > /dev/null 2>&1 \
                || echo "failed to remove $filepath" >&2
        fi
    done < temp.txt
}

# Run one cleanup pass: log the start, call removeOutDate, log the end, then
# delete temp.txt, the scratch listing that removeOutDate writes.
#
# Outputs:
#   Timestamped progress lines on stdout; a message on stderr if temp.txt
#   cannot be deleted.
# Returns:
#   The status of removeOutDate, or 1 if only the temp.txt cleanup failed.
execute(){
    local rc=0

    echo "$(date +'%Y-%m-%d %H:%M:%S') start to remove outdate files in hdfs"
    removeOutDate || rc=$?
    echo "$(date +'%Y-%m-%d %H:%M:%S') remove outdate files in hdfs finished"

    # -f: a missing temp.txt is not an error; report success only when the
    # file is really gone.
    if rm -f temp.txt; then
        echo "temp.txt is cleaned!"
    else
        echo "failed to remove temp.txt" >&2
        [ "$rc" -ne 0 ] || rc=1
    fi
    return "$rc"
}

# Entry point: run a single cleanup pass when the script is executed.
execute

打赏一个呗

取消

感谢您的支持,我会继续努力的!

扫码支持
扫码支持
扫码打赏,你说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦