#!/bin/bash
# Deletes entries under an HDFS directory once they are older than a
# configurable retention period.

# Environment file loaded before anything else runs.
# NOTE(review): presumably this puts the hadoop client on PATH - confirm.
source ~/kbemr_env.sh
# Retention period in days; entries at least this old are removed.
data_keep_days=30
# HDFS directory whose listing is checked (placeholder value - set before use).
hadoop_path=/your/hadoop/path
#######################################
# Remove every entry listed by `hadoop fs -ls "$hadoop_path"` whose
# modification time is at least $data_keep_days days in the past.
# Globals:
#   hadoop_path    (read) HDFS directory to list
#   data_keep_days (read) retention period in days, non-negative integer
# Outputs:
#   Writes the raw listing to ./temp.txt (left in place for the caller to
#   delete) and prints "removing <path>" on stdout for each entry it
#   deletes. Warnings and errors go to stderr.
# Returns:
#   0 when the listing was processed, 1 when the listing could not be
#   obtained, the retention setting is invalid, or the clock cannot be read.
#######################################
removeOutDate(){
  local today_timestamp file_timestamp max_age
  local quanxian temp user group size day hour filepath

  # Keep the listing in temp.txt; the caller cleans that file up afterwards.
  if ! hadoop fs -ls "$hadoop_path" > temp.txt; then
    echo "removeOutDate: cannot list '$hadoop_path'" >&2
    return 1
  fi

  # A bad retention value must never reach the delete decision below.
  if ! [[ "$data_keep_days" =~ ^[0-9]+$ ]]; then
    echo "removeOutDate: data_keep_days must be a non-negative integer, got '$data_keep_days'" >&2
    return 1
  fi
  # 10# forces base 10 so values such as "08" are not parsed as octal.
  max_age=$((10#$data_keep_days * 24 * 3600))

  # Current time truncated to the minute, matching the "%Y-%m-%d %H:%M"
  # resolution of the timestamps parsed from the listing.
  today_timestamp=$(date -d "$(date +"%Y-%m-%d %H:%M")" +%s) || return 1

  # Columns: permissions, replicas, user, group, size, date, time, path.
  # The last variable receives the rest of the line, so paths may hold spaces.
  while read -r quanxian temp user group size day hour filepath; do
    # Skip the "Found N items" header and any other line without a path.
    [[ -n "$filepath" ]] || continue

    # Skip lines whose date columns do not parse rather than comparing
    # against an empty timestamp.
    if ! file_timestamp=$(date -d "$day $hour" +%s 2>/dev/null); then
      echo "removeOutDate: skipping '$filepath': cannot parse time '$day $hour'" >&2
      continue
    fi

    if (( today_timestamp - file_timestamp >= max_age )); then
      echo "removing $filepath"
      # stdin is detached so the command cannot swallow the rest of the listing.
      if ! hadoop fs -rm -r "$filepath" > /dev/null 2>&1 < /dev/null; then
        echo "removeOutDate: failed to remove '$filepath'" >&2
      fi
    fi
  done < temp.txt

  return 0
}
#######################################
# Run the HDFS cleanup between timestamped start/finish log lines, then
# delete the temp.txt listing that removeOutDate writes.
# Outputs:
#   Progress messages on stdout; failure notes on stderr.
# Returns:
#   The exit status of removeOutDate; 1 if that succeeded but temp.txt
#   could not be deleted.
#######################################
execute(){
  local status=0

  echo "$(date +'%Y-%m-%d %H:%M:%S') start to remove outdate files in hdfs"
  removeOutDate || status=$?
  if (( status != 0 )); then
    echo "removeOutDate exited with status $status" >&2
  fi
  echo "$(date +'%Y-%m-%d %H:%M:%S') remove outdate files in hdfs finished"

  # -f: a listing that was never created counts as already clean. The
  # success message is printed only when rm really succeeded.
  if rm -f -- temp.txt; then
    echo "temp.txt is cleaned!"
  else
    echo "failed to remove temp.txt" >&2
    (( status != 0 )) || status=1
  fi

  return "$status"
}
# Entry point: run the whole cleanup (log, remove outdated entries, delete temp.txt).
execute