#!/usr/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script is the core 'decommissionhdfsnodes' script.  For the
# most part, it shouldn't be edited.  See job submission files for
# configuration details.

source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-hadoop-helper
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-paths

# This is a job, no loading export files or libs except for minimal convenience ones

# Sanity check: HADOOP_DECOMMISSION_HDFS_NODE_SIZE is the number of nodes
# that remain after decommissioning (the amount to decommission is later
# computed as worker count minus this value), so it must be strictly
# smaller than the current worker count.
if [ "${HADOOP_WORKER_COUNT}" -le "${HADOOP_DECOMMISSION_HDFS_NODE_SIZE}" ]; then
    echo "Cannot decommission more nodes than are available"
    exit 1
fi

# The hdfs commands are invoked through ${hadoopcmdprefix}, which is not
# assigned in this script (presumably it comes from the environment or the
# sourced helper libs and may be relative to HADOOP_HOME -- TODO confirm).
# Stop if the directory change fails instead of running from elsewhere.
if ! cd "${HADOOP_HOME}"; then
    echo "Cannot change directory to ${HADOOP_HOME}"
    exit 1
fi

# Ask HDFS for the configured replication factor.  stderr is discarded so
# that only the value printed on stdout is captured.
hdfsreplication=$(${hadoopcmdprefix}/hdfs getconf -confKey dfs.replication 2> /dev/null)

# An empty or non-numeric value means the lookup did not work.  Without this
# guard the numeric comparison below would fail with an error and the
# replication check would be skipped without stopping the script.
case "${hdfsreplication}" in
    '' | *[!0-9]*)
        echo "Unable to determine HDFS replication"
        exit 1
        ;;
esac

# The number of nodes left after decommissioning must not drop below the
# replication factor.
if [ "${HADOOP_DECOMMISSION_HDFS_NODE_SIZE}" -lt "${hdfsreplication}" ]; then
    echo "Cannot have fewer nodes than HDFS replication"
    exit 1
fi

# Number of nodes to decommission: current worker count minus the number of
# nodes that should remain.
nodecounttodecommission=$(( HADOOP_WORKER_COUNT - HADOOP_DECOMMISSION_HDFS_NODE_SIZE ))

# The exclude list is made of the last ${nodecounttodecommission} entries of
# hosts-include.  Paths are quoted so a configuration directory containing
# whitespace does not break the command or the redirection.
echo "Creating ${HADOOP_CONF_DIR}/hosts-exclude"
if ! tail -n "${nodecounttodecommission}" "${HADOOP_CONF_DIR}/hosts-include" > "${HADOOP_CONF_DIR}/hosts-exclude"; then
    echo "Failed to create ${HADOOP_CONF_DIR}/hosts-exclude"
    exit 1
fi

# If the refresh fails no node will ever be reported as decommissioned, and
# the polling loop that follows would never finish, so stop here instead.
echo "Refreshing nodes in namenode"
if ! ${hadoopcmdprefix}/hdfs dfsadmin -refreshNodes; then
    echo "Failed to refresh nodes in namenode"
    exit 1
fi

# Poll the dfsadmin report until the number of entries marked
# 'Decommission Status : Decommissioned' is no longer below the number of
# nodes requested, sleeping 30 seconds between polls.
while :; do
    decommissioned=$(${hadoopcmdprefix}/hdfs dfsadmin -report | grep -c 'Decommission Status : Decommissioned')

    # Leave the loop as soon as the target has been reached.
    [ "${decommissioned}" -lt "${nodecounttodecommission}" ] || break

    echo "Done decommissioning ${decommissioned} nodes out of ${nodecounttodecommission} ... sleeping for a bit "
    sleep 30
done

echo "Decommissioned ${decommissioned} nodes"

# Sets magpie_networkedhdfspath
Magpie_get_networkedhdfspath

# Never build removal paths from an empty base directory: that would make
# the rm below operate on /node-<rank>.
if [ -z "${magpie_networkedhdfspath}" ]; then
    echo "Networked HDFS path is not set, cannot remove decommissioned node paths"
    exit 1
fi

# Remove one node-<rank> directory per decommissioned node, starting at rank
# HADOOP_WORKER_COUNT and walking downwards.
for (( removed = 0; removed < nodecounttodecommission; removed++ )); do
    noderank=$(( HADOOP_WORKER_COUNT - removed ))
    rmpath="${magpie_networkedhdfspath}/node-${noderank}"
    echo "Removing path ${rmpath} ..."
    # Quoted and preceded by -- so the path is always passed as one operand.
    rm -rf -- "${rmpath}"
done

exit 0
