#!/usr/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script manages the scripts that are run.  For the most
# part, it shouldn't be edited.  See job submission files for
# configuration details.

source ${MAGPIE_SCRIPTS_HOME}/magpie/exports/magpie-exports-dirs
source ${MAGPIE_SCRIPTS_HOME}/magpie/exports/magpie-exports-user
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-log
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-hadoop-helper
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-helper

# For this run, we will use cluster specific paths
#
# Magpie_make_all_local_dirs_node_specific is not defined in this
# file; presumably it comes from one of the libraries sourced above
# (TODO confirm which).  It is called before anything else in this
# script runs.
Magpie_make_all_local_dirs_node_specific

# Signal handler: kill the Yarn jobs known to mapred, then kill the
# process tree of the child this script launched.
#
# Globals:   HADOOP_HOME (read) - ${HADOOP_HOME}/bin/mapred is used to
#            list and kill jobs
# Arguments: $1 - pid of the child whose process tree is killed
# Outputs:   progress messages on stdout, each kill command that is
#            run on stderr
#
# Jobs are listed twice, first for the PREP state and then for the
# RUNNING state, so the second listing is taken after the PREP jobs
# have been killed.
__Yarn_job_kill_handler () {
    local childpid=$1
    local mapred="${HADOOP_HOME}/bin/mapred"
    local state jobids jobid pid

    echo "Received interrupt from user, killing yarn jobs and script"

    for state in PREP RUNNING
    do
        # The first column of every listing line mentioning the state
        # is taken as the job id.
        jobids=$("${mapred}" job -list 2> /dev/null | grep "${state}" | awk '{print $1}')

        # Intentionally unquoted: one job id per whitespace separated word
        for jobid in ${jobids}
        do
            echo "Killing Yarn job id ${jobid}"
            echo "Running ${mapred} job -kill ${jobid}" >&2
            "${mapred}" job -kill "${jobid}"
        done
    done

    # Without a pid, pstree would list the entire process tree and we
    # would try to kill every process in it, so stop here instead.
    if [ -z "${childpid}" ]
    then
        return 0
    fi

    # pstree -p prints each process as name(pid); extract every pid in
    # the child's tree and signal it.
    for pid in $(pstree -cp "${childpid}" | grep -Po '\(\K\d+')
    do
        kill "${pid}"
    done
}

# Validate the command line and start the child process in the
# background.
#
# Arguments: $1  - job type: interactive, launch, server, script or
#                  scriptnokill
#            $2  - script to run (script and scriptnokill only)
#            $3+ - arguments handed to the script
#
# Sets jobtype, mypid, childpid and typemessage for the code that
# follows.
case "$1" in
    "")
        Magpie_output_internal_error "magpie-run-execute, first argument unset"
        exit 1
        ;;
    interactive|launch|server|script|scriptnokill)
        ;;
    *)
        Magpie_output_internal_error "magpie-run-execute, first argument invalid = $1"
        exit 1
        ;;
esac

jobtype=$1

if [ "${jobtype}" == "script" ] || [ "${jobtype}" == "scriptnokill" ]
then
    if [ -z "$2" ]
    then
        Magpie_output_internal_error "magpie-run-execute, second argument unset"
        exit 1
    fi
    scripttorun=$2
    # Keep the arguments in an array so that each one reaches the
    # script exactly as it was passed in, whitespace included.
    scriptargs=("${@:3}")
fi


mypid=$$

case "${jobtype}" in
    interactive)
        "${MAGPIE_SCRIPTS_HOME}/magpie/job/magpie-job-sleep" countdown &
        childpid=$!
        typemessage="'${jobtype}' mode"
        ;;
    launch|server)
        "${MAGPIE_SCRIPTS_HOME}/magpie/job/magpie-job-sleep" normal &
        childpid=$!
        typemessage="'${jobtype}' mode"
        ;;
    script|scriptnokill)
        "${scripttorun}" "${scriptargs[@]}" &
        childpid=$!
        typemessage="job"
        ;;
esac

# Unless the job was started as 'scriptnokill', tell the user how to
# end it early and install the handlers for the signals mentioned.
#
# Signals are given by number (10 and 28) rather than by name; the
# note this replaces says some systems have SIGUSR1 as USR1, and that
# SIGWINCH (28) was chosen because SIGUSR1 and SIGUSR2 are already in
# use by this script.
if [ "${jobtype}" != "scriptnokill" ]
then
    Magpie_get_magpie_hostname
    myhostname=${magpie_hostname}

    bannerrule="*******************************************************"

    # Command a user runs to signal this process on this node; the
    # signal number and pid are appended where it is printed.
    remotekill="${MAGPIE_REMOTE_CMD:-ssh}${MAGPIE_REMOTE_CMD_OPTS:+" "}${MAGPIE_REMOTE_CMD_OPTS} ${myhostname} kill -s"

    # Trap fragment that kills every pid pstree reports for the child.
    # The child pid is expanded now; the pipeline runs when the trap
    # fires.
    killchildtree="for pid in \$(pstree -cp ${childpid} | grep -Po '\(\K\d+' | sed -re 's/$/ /g'); do kill \$pid; done"

    printf '%s\n' \
        "${bannerrule}" \
        "* Run" \
        "*" \
        "* ${remotekill} 10 ${mypid}" \
        "*" \
        "* to exit ${typemessage} early."

    if [ "${jobtype}" == "script" ]
    then
        printf '%s\n' \
            "*" \
            "* Warning: killing early may not kill jobs submitted to an internally" \
            "* managed scheduler within Magpie.  The job will be canceled during teardown" \
            "* of daemons.  Extraneous error messages from your job may occur until then."

        # Offer the Yarn aware kill only when Hadoop was set up with Yarn
        if [ "${HADOOP_SETUP}" == "yes" ] && Magpie_hadoop_setup_type_enables_yarn
        then
            printf '%s\n' \
                "*" \
                "* Magpie is aware that Yarn has been launched.  If user wishes to" \
                "* kill all currently submitted Yarn jobs in the PREP or" \
                "* RUNNING state to be killed before killing the job, run:" \
                "*" \
                "* ${remotekill} 28 ${mypid}"

            trap "__Yarn_job_kill_handler ${childpid}" 28
        fi
    fi

    printf '%s\n' "${bannerrule}"

    trap "echo \"Received interrupt from user, ${typemessage} ending early.\"; ${killchildtree}" 10
fi

# Signal 12 is given by number rather than by name; the note this
# replaces says some systems have SIGUSR2 as USR2.
#
# This trap is used exclusively by Magpie to kill the job if it is
# running long.  It kills every pid pstree reports for the child; the
# child pid is expanded now and the pipeline runs when the trap fires.
timelimitkill="for pid in \$(pstree -cp ${childpid} | grep -Po '\(\K\d+' | sed -re 's/$/ /g'); do kill \$pid; done"

trap "echo \"Kill script due to job time limit being reached.\"; ${timelimitkill}" 12

# Block until the child finishes and hand its status back to the
# caller.  The closing banner is printed only for a zero status.
wait $childpid
childstatus=$?

case "${childstatus}" in
    0)
        printf '%s\n' \
            "*******************************************************" \
            "* End of ${typemessage}" \
            "*******************************************************"
        ;;
esac

exit $childstatus
