#!/usr/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script sets up configuration files for jobs.  For the most
# part, it shouldn't be edited.  See job submission files for
# configuration details.

# Leave immediately, and successfully, unless HADOOP_SETUP is "yes".
[[ "${HADOOP_SETUP}" == "yes" ]] || exit 0

source ${MAGPIE_SCRIPTS_HOME}/magpie/exports/magpie-exports-submission-type
source ${MAGPIE_SCRIPTS_HOME}/magpie/exports/magpie-exports-dirs
source ${MAGPIE_SCRIPTS_HOME}/magpie/exports/magpie-exports-user
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-defaults
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-node-identification
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-hadoop-helper
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-setup
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-helper
source ${MAGPIE_SCRIPTS_HOME}/magpie/lib/magpie-lib-log

# Stop quietly when the node check fails; when it succeeds it has set
# hadoopnoderank for the rest of the script.
Magpie_am_I_a_hadoop_node || exit 0

# For rest of setup, we will use cluster specific paths
Magpie_make_all_local_dirs_node_specific

# Accumulators that later sections append to
extrahadoopclasses=""
extrahadoopopts=""
extrayarnopts=""
extrahadooptaskopts=""

# The hosts include file is a copy of the node list, which is named
# "workers" when the version looks like 3.X.Y and "slaves" otherwise.
hostsincludefile="${HADOOP_CONF_DIR}/hosts-include"
if echo ${HADOOP_VERSION} | grep -q -E "3\.[0-9]\.[0-9]"; then
    cp ${HADOOP_CONF_DIR}/workers ${hostsincludefile}
else
    cp ${HADOOP_CONF_DIR}/slaves ${hostsincludefile}
fi
# Every "/" becomes "\/" so the path can be used as a sed replacement
hostsincludefilesubst=$(echo "${hostsincludefile}" | sed 's/\//\\\//g')

# The exclude file is created here but not populated
hostsexcludefile="${HADOOP_CONF_DIR}/hosts-exclude"
touch ${hostsexcludefile}
hostsexcludefilesubst=$(echo "${hostsexcludefile}" | sed 's/\//\\\//g')

#
# Work out values for the config file variables from recommendations,
# rules of thumb, or what the user supplied.
#

# Cloudera recommendation: parallel copies = sqrt(number of nodes), floor of ten
if [ "${HADOOP_PARALLEL_COPIES}X" != "X" ]; then
    parallelcopies=${HADOOP_PARALLEL_COPIES}
else
    parallelcopies=$(echo "sqrt ( ${HADOOP_WORKER_COUNT} )" | bc -l | xargs printf "%1.0f")
    [ "${parallelcopies}" -lt "10" ] && parallelcopies=10
fi

# Cloudera recommends 10% of nodes w/ floor of ten, ceiling 200.
# My experience this is low b/c of high core counts, so bump higher to 50%
namenodehandlercount=$(echo "${HADOOP_WORKER_COUNT} * .5" | bc -l | xargs printf "%1.0f")
if [ "${namenodehandlercount}" -lt "10" ]; then
    namenodehandlercount=10
elif [ "${namenodehandlercount}" -gt "200" ]; then
    namenodehandlercount=200
fi

# General rule of thumb is half namenode handler count, so * .25 instead of * .5
datanodehandlercount=$(echo "${HADOOP_WORKER_COUNT} * .25" | bc -l | xargs printf "%1.0f")
if [ "${datanodehandlercount}" -lt "10" ]; then
    datanodehandlercount=10
elif [ "${datanodehandlercount}" -gt "200" ]; then
    datanodehandlercount=200
fi

# Per description, about 4% of nodes but w/ floor of 10
jobtrackerhandlercount=$(echo "${HADOOP_WORKER_COUNT} * .04" | bc -l | xargs printf "%1.0f")
[ "${jobtrackerhandlercount}" -lt "10" ] && jobtrackerhandlercount=10

# Per description, about square root number of nodes
submitfilereplication=$(echo "sqrt ( ${HADOOP_WORKER_COUNT} )" | bc -l | xargs printf "%1.0f")

# The I/O buffer size is picked per file system mode
if [[ "${HADOOP_FILESYSTEM_MODE}" == "hdfs" ]]; then
    iobuffersize=65536
elif Magpie_hadoop_filesystem_mode_is_hdfs_on_network_type \
    || [[ "${HADOOP_FILESYSTEM_MODE}" == "rawnetworkfs" ]]; then
    # Default block size is 1M in Lustre
    # XXX: If not default, can get from lctl or similar?
    # Any other network file system, raw or under HDFS, is assumed to
    # behave like Lustre
    iobuffersize=1048576
fi

# Sets magpie_processor_count & magpie_threads_to_use
Magpie_calculate_threads_to_use

# Sets magpie_memory_to_use
Magpie_calculate_memory_to_use

# A non-empty user setting wins; otherwise fall back to the values
# calculated just above
maxtaskspernode=${HADOOP_MAX_TASKS_PER_NODE:-${magpie_threads_to_use}}
yarnresourcememory=${YARN_RESOURCE_MEMORY:-${magpie_memory_to_use}}

# Prints the largest multiple of 512 that does not exceed $1 / $2, i.e.
# the memory ($1) split across the task slots ($2) and rounded DOWN to
# the nearest 512M.  Shell integer arithmetic truncates at each
# division, which is what provides the round-down; the previous
# printf "%1.0f" based calculation rounded to nearest and could hand
# out more heap per task than memory / tasks allows.  Both arguments
# must be integers, which the expr and [ -lt ] usage on these values
# elsewhere in this script already requires.
__hadoop_setup_round_down_heapsize () {
    echo $(( ${1} / ${2} / 512 * 512 ))
}

# Heap size shared by all child tasks: the user's HADOOP_CHILD_HEAPSIZE
# if given, otherwise derived from the YARN memory and task count.
if [ "${HADOOP_CHILD_HEAPSIZE}X" != "X" ]
then
    allchildheapsize=${HADOOP_CHILD_HEAPSIZE}
else
    # achu: We round down to nearest 512M
    allchildheapsize=`__hadoop_setup_round_down_heapsize "${yarnresourcememory}" "${maxtaskspernode}"`
    # Never go below 512M.  The :-0 makes an empty result (unusable
    # inputs) fall through to the floor as well.
    if [ "${allchildheapsize:-0}" -lt "512" ]
    then
        allchildheapsize=512
    fi
fi

# Map tasks use the general child heap size unless given their own
mapchildheapsize=${HADOOP_CHILD_MAP_HEAPSIZE:-${allchildheapsize}}

# Reduce tasks default to twice the map heap size
if [ "${HADOOP_CHILD_REDUCE_HEAPSIZE}X" != "X" ]; then
    reducechildheapsize=${HADOOP_CHILD_REDUCE_HEAPSIZE}
else
    reducechildheapsize=$(expr ${mapchildheapsize} \* 2)
fi

# Map container buffer: estimate 256M per G of map heap, counting
# anything under one G as one G
if [ "${HADOOP_CHILD_MAP_CONTAINER_BUFFER}X" != "X" ]; then
    mapcontainerbuffer=${HADOOP_CHILD_MAP_CONTAINER_BUFFER}
else
    mapheapgigs=$(expr ${mapchildheapsize} / 1024)
    [ "${mapheapgigs}" == "0" ] && mapheapgigs=1
    mapcontainerbuffer=$(expr ${mapheapgigs} \* 256)
fi

# Reduce container buffer: same 256M per G estimate on the reduce heap
if [ "${HADOOP_CHILD_REDUCE_CONTAINER_BUFFER}X" != "X" ]; then
    reducecontainerbuffer=${HADOOP_CHILD_REDUCE_CONTAINER_BUFFER}
else
    reduceheapgigs=$(expr ${reducechildheapsize} / 1024)
    [ "${reduceheapgigs}" == "0" ] && reduceheapgigs=1
    reducecontainerbuffer=$(expr ${reduceheapgigs} \* 256)
fi

# Cloudera recommends 256 for io.sort.mb.  Cloudera blog suggests
# io.sort.factor * 10 ~= io.sort.mb.

# io.sort.mb: 128M per gig of map heap, again with a one gig minimum
if [ "${HADOOP_IO_SORT_MB}X" != "X" ]; then
    iosortmb=${HADOOP_IO_SORT_MB}
else
    sortheapgigs=$(expr ${mapchildheapsize} / 1024)
    [ "${sortheapgigs}" == "0" ] && sortheapgigs=1
    iosortmb=$(expr ${sortheapgigs} \* 128)
fi

# io.sort.factor: a tenth of io.sort.mb unless the user set it
if [ "${HADOOP_IO_SORT_FACTOR}X" != "X" ]; then
    iosortfactor=${HADOOP_IO_SORT_FACTOR}
else
    iosortfactor=$(expr ${iosortmb} / 10)
fi

# A container has to hold the heap plus its buffer
mapcontainermb=$(expr ${mapchildheapsize} + ${mapcontainerbuffer})
reducecontainermb=$(expr ${reducechildheapsize} + ${reducecontainerbuffer})

# Minimum container size starts at 1024 and is lowered to the map or
# reduce container size when either of those is smaller
yarnmincontainer=1024
[ ${mapcontainermb} -lt ${yarnmincontainer} ] && yarnmincontainer=${mapcontainermb}
[ ${reducecontainermb} -lt ${yarnmincontainer} ] && yarnmincontainer=${reducecontainermb}

# Maximum container size is the YARN resource memory, raised when the
# reduce container would not otherwise fit
yarnmaxcontainer=${yarnresourcememory}
[ ${reducecontainermb} -gt ${yarnmaxcontainer} ] && yarnmaxcontainer=${reducecontainermb}

# Slowstart is 0.05 unless the user overrides it
mapredslowstart=${HADOOP_MAPREDUCE_SLOWSTART:-0.05}

# Default map tasks: one per task slot across all workers
if [ "${HADOOP_DEFAULT_MAP_TASKS}X" != "X" ]; then
    defaultmaptasks=${HADOOP_DEFAULT_MAP_TASKS}
else
    defaultmaptasks=$(expr ${maxtaskspernode} \* ${HADOOP_WORKER_COUNT})
fi

# Default reduce tasks: one per worker
defaultreducetasks=${HADOOP_DEFAULT_REDUCE_TASKS:-${HADOOP_WORKER_COUNT}}

# 64M is Hadoop default, widely considered bad choice, we'll use 128M as default
hdfsblocksize=${HADOOP_HDFS_BLOCKSIZE:-134217728}

# Replication falls back to default_hdfs_replication
hdfsreplication=${HADOOP_HDFS_REPLICATION:-${default_hdfs_replication}}

# For HDFS based file system modes, refuse to continue when there are
# fewer worker nodes than the requested replication.  A worker count
# equal to the replication is accepted, so the message says "greater
# than or equal to".
if Magpie_hadoop_filesystem_mode_is_hdfs_type
then
    if [ "${HADOOP_WORKER_COUNT}" -lt "${hdfsreplication}" ]
    then
        echo "Number of Hadoop worker nodes (${HADOOP_WORKER_COUNT}) must be greater than or equal to HDFS replication (${hdfsreplication})"
        exit 1
    fi
fi

# 32M is Hadoop default for local
localfilesystemblocksize=${HADOOP_LOCAL_FILESYSTEM_BLOCKSIZE:-33554432}

# Daemon heap max is 1000 unless set; the namenode follows the general
# daemon value unless it is given one of its own
hadoopdaemonheapmax="${HADOOP_DAEMON_HEAP_MAX:-1000}"
hadoopnamenodedaemonheapmax="${HADOOP_NAMENODE_DAEMON_HEAP_MAX:-${hadoopdaemonheapmax}}"

# Compression is enabled only by an explicit "yes"
if [[ "${HADOOP_COMPRESSION}" == "yes" ]]; then
    compression=true
else
    compression=false
fi

# Sets magpie_openfilescount
Magpie_calculate_openfiles ${HADOOP_WORKER_COUNT}
# Sets magpie_userprocessescount
Magpie_calculate_userprocesses ${HADOOP_WORKER_COUNT}

#
# Setup Hadoop file system
#
# Creates the directories the chosen HADOOP_FILESYSTEM_MODE needs and
# sets magpie_hadooptmpdirsubst, magpie_fsdefaultsubst, pathcount and,
# for the HDFS based modes, datanodedir.  Exits 1 on a failed mkdir or
# an unknown mode.
#

pathcount=0

# sets magpie_hadooptmpdir and magpie_fsdefault
Magpie_calculate_hadoop_filesystem_paths ${hadoopnoderank}

if [ "${HADOOP_FILESYSTEM_MODE}" == "hdfs" ]
then
    magpie_hadooptmpdirsubst=`echo "${magpie_hadooptmpdir}" | sed "s/\\//\\\\\\\\\//g"`
    magpie_fsdefaultsubst=`echo ${magpie_fsdefault} | sed "s/\\//\\\\\\\\\//g"`

    # HADOOP_HDFS_PATH is a comma separated list; every entry becomes
    # one datanode directory
    IFSORIG=${IFS}
    IFS=","
    datanodedirtmp=""
    for hdfspath in ${HADOOP_HDFS_PATH}
    do
        if [ ! -d "${hdfspath}" ]
        then
            mkdir -p "${hdfspath}"
            if [ $? -ne 0 ] ; then
                echo "mkdir failed making ${hdfspath}"
                exit 1
            fi
        fi

        datanodedirtmp="${datanodedirtmp}${datanodedirtmp:+","}file://${hdfspath}/dfs/data"
        pathcount=`expr ${pathcount} + 1`
    done
    IFS=${IFSORIG}
    datanodedir=`echo "${datanodedirtmp}" | sed "s/\\//\\\\\\\\\//g"`
elif Magpie_hadoop_filesystem_mode_is_hdfs_on_network_type
then
    magpie_hadooptmpdirsubst=`echo "${magpie_hadooptmpdir}" | sed "s/\\//\\\\\\\\\//g"`
    magpie_fsdefaultsubst=`echo ${magpie_fsdefault} | sed "s/\\//\\\\\\\\\//g"`

    if [ ! -d "${magpie_hadooptmpdir}" ]
    then
        mkdir -p "${magpie_hadooptmpdir}"
        if [ $? -ne 0 ] ; then
            echo "mkdir failed making ${magpie_hadooptmpdir}"
            exit 1
        fi

        # Only a freshly created directory gets its stripe settings
        if [ "${HADOOP_FILESYSTEM_MODE}" == "hdfsoverlustre" ]
        then
            lfscmd=`which lfs`
            if [ $? -eq 0 ] ; then
                ${lfscmd} setstripe --stripe-size ${hdfsblocksize} --stripe-count 1 ${magpie_hadooptmpdir}
            else
                Magpie_output_internal_warning "Can't find lfs and set stripe size"
            fi
        fi
    fi

    # Already escaped for use as a sed replacement
    datanodedir="file:\/\/\$\{hadoop.tmp.dir\}\/dfs\/data"

    pathcount=1

    # Cleanup locks if requested.  Each mode has its own REMOVE_LOCKS
    # switch and its own base path; previously the Lustre path was used
    # for hdfsovernetworkfs as well, so its locks were never removed.
    # NOTE(review): assumes hdfsovernetworkfs uses the same node-<rank>
    # layout under HADOOP_HDFSOVERNETWORKFS_PATH - confirm against
    # Magpie_calculate_hadoop_filesystem_paths.
    removelocks="no"
    lockbasepath=""
    if [ "${HADOOP_FILESYSTEM_MODE}" == "hdfsoverlustre" ]
    then
        removelocks="${HADOOP_HDFSOVERLUSTRE_REMOVE_LOCKS}"
        lockbasepath="${HADOOP_HDFSOVERLUSTRE_PATH}"
    elif [ "${HADOOP_FILESYSTEM_MODE}" == "hdfsovernetworkfs" ]
    then
        removelocks="${HADOOP_HDFSOVERNETWORKFS_REMOVE_LOCKS}"
        lockbasepath="${HADOOP_HDFSOVERNETWORKFS_PATH}"
    fi

    if [ "${removelocks}" == "yes" ]
    then
        for num in `seq 0 ${HADOOP_WORKER_COUNT}`
        do
            # MAGPIE_ONE_TIME_RUN not necessary to check here
            lockfile="${lockbasepath}/node-${num}/dfs/data/in_use.lock"
            rm -f "${lockfile}"
        done
    fi
elif [ "${HADOOP_FILESYSTEM_MODE}" == "rawnetworkfs" ]
then
    magpie_hadooptmpdirsubst=`echo "${magpie_hadooptmpdir}" | sed "s/\\//\\\\\\\\\//g"`
    magpie_fsdefaultsubst=`echo ${magpie_fsdefault} | sed "s/\\//\\\\\\\\\//g"`

    if [ ! -d "${HADOOP_RAWNETWORKFS_PATH}" ]
    then
        mkdir -p "${HADOOP_RAWNETWORKFS_PATH}"
        if [ $? -ne 0 ] ; then
            echo "mkdir failed making ${HADOOP_RAWNETWORKFS_PATH}"
            exit 1
        fi
    fi

    if [ ! -d "${magpie_hadooptmpdir}" ]
    then
        mkdir -p "${magpie_hadooptmpdir}"
        if [ $? -ne 0 ] ; then
            echo "mkdir failed making ${magpie_hadooptmpdir}"
            exit 1
        fi
    fi

    pathcount=1
else
    Magpie_output_internal_error "Illegal HADOOP_FILESYSTEM_MODE \"${HADOOP_FILESYSTEM_MODE}\" specified"
    exit 1
fi

# Decide where MapReduce and YARN keep node-local data.  Both results
# are stored with "/" escaped so they can be used as sed replacements.
if { [ "${HADOOP_FILESYSTEM_MODE}" == "hdfsoverlustre" ] \
        || [ "${HADOOP_FILESYSTEM_MODE}" == "hdfsovernetworkfs" ] \
        || [ "${HADOOP_FILESYSTEM_MODE}" == "rawnetworkfs" ]; } \
    && [ -n "${HADOOP_LOCALSTORE}" ]; then
    # Network file system modes with HADOOP_LOCALSTORE set: use every
    # comma separated entry, creating missing directories on the way
    savedifs=${IFS}
    IFS=","
    mapredlocalstoredir=""
    yarnlocalstoredir=""
    for storepath in ${HADOOP_LOCALSTORE}; do
        escapedmapreddir=$(echo "${storepath}/mapred/local" | sed 's/\//\\\//g')
        escapedyarndir=$(echo "${storepath}/yarn-nm" | sed 's/\//\\\//g')

        if [ ! -d "${storepath}" ]; then
            if ! mkdir -p ${storepath}; then
                echo "mkdir failed making ${storepath}"
                exit 1
            fi
        fi

        mapredlocalstoredir="${mapredlocalstoredir}${mapredlocalstoredir:+,}${escapedmapreddir}"
        yarnlocalstoredir="${yarnlocalstoredir}${yarnlocalstoredir:+,}${escapedyarndir}"
    done
    IFS=${savedifs}
elif [ "${HADOOP_FILESYSTEM_MODE}" == "hdfs" ] && [ ${pathcount} -gt 1 ]; then
    # Plain HDFS with more than one path: spread the local dirs across
    # the same comma separated HADOOP_HDFS_PATH entries
    savedifs=${IFS}
    IFS=","
    mapredlocalstoredir=""
    yarnlocalstoredir=""
    for storepath in ${HADOOP_HDFS_PATH}; do
        escapedmapreddir=$(echo "${storepath}/mapred/local" | sed 's/\//\\\//g')
        escapedyarndir=$(echo "${storepath}/yarn-nm" | sed 's/\//\\\//g')

        mapredlocalstoredir="${mapredlocalstoredir}${mapredlocalstoredir:+,}${escapedmapreddir}"
        yarnlocalstoredir="${yarnlocalstoredir}${yarnlocalstoredir:+,}${escapedyarndir}"
    done
    IFS=${savedifs}
else
    # Everything else lives under hadoop.tmp.dir
    mapredlocalstoredir='$\{hadoop.tmp.dir\}\/mapred\/local'
    yarnlocalstoredir='$\{hadoop.tmp.dir\}\/yarn-nm'
fi

# With an HDFS type file system the staging dir would otherwise be
# stored in HDFS, so pick a better path
if Magpie_hadoop_filesystem_mode_is_hdfs_type; then
    yardappmapreduceamstagingdir="${HADOOP_LOCAL_SCRATCHSPACE_DIR}/yarn/"
else
    yardappmapreduceamstagingdir="${magpie_hadooptmpdir}/yarn/"
fi

# Sets magpie_hadoop_stop_timeout
Magpie_calculate_stop_timeouts

# set java.io.tmpdir
# The ${var:+" "} form only inserts the separating space when the
# option string already has content.  The yarn line used ${var+" "},
# which also fires for a set-but-empty variable and left a stray
# leading space in extrayarnopts.
extrahadoopopts="${extrahadoopopts}${extrahadoopopts:+" "}-Djava.io.tmpdir=${HADOOP_LOCAL_SCRATCHSPACE_DIR}/tmp"

extrayarnopts="${extrayarnopts}${extrayarnopts:+" "}-Djava.io.tmpdir=${YARN_LOCAL_SCRATCHSPACE_DIR}/tmp"

# Both tmp directories have to exist before the JVMs are pointed at them
if [ ! -d "${HADOOP_LOCAL_SCRATCHSPACE_DIR}/tmp" ]
then
    mkdir -p "${HADOOP_LOCAL_SCRATCHSPACE_DIR}/tmp"
    if [ $? -ne 0 ] ; then
        echo "mkdir failed making ${HADOOP_LOCAL_SCRATCHSPACE_DIR}/tmp"
        exit 1
    fi
fi

if [ ! -d "${YARN_LOCAL_SCRATCHSPACE_DIR}/tmp" ]
then
    mkdir -p "${YARN_LOCAL_SCRATCHSPACE_DIR}/tmp"
    if [ $? -ne 0 ] ; then
        echo "mkdir failed making ${YARN_LOCAL_SCRATCHSPACE_DIR}/tmp"
        exit 1
    fi
fi

# Yarn Queue Users: the submitting user plus any comma separated extras
yarnqueueusers="${USER}"
if [ "${YARN_QUEUES_EXTRA_USERS}X" != "X" ]
then
    yarnqueueusers="${yarnqueueusers},${YARN_QUEUES_EXTRA_USERS}"
fi

# Yarn Queue Groups: appended to the user list after a single space
if [ "${YARN_QUEUES_EXTRA_GROUPS}X" != "X" ]
then
    yarnqueueusers="${yarnqueueusers} ${YARN_QUEUES_EXTRA_GROUPS}"
fi

# disable hsperfdata if using NO_LOCAL_DIR
if [ "${MAGPIE_NO_LOCAL_DIR}" == "yes" ]
then
    extrahadoopopts="${extrahadoopopts}${extrahadoopopts:+" "}-XX:-UsePerfData"

    extrayarnopts="${extrayarnopts}${extrayarnopts:+" "}-XX:-UsePerfData"
fi

# Load the jar files for Hive if using Tez
if [[ "${HIVE_USE_TEZ}" == "yes" ]]; then
    # Each jar is put in FRONT of the list built so far, so the final
    # classpath lists them in the reverse of the order written here
    extratezclasses=""
    for tezjar in \
        "tez-api-${TEZ_VERSION}.jar" \
        "tez-common-${TEZ_VERSION}.jar" \
        "tez-dag-${TEZ_VERSION}.jar" \
        "tez-mapreduce-${TEZ_VERSION}.jar" \
        "tez-runtime-internals-${TEZ_VERSION}.jar" \
        "tez-runtime-library-${TEZ_VERSION}.jar" \
        "hadoop-shim-${TEZ_VERSION}.jar" \
        "hadoop-shim-2.7-${TEZ_VERSION}.jar" \
        "lib/commons-collections4-4.1.jar" \
        "lib/commons-collections-3.2.2.jar"
    do
        extratezclasses="${TEZ_HOME}/${tezjar}${extratezclasses:+:${extratezclasses}}"
    done

    extrahadoopclasses="${extrahadoopclasses}${extrahadoopclasses:+:}${extratezclasses}"
fi

#
# Locate the template config files
#

# Each entry is "<template file>:<variable>".  Magpie_find_conffile will
# set the 'pre' filename variable named in the second half.
hadoopconffilespecs=(
    "core-site-2.X-3.X.xml:pre_coresitexml"
    "hdfs-site-2.X-3.X.xml:pre_hdfssitexml"
    "yarn-site-2.X-3.X.xml:pre_yarnsitexml"
)
if echo ${HADOOP_VERSION} | grep -q -E "2\.[0-9]\.[0-9]"; then
    hadoopconffilespecs+=(
        "hadoop.log4j-2.X.properties:pre_log4jproperties"
        "hadoop-env-2.X.sh:pre_hadoopenvsh"
        "mapred-env-2.X.sh:pre_mapredenvsh"
        "yarn-env-2.X.sh:pre_yarnenvsh"
        "mapred-site-2.X.xml:pre_mapredsitexml"
    )
elif echo ${HADOOP_VERSION} | grep -q -E "3\.[0-9]\.[0-9]"; then
    hadoopconffilespecs+=(
        "hadoop-env-3.X.sh:pre_hadoopenvsh"
        "hadoop.log4j-3.X.properties:pre_log4jproperties"
        "hadoop-user-functions-3-X.sh:pre_hadoopuserfunctions"
        "mapred-site-3.X.xml:pre_mapredsitexml"
    )
fi

for hadoopconffilespec in "${hadoopconffilespecs[@]}"; do
    Magpie_find_conffile "Hadoop" ${HADOOP_CONF_FILES:-""} "${hadoopconffilespec%%:*}" "${hadoopconffilespec##*:}"
done

# Destinations of the generated files, all inside HADOOP_CONF_DIR
post_coresitexml=${HADOOP_CONF_DIR}/core-site.xml
post_hdfssitexml=${HADOOP_CONF_DIR}/hdfs-site.xml
post_mapredsitexml=${HADOOP_CONF_DIR}/mapred-site.xml
post_yarnsitexml=${HADOOP_CONF_DIR}/yarn-site.xml
post_hadoopenvsh=${HADOOP_CONF_DIR}/hadoop-env.sh
post_mapredenvsh=${HADOOP_CONF_DIR}/mapred-env.sh
post_yarnenvsh=${HADOOP_CONF_DIR}/yarn-env.sh
post_log4jproperties=${HADOOP_CONF_DIR}/log4j.properties
post_hadoopuserfunctions=${HADOOP_CONF_DIR}/hadoop-user-functions.sh

# Escape "/" so these can be used as sed replacements
javahomesubst=$(echo "${JAVA_HOME}" | sed 's/\//\\\//g')
extrahadoopoptssubst=$(echo "${extrahadoopopts}" | sed 's/\//\\\//g')

#
# Setup Hadoop configuration files and environment files
#

# core-site.xml: copy the template, then fill in its placeholders in place
coresitesedargs=(
    -e "s/HADOOPTMPDIR/${magpie_hadooptmpdirsubst}/g"
    -e "s/FSDEFAULT/${magpie_fsdefaultsubst}/g"
    -e "s/IOBUFFERSIZE/${iobuffersize}/g"
    -e "s/LOCALBLOCKSIZE/${localfilesystemblocksize}/g"
    -e "s/HADOOPDEFAULTUSER/${USER}/g"
)

cp ${pre_coresitexml} ${post_coresitexml}
sed -i "${coresitesedargs[@]}" ${post_coresitexml}

# hadoop-env.sh: copy the template, substitute its placeholders, then
# append the remote command and ulimit settings.

# Escape "/" in every path that is about to be a sed replacement
hadoopconfdirsubst=`echo "${HADOOP_CONF_DIR}" | sed "s/\\//\\\\\\\\\//g"`
hadooplogdirsubst=`echo "${HADOOP_LOG_DIR}" | sed "s/\\//\\\\\\\\\//g"`
hadooppiddirsubst=`echo "${HADOOP_PID_DIR}" | sed "s/\\//\\\\\\\\\//g"`
hadoopmapredpiddirsubst=`echo "${HADOOP_MAPRED_PID_DIR}" | sed "s/\\//\\\\\\\\\//g"`
hadoophomesubst=`echo "${HADOOP_HOME}" | sed "s/\\//\\\\\\\\\//g"`
hadoopcommonhomesubst=`echo "${HADOOP_COMMON_HOME}" | sed "s/\\//\\\\\\\\\//g"`
hadoopmapredhomesubst=`echo "${HADOOP_MAPRED_HOME}" | sed "s/\\//\\\\\\\\\//g"`
hadoophdfshomesubst=`echo "${HADOOP_HDFS_HOME}" | sed "s/\\//\\\\\\\\\//g"`

extrahadoopclassessubst=`echo "${extrahadoopclasses}" | sed "s/\\//\\\\\\\\\//g"`

cp ${pre_hadoopenvsh} ${post_hadoopenvsh}

sed -i \
    -e "s/HADOOP_JAVA_HOME/${javahomesubst}/g" \
    -e "s/HADOOP_DAEMON_HEAP_MAX/${hadoopdaemonheapmax}/g" \
    -e "s/HADOOP_NAMENODE_HEAP_MAX/${hadoopnamenodedaemonheapmax}/g" \
    -e "s/HADOOPTIMEOUTSECONDS/${magpie_hadoop_stop_timeout}/g" \
    -e "s/HADOOPCONFDIR/${hadoopconfdirsubst}/g" \
    -e "s/HADOOPLOGDIR/${hadooplogdirsubst}/g" \
    -e "s/HADOOPPIDDIR/${hadooppiddirsubst}/g" \
    -e "s/HADOOPMAPREDPIDDIR/${hadoopmapredpiddirsubst}/g" \
    -e "s/HADOOPHOME/${hadoophomesubst}/g" \
    -e "s/HADOOPCOMMONHOME/${hadoopcommonhomesubst}/g" \
    -e "s/HADOOPMAPREDHOME/${hadoopmapredhomesubst}/g" \
    -e "s/HADOOPHDFSHOME/${hadoophdfshomesubst}/g" \
    -e "s/EXTRAHADOOPCLASSES/${extrahadoopclassessubst}/g" \
    -e "s/EXTRAHADOOPOPTS/${extrahadoopoptssubst}/g" \
    ${post_hadoopenvsh}

# Only export a remote command when it is something other than ssh;
# the options are always exported
if [ "${MAGPIE_REMOTE_CMD:-ssh}" != "ssh" ]
then
    echo "export HADOOP_SSH_CMD=\"${MAGPIE_REMOTE_CMD}\"" >> ${post_hadoopenvsh}
fi
echo "export HADOOP_SSH_OPTS=\"${MAGPIE_REMOTE_CMD_OPTS}\"" >> ${post_hadoopenvsh}

# Hadoop 3.0, could use pdsh, need to setup appropriately
# This goes into hadoop-env.sh: for 3.X it is the only environment
# file this script generates, so appending to yarn-env.sh (as was done
# before) put the setting in a file that is otherwise not produced.
if echo ${HADOOP_VERSION} | grep -q -E "3\.[0-9]\.[0-9]"
then
    if [ "${MAGPIE_REMOTE_CMD:-ssh}" != "ssh" ]
    then
        remotecmdbase=`basename ${MAGPIE_REMOTE_CMD}`
        echo "export PDSH_RCMD_TYPE=\"$remotecmdbase\"" >> ${post_hadoopenvsh}
    fi
fi

# A user supplied environment file replaces the default ulimit lines
if [ "${HADOOP_ENVIRONMENT_EXTRA_PATH}X" != "X" ] && [ -f "${HADOOP_ENVIRONMENT_EXTRA_PATH}" ]
then
    cat "${HADOOP_ENVIRONMENT_EXTRA_PATH}" >> ${post_hadoopenvsh}
else
    echo "ulimit -n ${magpie_openfilescount}" >> ${post_hadoopenvsh}
    echo "ulimit -u ${magpie_userprocessescount}" >> ${post_hadoopenvsh}
fi

# log4j.properties is used as is, without substitutions
cp ${pre_log4jproperties} ${post_log4jproperties}

# mapred-site.xml is only generated when the setup type enables YARN
if Magpie_hadoop_setup_type_enables_yarn
then
    hadoophomesubst=`echo "${HADOOP_HOME}" | sed "s/\\//\\\\\\\\\//g"`
    yardappmapreduceamstagingdirsubst=`echo "${yardappmapreduceamstagingdir}" | sed "s/\\//\\\\\\\\\//g"`
    extrahadooptaskoptssubst=`echo "${extrahadooptaskopts}" | sed "s/\\//\\\\\\\\\//g"`

    cp ${pre_mapredsitexml} ${post_mapredsitexml}

    # Space w/ HADOOPEXTRATASKJAVAOPTS & EXTRAHADOOPOPTS substitution is intentional
    # Every substitution is global (g); DEFAULTREDUCETASKS used to be
    # the one exception and only replaced the first match on a line.
    sed -i \
        -e "s/HADOOPHOME/${hadoophomesubst}/g" \
        -e "s/HADOOP_MASTER_HOST/${HADOOP_MASTER_NODE}/g" \
        -e "s/MRPARALLELCOPIES/${parallelcopies}/g" \
        -e "s/JOBTRACKERHANDLERCOUNT/${jobtrackerhandlercount}/g" \
        -e "s/MRSLOWSTART/${mapredslowstart}/g" \
        -e "s/ALLCHILDHEAPSIZE/${allchildheapsize}/g" \
        -e "s/MAPCHILDHEAPSIZE/${mapchildheapsize}/g" \
        -e "s/MAPCONTAINERMB/${mapcontainermb}/g" \
        -e "s/REDUCECHILDHEAPSIZE/${reducechildheapsize}/g" \
        -e "s/REDUCECONTAINERMB/${reducecontainermb}/g" \
        -e "s/DEFAULTMAPTASKS/${defaultmaptasks}/g" \
        -e "s/DEFAULTREDUCETASKS/${defaultreducetasks}/g" \
        -e "s/LOCALSTOREDIR/${mapredlocalstoredir}/g" \
        -e "s/IOSORTFACTOR/${iosortfactor}/g" \
        -e "s/IOSORTMB/${iosortmb}/g" \
        -e "s/HADOOPCOMPRESSION/${compression}/g" \
        -e "s/HADOOPHOSTSINCLUDEFILENAME/${hostsincludefilesubst}/g" \
        -e "s/HADOOPHOSTSEXCLUDEFILENAME/${hostsexcludefilesubst}/g" \
        -e "s/SUBMITFILEREPLICATION/${submitfilereplication}/g" \
        -e "s/HADOOPJOBHISTORYADDRESS/${default_hadoop_jobhistoryserver_address}/g" \
        -e "s/HADOOPJOBHISTORYWEBAPPADDRESS/${default_hadoop_jobhistoryserver_webapp_address}/g" \
        -e "s/MAPREDJOBTRACKERADDRESS/${default_mapred_job_tracker_address}/g" \
        -e "s/YARNAPPMAPREDUCEAMSTAGINGDIR/${yardappmapreduceamstagingdirsubst}/g" \
        -e "s/HADOOPEXTRATASKJAVAOPTS/ ${extrahadooptaskoptssubst}/g" \
        -e "s/EXTRAHADOOPOPTS/ ${extrahadoopoptssubst}/g" \
        ${post_mapredsitexml}
fi

# Generate the YARN related files when the setup type enables YARN:
# yarn-site.xml always, plus mapred-env.sh and yarn-env.sh when the
# version string looks like 2.X.Y.
if Magpie_hadoop_setup_type_enables_yarn
then
    # Returns 0 for ==, 1 for $1 > $2, 2 for $1 < $2
    Magpie_vercomp ${HADOOP_VERSION} "2.2.0"
    if [ $? == "2" ]
    then
        # Versions before 2.2.0 use the dotted aux service name
        yarnauxservices="mapreduce.shuffle"
        yarnauxmapreduceshuffle="yarn.nodemanager.aux-services.mapreduce.shuffle.class"
    else
        yarnauxservices="mapreduce_shuffle"
        yarnauxmapreduceshuffle="yarn.nodemanager.aux-services.mapreduce_shuffle.class"
    fi

    # Both memory checks stay enabled unless explicitly set to "false"
    if [ "${YARN_VMEM_CHECK}" == "false" ]
    then
        yarnvmemcheck='false'
    else
        yarnvmemcheck='true'
    fi

    if [ "${YARN_PMEM_CHECK}" == "false" ]
    then
        yarnpmemcheck='false'
    else
        yarnpmemcheck='true'
    fi

    cp ${pre_yarnsitexml} ${post_yarnsitexml}

    # NOTE(review): YARNRESROUCECPUVCORES is spelled this way here;
    # presumably it matches the placeholder in the template - confirm
    # before "fixing" the spelling.
    sed -i \
        -e "s/HADOOP_MASTER_HOST/${HADOOP_MASTER_NODE}/g" \
        -e "s/YARNMINCONTAINER/${yarnmincontainer}/g" \
        -e "s/YARNMAXCONTAINER/${yarnmaxcontainer}/g" \
        -e "s/YARNRESOURCEMEMORY/${yarnresourcememory}/g" \
        -e "s/LOCALSTOREDIR/${yarnlocalstoredir}/g" \
        -e "s/YARNAUXSERVICES/${yarnauxservices}/g" \
        -e "s/YARNAUXMAPREDUCESHUFFLE/${yarnauxmapreduceshuffle}/g" \
        -e "s/YARNRESOURCEMANAGERADDRESS/${default_yarn_resourcemanager_address}/g" \
        -e "s/YARNRESOURCEMANAGERSCHEDULERADDRESS/${default_yarn_resourcemanager_scheduler_address}/g" \
        -e "s/YARNRESOURCEMANAGERWEBAPPADDRESS/${default_yarn_resourcemanager_webapp_address}/g" \
        -e "s/YARNRESOURCEMANAGERWEBAPPHTTPSADDRESS/${default_yarn_resourcemanager_webapp_https_address}/g" \
        -e "s/YARNRESOURCEMANAGERRESOURCETRACKERADDRESS/${default_yarn_resourcemanager_resourcetracker_address}/g" \
        -e "s/YARNRESOURCEMANAGERADMINADDRESS/${default_yarn_resourcemanager_admin_address}/g" \
        -e "s/YARNLOCALIZERADDRESS/${default_yarn_nodemanager_localizer_address}/g" \
        -e "s/YARNNODEMANAGERWEBAPPADDRESS/${default_yarn_nodemanager_webapp_address}/g" \
        -e "s/YARNDEFAULTUSER/${USER}/g" \
        -e "s/YARNQUEUEUSERS/${yarnqueueusers}/g" \
        -e "s/YARNRESROUCECPUVCORES/${magpie_processor_count}/g" \
        -e "s/YARNVMEMCHECK/${yarnvmemcheck}/g" \
        -e "s/YARNPMEMCHECK/${yarnpmemcheck}/g" \
        ${post_yarnsitexml}

    # Only hadoop-env.sh for Hadoop 3.0, so the two env files below are
    # generated for 2.X only
    if echo ${HADOOP_VERSION} | grep -q -E "2\.[0-9]\.[0-9]"
    then
        # NOTE(review): the mapred log dir is taken from HADOOP_LOG_DIR -
        # confirm no separate mapred log directory is intended.
        hadoopmapredlogdirsubst=`echo "${HADOOP_LOG_DIR}" | sed "s/\\//\\\\\\\\\//g"`
        hadoopconfdirsubst=`echo "${HADOOP_CONF_DIR}" | sed "s/\\//\\\\\\\\\//g"`
        hadoophomesubst=`echo "${HADOOP_HOME}" | sed "s/\\//\\\\\\\\\//g"`
        hadoopcommonhomesubst=`echo "${HADOOP_COMMON_HOME}" | sed "s/\\//\\\\\\\\\//g"`
        hadoopmapredhomesubst=`echo "${HADOOP_MAPRED_HOME}" | sed "s/\\//\\\\\\\\\//g"`

        # mapred-env.sh
        cp ${pre_mapredenvsh} ${post_mapredenvsh}

        sed -i \
            -e "s/HADOOP_JAVA_HOME/${javahomesubst}/g" \
            -e "s/HADOOP_DAEMON_HEAP_MAX/${hadoopdaemonheapmax}/g" \
            -e "s/HADOOPTIMEOUTSECONDS/${magpie_hadoop_stop_timeout}/g" \
            -e "s/HADOOPMAPREDLOGDIR/${hadoopmapredlogdirsubst}/g" \
            -e "s/HADOOPCONFDIR/${hadoopconfdirsubst}/g" \
            -e "s/HADOOPHOME/${hadoophomesubst}/g" \
            -e "s/HADOOPCOMMONHOME/${hadoopcommonhomesubst}/g" \
            -e "s/HADOOPMAPREDHOME/${hadoopmapredhomesubst}/g" \
            ${post_mapredenvsh}

        # yarn-env.sh
        cp ${pre_yarnenvsh} ${post_yarnenvsh}

        yarnconfdirsubst=`echo "${YARN_CONF_DIR}" | sed "s/\\//\\\\\\\\\//g"`
        yarnlogdirsubst=`echo "${YARN_LOG_DIR}" | sed "s/\\//\\\\\\\\\//g"`
        yarnpiddirsubst=`echo "${YARN_PID_DIR}" | sed "s/\\//\\\\\\\\\//g"`
        yarncommonhomesubst=`echo "${YARN_COMMON_HOME}" | sed "s/\\//\\\\\\\\\//g"`
        hadoopyarnhomesubst=`echo "${HADOOP_YARN_HOME}" | sed "s/\\//\\\\\\\\\//g"`

        # NOTE(review): extrayarnlibrarypath is not assigned anywhere in
        # this script; presumably it comes from a sourced file or is
        # simply empty - confirm.
        extrayarnlibrarypathsubst=`echo "${extrayarnlibrarypath}" | sed "s/\\//\\\\\\\\\//g"`
        extrayarnoptssubst=`echo "${extrayarnopts}" | sed "s/\\//\\\\\\\\\//g"`

        sed -i \
            -e "s/HADOOP_JAVA_HOME/${javahomesubst}/g" \
            -e "s/HADOOP_DAEMON_HEAP_MAX/${hadoopdaemonheapmax}/g" \
            -e "s/HADOOPTIMEOUTSECONDS/${magpie_hadoop_stop_timeout}/g" \
            -e "s/YARNUSERNAME/${HADOOP_YARN_USER}/g" \
            -e "s/YARNCONFDIR/${yarnconfdirsubst}/g" \
            -e "s/YARNLOGDIR/${yarnlogdirsubst}/g" \
            -e "s/YARNPIDDIR/${yarnpiddirsubst}/g" \
            -e "s/YARNCOMMONHOME/${yarncommonhomesubst}/g" \
            -e "s/HADOOPYARNHOME/${hadoopyarnhomesubst}/g" \
            -e "s/EXTRAYARNLIBRARYPATH/${extrayarnlibrarypathsubst}/g" \
            -e "s/EXTRAYARNOPTS/${extrayarnoptssubst}/g" \
            ${post_yarnenvsh}

        # Remote command is only exported when it is not plain ssh; the
        # options are always exported
        if [ "${MAGPIE_REMOTE_CMD:-ssh}" != "ssh" ]
        then
            echo "export YARN_SSH_CMD=\"$MAGPIE_REMOTE_CMD\"" >> ${post_yarnenvsh}
        fi
        echo "export YARN_SSH_OPTS=\"${MAGPIE_REMOTE_CMD_OPTS}\"" >> ${post_yarnenvsh}

        # A user supplied environment file replaces the default ulimit lines
        if [ "${HADOOP_ENVIRONMENT_EXTRA_PATH}X" != "X" ] && [ -f ${HADOOP_ENVIRONMENT_EXTRA_PATH} ]
        then
            cat ${HADOOP_ENVIRONMENT_EXTRA_PATH} >> ${post_yarnenvsh}
        else
            echo "ulimit -n ${magpie_openfilescount}" >> ${post_yarnenvsh}
            echo "ulimit -u ${magpie_userprocessescount}" >> ${post_yarnenvsh}
        fi
    fi
fi

# hdfs-site.xml is only generated for the HDFS type file system modes
if Magpie_hadoop_filesystem_mode_is_hdfs_type; then
    cp ${pre_hdfssitexml} ${post_hdfssitexml}

    # Sets magpie_hostname, which becomes this node's HADOOP_HOST value
    Magpie_get_magpie_hostname
    HADOOP_THIS_HOST=${magpie_hostname}

    hdfssitesedargs=(
        -e "s/HADOOP_MASTER_HOST/${HADOOP_MASTER_NODE}/g"
        -e "s/HADOOP_HOST/${HADOOP_THIS_HOST}/g"
        -e "s/HDFSBLOCKSIZE/${hdfsblocksize}/g"
        -e "s/HDFSREPLICATION/${hdfsreplication}/g"
        -e "s/HDFSNAMENODEHANDLERCLOUNT/${namenodehandlercount}/g"
        -e "s/HDFSDATANODEHANDLERCLOUNT/${datanodehandlercount}/g"
        -e "s/IOBUFFERSIZE/${iobuffersize}/g"
        -e "s/HADOOPHOSTSINCLUDEFILENAME/${hostsincludefilesubst}/g"
        -e "s/HADOOPHOSTSEXCLUDEFILENAME/${hostsexcludefilesubst}/g"
        -e "s/DFSDATANODEDIR/${datanodedir}/g"
        -e "s/DFSNAMENODESECONDARYHTTPADDRESS/${default_hadoop_hdfs_namenode_secondary_http_address}/g"
        -e "s/DFSNAMENODESECONDARYHTTPSADDRESS/${default_hadoop_hdfs_namenode_secondary_https_address}/g"
        -e "s/DFSNAMENODEHTTPADDRESS/${default_hadoop_hdfs_namenode_httpaddress}/g"
        -e "s/DFSDATANODEADDRESS/${default_hadoop_hdfs_datanode_address}/g"
        -e "s/DFSDATANODEHTTPADDRESS/${default_hadoop_hdfs_datanode_httpaddress}/g"
        -e "s/DFSDATANODEIPCADDRESS/${default_hadoop_hdfs_datanode_ipcaddress}/g"
        -e "s/DFSBACKUPADDRESS/${default_hadoop_hdfs_namenode_backup_address}/g"
        -e "s/DFSBACKUPHTTPADDRESS/${default_hadoop_hdfs_namenode_backup_http_address}/g"
        -e "s/DFSPERMISSIONSSUPERUSERGROUP/${USER}/g"
    )
    sed -i "${hdfssitesedargs[@]}" ${post_hdfssitexml}
fi

# hadoop-user-functions.sh is only copied when the version looks like 3.X.Y
echo ${HADOOP_VERSION} | grep -q -E "3\.[0-9]\.[0-9]" \
    && cp ${pre_hadoopuserfunctions} ${post_hadoopuserfunctions}

exit 0
