#!/usr/bin/bash
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

# This script sets up configuration files for jobs.  For the most
# part, it shouldn't be edited.  See job submission files for
# configuration details.

# Spark setup was not requested for this job: nothing to configure.
[ "${SPARK_SETUP}" == "yes" ] || exit 0

# Load Magpie's exported settings first, then its helper libraries.
# The files are sourced one by one in the order listed.
for magpie_sparksetup_file in \
    exports/magpie-exports-submission-type \
    exports/magpie-exports-dirs \
    exports/magpie-exports-user \
    lib/magpie-lib-defaults \
    lib/magpie-lib-node-identification \
    lib/magpie-lib-hadoop-helper \
    lib/magpie-lib-helper \
    lib/magpie-lib-setup \
    lib/magpie-lib-version-helper
do
    source ${MAGPIE_SCRIPTS_HOME}/magpie/${magpie_sparksetup_file}
done

# Only Spark nodes need the configuration below.
# sparknoderank set if succeed
Magpie_am_I_a_spark_node || exit 0

# For rest of setup, we will use cluster specific paths
Magpie_make_all_local_dirs_node_specific

# Extra JVM options are accumulated further down; start both lists empty.
extrasparkjavaopts=""
extrasparkdaemonjavaopts=""

# Daemon heap size: user setting if given, otherwise 1000.
sparkdaemonheapmax="${SPARK_DAEMON_HEAP_MAX:-1000}"

# Sets magpie_threads_to_use and magpie_processor_count
Magpie_calculate_threads_to_use

# Sets magpie_memory_to_use
Magpie_calculate_memory_to_use

# Worker cores: user setting if given, otherwise the calculated thread count.
workercores="${SPARK_WORKER_CORES_PER_NODE:-${magpie_threads_to_use}}"

# Spark is special, can't set > # of cores
[ "${workercores}" -gt "${magpie_processor_count}" ] && workercores=${magpie_processor_count}

# Worker memory and working directory, each with a computed fallback.
workermemory="${SPARK_WORKER_MEMORY_PER_NODE:-${magpie_memory_to_use}}"
workerdirectory="${SPARK_WORKER_DIRECTORY:-${SPARK_LOCAL_DIR}/work}"

# Job (executor) memory defaults to everything the worker may use.
sparkmem="${SPARK_JOB_MEMORY:-${workermemory}}"

# Under YARN a tenth of the memory is set aside as overhead and removed
# from the job memory.
if [ "${SPARK_SETUP_TYPE}" == "YARN" ]
then
    sparkmemoverhead=$(expr ${sparkmem} / 10)
    sparkmem=$(expr ${sparkmem} - ${sparkmemoverhead})
fi

# Driver memory defaults to the (possibly overhead-adjusted) job memory.
sparkdrivermemory="${SPARK_DRIVER_MEMORY:-${sparkmem}}"

# Decide where Spark keeps local scratch data and create the per-job,
# per-node directories.  The result is stored in sparklocalscratchdirpath
# as a comma separated list.
if [ "${SPARK_LOCAL_SCRATCH_DIR}X" == "X" ]
then
    # sets magpie_hadooptmpdir and magpie_fsdefault
    Magpie_calculate_hadoop_filesystem_paths ${sparknoderank}

    sparklocalscratchdirpath="${magpie_hadooptmpdir}/spark/${MAGPIE_JOB_ID}/node-${sparknoderank}"

    if [ ! -d "${sparklocalscratchdirpath}" ]
    then
        # Quoted so a path containing whitespace is created as one directory
        if ! mkdir -p "${sparklocalscratchdirpath}"
        then
            echo "mkdir failed making ${sparklocalscratchdirpath}"
            exit 1
        fi
    fi
else
    # Split the user's comma separated list with an IFS that is scoped
    # to the read itself, so the script-wide IFS is never modified and
    # the entries are not subject to glob expansion.
    IFS="," read -r -a sparklocalpaths <<< "${SPARK_LOCAL_SCRATCH_DIR}"

    sparklocalscratchdirpath=""
    for sparklocalpath in "${sparklocalpaths[@]}"
    do
        # Setup node directory per node in case not a local drive
        sparklocalpathtmp="${sparklocalpath}/${MAGPIE_JOB_ID}/node-${sparknoderank}"

        if [ ! -d "${sparklocalpathtmp}" ]
        then
            if ! mkdir -p "${sparklocalpathtmp}"
            then
                echo "mkdir failed making ${sparklocalpathtmp}"
                exit 1
            fi
        fi

        # Append, inserting a comma only between entries
        sparklocalscratchdirpath="${sparklocalscratchdirpath}${sparklocalscratchdirpath:+","}${sparklocalpathtmp}"
    done
fi

# Optimal depends on file system
#
# Sets sparkshufflebufferkb, sparkshufflebuffer and
# sparkshuffleconsolidatefiles.  Every path through this section
# assigns all three, so the later template substitution never writes
# an empty value.
if [ "${HADOOP_SETUP}" == "yes" ]
then
    if [ "${HADOOP_FILESYSTEM_MODE}" == "hdfs" ]
    then
        sparkshufflebufferkb=100
        sparkshufflebuffer=100k
        sparkshuffleconsolidatefiles="false"
    elif Magpie_hadoop_filesystem_mode_is_hdfs_on_network_type
    then
        # Default block size is 1M in Lustre
        # XXX: If not default, can get from lctl or similar?
        # If other networkFS, just assume like Lustre
        sparkshufflebufferkb=1024
        sparkshufflebuffer=1024k
        sparkshuffleconsolidatefiles="true"
    elif [ "${HADOOP_FILESYSTEM_MODE}" == "rawnetworkfs" ]
    then
        # Assuming Lustre, so copy above 1M
        sparkshufflebufferkb=1024
        sparkshufflebuffer=1024k
        sparkshuffleconsolidatefiles="true"
    else
        # Unrecognized filesystem mode: previously the variables were
        # left unset here.  Fall back to the same Lustre-style values
        # used for every other non-hdfs case.
        sparkshufflebufferkb=1024
        sparkshufflebuffer=1024k
        sparkshuffleconsolidatefiles="true"
    fi
else
    # Accessing filesystem directly, assume Lustre
    sparkshufflebufferkb=1024
    sparkshufflebuffer=1024k
    sparkshuffleconsolidatefiles="true"
fi

# Default parallelism falls back to the worker count.  Remember when
# the user did not choose a value so later version-specific handling
# can decide whether to keep the setting at all.
sparkdefaultparallelism="${SPARK_DEFAULT_PARALLELISM:-${SPARK_WORKER_COUNT}}"
if [ -z "${SPARK_DEFAULT_PARALLELISM}" ]
then
    sparkdefaultparallelismnotset=1
fi

# Memory fractions: user setting if given, otherwise the listed default.
sparkstoragememoryfraction="${SPARK_STORAGE_MEMORY_FRACTION:-0.6}"
sparkshufflememoryfraction="${SPARK_SHUFFLE_MEMORY_FRACTION:-0.2}"
sparkmemoryfraction="${SPARK_MEMORY_FRACTION:-0.6}"
sparkmemorystoragefraction="${SPARK_MEMORY_STORAGE_FRACTION:-0.5}"

# Compression and scheduling switches.
sparkrddcompress="${SPARK_RDD_COMPRESS:-true}"
sparkiocompressioncodec="${SPARK_IO_COMPRESSION_CODEC:-lz4}"
sparkdeployspreadout="${SPARK_DEPLOY_SPREADOUT:-true}"

# Tests show proc count is about the best performing when you have
# many cores, which is likely the norm in HPC clusters.  Never go
# below 4 threads.
akkathreads=$(expr ${magpie_processor_count})
[ "${akkathreads}" -lt "4" ] && akkathreads=4

# Network timeout is needed both in seconds and in milliseconds.
sparknetworktimeoutsecs="${SPARK_NETWORK_TIMEOUT:-120}"
sparknetworktimeoutms=$((${sparknetworktimeoutsecs} * 1000))

# Sets magpie_openfilescount
Magpie_calculate_openfiles ${SPARK_WORKER_COUNT}
# Sets magpie_userprocessescount
Magpie_calculate_userprocesses ${SPARK_WORKER_COUNT}

# Point the JVM temp directory of both the Spark jobs and the Spark
# daemons at the local scratch space.
sparkjavatmpdir="${SPARK_LOCAL_SCRATCHSPACE_DIR}/tmp"

extrasparkjavaopts="${extrasparkjavaopts}${extrasparkjavaopts:+" "}-Djava.io.tmpdir=${sparkjavatmpdir}"

extrasparkdaemonjavaopts="${extrasparkdaemonjavaopts}${extrasparkdaemonjavaopts:+" "}-Djava.io.tmpdir=${sparkjavatmpdir}"

if [ ! -d "${sparkjavatmpdir}" ]
then
    # Quoted so a scratch path containing whitespace is created as a
    # single directory instead of being split into several arguments.
    if ! mkdir -p "${sparkjavatmpdir}"
    then
        echo "mkdir failed making ${sparkjavatmpdir}"
        exit 1
    fi
fi

# disable hsperfdata if using NO_LOCAL_DIR
if [ "${MAGPIE_NO_LOCAL_DIR}" == "yes" ]
then
    extrasparkjavaopts="${extrasparkjavaopts}${extrasparkjavaopts:+" "}-XX:-UsePerfData"

    extrasparkdaemonjavaopts="${extrasparkdaemonjavaopts}${extrasparkdaemonjavaopts:+" "}-XX:-UsePerfData"
fi

#
# Get config files for setup
#

# Magpie_find_conffile will set the 'pre' filenames

# Only the spark-env template differs between the 1.x and the 2.x/3.x
# families; the log4j and spark-defaults templates are shared.  A
# version matching neither family looks up no templates at all.
case "${SPARK_VERSION}" in
    *1.[0-9].[0-9]*)
        sparkenvshtemplate="spark-env-1.X.sh"
        ;;
    *[2-3].[0-9].[0-9]*)
        sparkenvshtemplate="spark-env-2.X.sh"
        ;;
    *)
        sparkenvshtemplate=""
        ;;
esac

if [ -n "${sparkenvshtemplate}" ]
then
    Magpie_find_conffile "Spark" ${SPARK_CONF_FILES:-""} "${sparkenvshtemplate}" "pre_sparkenvsh"
    Magpie_find_conffile "Spark" ${SPARK_CONF_FILES:-""} "spark.log4j-1.X.properties" "pre_log4jproperties"
    Magpie_find_conffile "Spark" ${SPARK_CONF_FILES:-""} "spark-defaults.conf" "pre_sparkdefaultsconf"
fi

# YARN deployments additionally need the YARN fragment of spark-defaults
if [ "${SPARK_SETUP_TYPE}" == "YARN" ]
then
    Magpie_find_conffile "Spark" ${SPARK_CONF_FILES:-""} "spark-defaults.conf-yarn" "pre_sparkdefaultsconf_yarn"
fi

# Destinations of the generated files inside the Spark conf directory
post_sparkenvsh=${SPARK_CONF_DIR}/spark-env.sh
post_sparkdefaultsconf=${SPARK_CONF_DIR}/spark-defaults.conf
post_log4jproperties=${SPARK_CONF_DIR}/log4j.properties

#
# Setup Spark configuration files and environment files
#

# Print $1 with every "/" turned into "\/" so the value can be dropped
# into the replacement side of a sed s/.../.../ expression below.
__magpie_spark_sed_escape () {
    printf '%s\n' "$1" | sed 's|/|\\/|g'
}

javahomesubst=$(__magpie_spark_sed_escape "${JAVA_HOME}")
sparkhomesubst=$(__magpie_spark_sed_escape "${SPARK_HOME}")
sparkpiddirsubst=$(__magpie_spark_sed_escape "${SPARK_PID_DIR}")
workerdirectorysubst=$(__magpie_spark_sed_escape "${workerdirectory}")

if [ "${MAGPIE_PYTHON}X" == "X" ]
then
    export MAGPIE_PYTHON="/usr/bin/python"
    echo "MAGPIE_PYTHON not set, assuming Python at /usr/bin/python"
fi
pysparkpythonsubst=$(__magpie_spark_sed_escape "${MAGPIE_PYTHON}")

extrasparkdaemonjavaoptssubst=$(__magpie_spark_sed_escape "${extrasparkdaemonjavaopts}")

# Sets magpie_sparkmajorminorversion
Magpie_get_spark_major_minor_version ${SPARK_VERSION}

# Start from the template and fill in the placeholders in place.
# Paths are quoted so a conf directory containing whitespace works.
cp "${pre_sparkenvsh}" "${post_sparkenvsh}"

# Sets magpie_hostname
Magpie_get_magpie_hostname
SPARK_THIS_HOST=${magpie_hostname}

# The expressions are applied in the order listed, e.g.
# SPARKWORKERMEMORY is substituted before the shorter SPARKMEM.
sed -i \
    -e "s/SPARK_JAVA_HOME/${javahomesubst}/g" \
    -e "s/SPARKHOME/${sparkhomesubst}/g" \
    -e "s/SPARKPIDDIR/${sparkpiddirsubst}/g" \
    -e "s/SPARK_DAEMON_HEAP_MAX/${sparkdaemonheapmax}/g" \
    -e "s/SPARKWORKERCORES/${workercores}/g" \
    -e "s/SPARKWORKERMEMORY/${workermemory}/g" \
    -e "s/SPARKWORKERDIR/${workerdirectorysubst}/g" \
    -e "s/SPARKMEM/${sparkmem}/g" \
    -e "s/SPARKDRIVERMEMORY/${sparkdrivermemory}/g" \
    -e "s/SPARKMASTERHOST/${SPARK_THIS_HOST}/g" \
    -e "s/SPARKMASTERPORT/${default_spark_master_port}/g" \
    -e "s/SPARKMASTERWEBUIPORT/${default_spark_master_webui_port}/g" \
    -e "s/SPARKWORKERWEBUIPORT/${default_spark_worker_webui_port}/g" \
    -e "s/SPARKDAEMONJAVAOPTS/${extrasparkdaemonjavaoptssubst}/g" \
    -e "s/SPARKPYSPARKPYTHON/${pysparkpythonsubst}/g" \
    "${post_sparkenvsh}"

# Comment out a few lines we don't need if using Yarn
if [ "${SPARK_SETUP_TYPE}" == "YARN" ]
then
    sed -i \
        -e "s/export SPARK_MASTER_PORT/# export SPARK_MASTER_PORT/g" \
        -e "s/export SPARK_MASTER_WEBUI_PORT/# export SPARK_MASTER_WEBUI_PORT/g" \
        -e "s/export SPARK_WORKER_WEBUI_PORT/# export SPARK_WORKER_WEBUI_PORT/g" \
        "${post_sparkenvsh}"
fi

echo "export SPARK_LOG_DIR=\"${SPARK_LOG_DIR}\"" >> "${post_sparkenvsh}"

if [ "${HBASE_SETUP}" == "yes" ]
then
    # Spark contains its own version of hbase, so sometime the jars conflict.
    # Putting hbase jars in front should fix this
    #
    # The list is built without a trailing ":" because an empty
    # classpath entry is interpreted by Java as the current directory.
    hbase_jars="${HBASE_HOME}/lib/hbase-protocol-${HBASE_VERSION}.jar"
    hbase_jars="${hbase_jars}:${HBASE_HOME}/lib/hbase-common-${HBASE_VERSION}.jar"
    hbase_jars="${hbase_jars}:${HBASE_HOME}/lib/hbase-client-${HBASE_VERSION}.jar"

    # Prepend to whatever is already on the classpath, if anything
    sparkclasspath="${hbase_jars}${sparkclasspath:+:${sparkclasspath}}"
fi

if [ "${PHOENIX_SETUP}" == "yes" ]
then
    phoenix_jar="${PHOENIX_HOME}/phoenix-${PHOENIX_VERSION}-client.jar"

    # Phoenix contains its own version of Jackson and can conflicts with the
    # version of spark running, so we can put sparks jars in front

    # Handle special settings depending on version
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 2.0
    vercomp_result=$?
    if [ "${vercomp_result}" == "0" ] || [ "${vercomp_result}" == "1" ]
    then
        spark_jars_path="$SPARK_HOME/jars"
    else
        spark_jars_path="$SPARK_HOME/lib"
    fi

    # Collect the directory entries with a glob instead of parsing ls
    # output, so names containing whitespace stay intact.  Each entry
    # is prepended (as before), but no trailing ":" is left behind.
    spark_jars=""
    for j in "${spark_jars_path}"/*
    do
        # An unmatched glob stays literal; skip it
        [ -e "${j}" ] || continue
        spark_jars="${j}${spark_jars:+:${spark_jars}}"
    done

    # existing classpath, then Spark's jars, then the Phoenix client;
    # empty parts are left out so no empty classpath entry is created
    sparkclasspath="${sparkclasspath:+${sparkclasspath}:}${spark_jars:+${spark_jars}:}${phoenix_jar}"
fi

if [ "${ZEPPELIN_SETUP}" == "yes" ]
then
    # Both Zeppelin jars go on the classpath.  The second assignment
    # used to overwrite the first, silently dropping the server jar.
    zeppelin_jars="${ZEPPELIN_HOME}/zeppelin-server-${ZEPPELIN_VERSION}.jar"
    zeppelin_jars="${zeppelin_jars}:${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark-${ZEPPELIN_VERSION}.jar"

    # Append after anything already on the classpath
    sparkclasspath="${sparkclasspath:+${sparkclasspath}:}${zeppelin_jars}"
fi

# With Alluxio enabled, add its client jar after anything already on
# the Spark classpath.
if [ "${ALLUXIO_SETUP}" == "yes" ]
then
    alluxio_jars="${ALLUXIO_HOME}/client/alluxio-${ALLUXIO_VERSION}-client.jar"
    sparkclasspath="${sparkclasspath:+${sparkclasspath}:}${alluxio_jars}"
fi

# Export SPARK_SSH_CMD only when a remote command other than ssh was
# configured (an unset or empty setting counts as ssh).
case "${MAGPIE_REMOTE_CMD:-ssh}" in
    ssh)
        ;;
    *)
        echo "export SPARK_SSH_CMD=\"${MAGPIE_REMOTE_CMD}\"" >> ${post_sparkenvsh}
        ;;
esac

# Export any extra options for the remote command as SPARK_SSH_OPTS.
if [ -n "${MAGPIE_REMOTE_CMD_OPTS}" ]
then
    echo "export SPARK_SSH_OPTS=\"${MAGPIE_REMOTE_CMD_OPTS}\"" >> ${post_sparkenvsh}
fi

# Put the Hadoop YARN client and common jars in front of the Spark
# classpath.  The separator is only added when something is already on
# the classpath, so an empty classpath no longer ends up with a
# trailing ":" (an empty entry, which Java treats as the current
# directory).
# NOTE(review): the rest of this script keys YARN handling off
# SPARK_SETUP_TYPE == "YARN"; confirm SPARK_USE_YARN is still set by
# the callers.
if [ "${HADOOP_SETUP}" == "yes" ] && [ "${SPARK_USE_YARN}" == "yes" ]
then
    sparkclasspath="${HADOOP_HOME}/share/hadoop/yarn/hadoop-yarn-client-${HADOOP_VERSION}.jar${sparkclasspath:+:${sparkclasspath}}"
    sparkclasspath="${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar:${sparkclasspath}"
fi

# Standalone mode only: tell Spark where its local scratch directories are.
# If using Yarn, don't set this, Spark will use configuration of yarn.nodemanager.local-dirs
[ "${SPARK_SETUP_TYPE}" == "STANDALONE" ] \
    && echo "export SPARK_LOCAL_DIRS=\"${sparklocalscratchdirpath}\"" >> ${post_sparkenvsh}

# Spark requires HADOOP_CONF_DIR and YARN_CONF_DIR to be set for it to
# automatically find/parse Hadoop conf files.
#
# We go ahead and set HADOOP_HOME for good measure just in case too.
#
if [ "${HADOOP_SETUP}" == "yes" ]
then
    {
        echo "export HADOOP_HOME=\"${HADOOP_HOME}\""
        echo "export HADOOP_CONF_DIR=\"${HADOOP_CONF_DIR}\""
        echo "export YARN_CONF_DIR=\"${YARN_CONF_DIR}\""
    } >> ${post_sparkenvsh}
fi

# A user supplied environment file, when it exists, is appended
# verbatim; otherwise the calculated ulimit settings are written.
if [ -n "${SPARK_ENVIRONMENT_EXTRA_PATH}" ] && [ -f ${SPARK_ENVIRONMENT_EXTRA_PATH} ]
then
    cat ${SPARK_ENVIRONMENT_EXTRA_PATH} >> ${post_sparkenvsh}
else
    {
        echo "ulimit -n ${magpie_openfilescount}"
        echo "ulimit -u ${magpie_userprocessescount}"
    } >> ${post_sparkenvsh}
fi

# Generate spark-defaults.conf from its template.  This is only done
# when SPARK_VERSION contains a 1.x.y, 2.x.y or 3.x.y style number.
if echo ${SPARK_VERSION} | grep -q -E "[1-3]\.[0-9]\.[0-9]"
then
    # Choose the master URL: under YARN it is "yarn-client" for 1.x
    # versions and "yarn" otherwise; without YARN it is the standalone
    # spark:// URL of the master node.
    if [ "${SPARK_SETUP_TYPE}" == "YARN" ]
    then
        if echo ${SPARK_VERSION} | grep -q -E "1\.[0-9]\.[0-9]"
        then
            sparkmaster="yarn-client"
        else
            sparkmaster="yarn"
        fi
    else
        sparkmaster="spark://${SPARK_MASTER_NODE}:${default_spark_master_port}"
    fi
    # Escape "/" as "\/" so the value can be used in the sed
    # replacements below.
    sparkmastersubst=`echo "${sparkmaster}" | sed "s/\\//\\\\\\\\\//g"`

    # Executor and driver classpath are both derived from the same
    # sparkclasspath value.
    if [ "${sparkclasspath}X" != "X" ]
    then
        sparkclasspathsubst=`echo "${sparkclasspath}" | sed "s/\\//\\\\\\\\\//g"`
        sparkdriverclasspathsubst=`echo "${sparkclasspath}" | sed "s/\\//\\\\\\\\\//g"`
    fi

    # Start from the template; everything below edits the copy in place.
    cp ${pre_sparkdefaultsconf} ${post_sparkdefaultsconf}

    # If a YARN fragment was found, its contents are read in at the
    # line containing @YARN@ and that marker line is deleted.
    # Otherwise the marker line and the line following it are deleted.
    if [ "${pre_sparkdefaultsconf_yarn}X" != "X" ]
    then
        sed -i -e "/@YARN@/{r ${pre_sparkdefaultsconf_yarn}" -e "d}" ${post_sparkdefaultsconf}
    else
        sed -i -e "/@YARN@/,+1d" ${post_sparkdefaultsconf}
    fi

    # Sparklyr needs to know the catalogImplementation
    # (uncomments the setting when Hive is being set up)
    if [ "${HIVE_SETUP}" == "yes" ]
    then
        sed -i -e "s/# spark.sql.catalogImplementation hive/spark.sql.catalogImplementation hive/g" ${post_sparkdefaultsconf}
    fi

    # This is not for legitimate security as it can easily be
    # calculated.  It is for sanity "just in case we screwed up a
    # config" security.
    # The secret is the md5 checksum of the job id.
    sparkauthenticatesecret=`echo -n ${MAGPIE_JOB_ID} | md5sum | awk '{print $1}'`

    # Escape "/" in the remaining values used as sed replacements.
    extrasparkjavaoptssubst=`echo "${extrasparkjavaopts}" | sed "s/\\//\\\\\\\\\//g"`

    sparklocalscratchdirpathsubst=`echo "${sparklocalscratchdirpath}" | sed "s/\\//\\\\\\\\\//g"`

    if [ "${SPARK_YARN_STAGING_DIR}X" != "X" ]
    then
	sparkyarnstagingdirsubst=`echo "${SPARK_YARN_STAGING_DIR}" | sed "s/\\//\\\\\\\\\//g"`
    fi

    # Fill in the template placeholders.  The expressions are applied
    # in the order listed, which matters where one placeholder is a
    # prefix of another: SPARKSHUFFLEFILEBUFFERKB has to be replaced
    # before SPARKSHUFFLEFILEBUFFER.  Variables that were never set
    # (e.g. sparkmemoverhead outside of YARN) substitute as empty text.
    sed -i \
        -e "s/SPARKMASTER/${sparkmastersubst}/g" \
        -e "s/SPARKSHUFFLEFILEBUFFERKB/${sparkshufflebufferkb}/g" \
        -e "s/SPARKSHUFFLEFILEBUFFER/${sparkshufflebuffer}/g" \
        -e "s/SPARKSHUFFLECONSOLIDATEFILES/${sparkshuffleconsolidatefiles}/g" \
        -e "s/SPARKDEFAULTPARALLELISM/${sparkdefaultparallelism}/g" \
        -e "s/SPARKEXECUTORMEMORY/${sparkmem}/g" \
        -e "s/SPARKUIPORT/${default_spark_application_dashboard_port}/g" \
        -e "s/SPARKEXECUTOREXTRACLASSPATH/${sparkclasspathsubst}/g" \
        -e "s/SPARKDRIVEREXTRACLASSPATH/${sparkdriverclasspathsubst}/g" \
        -e "s/SPARKAKKATHREADS/${akkathreads}/g" \
        -e "s/SPARKSTORAGEMEMORYFRACTION/${sparkstoragememoryfraction}/g" \
        -e "s/SPARKSHUFFLEMEMORYFRACTION/${sparkshufflememoryfraction}/g" \
        -e "s/SPARKMEMORYFRACTION/${sparkmemoryfraction}/g" \
        -e "s/SPARKMEMORYSTORAGEFRACTION/${sparkmemorystoragefraction}/g" \
        -e "s/SPARKRDDCOMPRESS/${sparkrddcompress}/g" \
        -e "s/SPARKIOCOMPRESSIONCODEC/${sparkiocompressioncodec}/g" \
        -e "s/SPARKDEPLOYSPREADOUT/${sparkdeployspreadout}/g" \
        -e "s/SPARKAUTHENTICATESECRET/${sparkauthenticatesecret}/g" \
        -e "s/EXTRASPARKJAVAOPTS/${extrasparkjavaoptssubst}/g" \
        -e "s/SPARKYARNEXECUTORCORES/${workercores}/g" \
        -e "s/SPARKYARNEXECUTORINSTANCES/${SPARK_WORKER_COUNT}/g" \
        -e "s/SPARKYARNEXECUTOROVERHEAD/${sparkmemoverhead}/g" \
        -e "s/SPARKLOCALDIR/${sparklocalscratchdirpathsubst}/g" \
	-e "s/SPARKYARNSTAGINGDIR/${sparkyarnstagingdirsubst}/g" \
        -e "s/SPARKNETWORKTIMEOUTSECS/${sparknetworktimeoutsecs}/g" \
        -e "s/SPARKNETWORKTIMEOUTMS/${sparknetworktimeoutms}/g" \
        ${post_sparkdefaultsconf}

    # Handle special settings depending on version
    # Newer than 1.3: comment out spark.shuffle.file.buffer.kb.
    # Otherwise: comment out spark.shuffle.file.buffer.  That second
    # replacement drops the whitespace character it matched, which only
    # affects the line that is being commented out.
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 1.3
    vercomp_result=$?
    if [ "${vercomp_result}" == "1" ]
    then
        sed -i -e "s/spark.shuffle.file.buffer.kb/# spark.shuffle.file.buffer.kb/" ${post_sparkdefaultsconf}
    else
        sed -i -e "s/spark.shuffle.file.buffer\s/# spark.shuffle.file.buffer/" ${post_sparkdefaultsconf}
    fi

    # Handle special settings depending on version
    # Versions > 1.3.0, Spark tries to pick the best parallelism.  So
    # don't set spark.default.parallelism if user doesn't want to.
    # NOTE(review): the check below also accepts an "equal" result, so
    # it is effectively >= rather than > -- confirm which is intended.
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 1.3.0
    vercomp_result=$?
    if [ "${vercomp_result}" == "0" ] || [ "${vercomp_result}" == "1" ]
    then
        if [ "${sparkdefaultparallelismnotset}" == "1" ]
        then
            sed -i -e "s/spark.default.parallelism/# spark.default.parallelism/" ${post_sparkdefaultsconf}
        fi
    fi

    # Handle special settings depending on version
    # Newer than 1.5: comment out the spark.storage.memoryFraction and
    # spark.shuffle.memoryFraction settings.  Otherwise: comment out
    # spark.memory.fraction and spark.memory.storageFraction.
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 1.5
    vercomp_result=$?
    if [ "${vercomp_result}" == "1" ]
    then
        sed -i -e "s/spark.storage.memoryFraction/# spark.storage.memoryFraction/" ${post_sparkdefaultsconf}
        sed -i -e "s/spark.shuffle.memoryFraction/# spark.shuffle.memoryFraction/" ${post_sparkdefaultsconf}
    else
        sed -i -e "s/spark.memory.fraction/# spark.memory.fraction/" ${post_sparkdefaultsconf}
        sed -i -e "s/spark.memory.storageFraction/# spark.memory.storageFraction/" ${post_sparkdefaultsconf}
    fi

    # Handle special settings depending on version
    # Versions >= 2.0: uncomment spark.yarn.stagingDir when the user
    # provided SPARK_YARN_STAGING_DIR.
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 2.0
    vercomp_result=$?
    if [ "${vercomp_result}" == "0" ] || [ "${vercomp_result}" == "1" ]
    then
	if [ "${SPARK_YARN_STAGING_DIR}X" != "X" ]
	then
	    sed -i -e "s/# spark.yarn.stagingDir/spark.yarn.stagingDir/" ${post_sparkdefaultsconf}
	fi
    fi

    # spark authenticate doesn't work with yarn in Spark 1.2 -> 1.5.
    # Unclear why it works on Spark 1.6.  Not sure of subtlety in
    # implementation or what.  No apparent bug fix upstream that would
    # fix this.

    # Handle special settings depending on version
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 1.5
    vercomp_result=$?
    # Comment out a few lines we don't need if using Yarn
    # (every spark.authenticate setting, for versions not newer than 1.5)
    if [ "${SPARK_SETUP_TYPE}" == "YARN" ] && [ "${vercomp_result}" != "1" ]
    then
        sed -i \
            -e "s/spark.authenticate/# spark.authenticate/g" \
            ${post_sparkdefaultsconf}
    fi

    # Handle special settings depending on version
    # Versions >= 2.0, spark.akka.threads no longer supported
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 2.0
    vercomp_result=$?
    if [ "${vercomp_result}" == "0" ] || [ "${vercomp_result}" == "1" ]
    then
        sed -i -e "s/spark.akka.threads/# spark.akka.threads/" ${post_sparkdefaultsconf}
    fi

    # Handle special settings depending on version
    # Versions >= 2.3, spark.yarn.executor.memoryOverhead is now
    # spark.executor.memoryOverhead, so comment out whichever of the
    # two names does not apply to this version.
    # 0 is =, 1 is >, 2 is <
    Magpie_vercomp ${magpie_sparkmajorminorversion} 2.3
    vercomp_result=$?
    if [ "${vercomp_result}" == "0" ] || [ "${vercomp_result}" == "1" ]
    then
        sed -i -e "s/spark.yarn.executor.memoryOverhead/# spark.yarn.executor.memoryOverhead/" ${post_sparkdefaultsconf}
    else
        sed -i -e "s/spark.executor.memoryOverhead/# spark.executor.memoryOverhead/" ${post_sparkdefaultsconf}
    fi
fi

# Install the log4j template (pre_log4jproperties) as the
# log4j.properties file of the Spark conf directory.
cp ${pre_log4jproperties} ${post_log4jproperties}

# Spark setup is complete for this node.
exit 0
