# shellcheck shell=bash
# -*-sh-*-

#-------------------------------------
# OHPC common test functions
#-------------------------------------

# Report a fatal error and terminate the calling shell.
#   $1 - message text; written to stderr behind the "[OHPC-TEST:ERROR]: " tag
# Does not return: the shell exits with status 1.
ERROR() {
	printf '%s\n' "[OHPC-TEST:ERROR]: $1" 1>&2
	exit 1
}

# Report a non-fatal error.
#   $1 - message text; written to stderr behind the "[OHPC-TEST:ERROR]: " tag
# Returns 1 to the caller (the shell keeps running; note that this only
# returns from ERROR_RETURN itself, not from the function that called it).
ERROR_RETURN() {
	printf '%s\n' "[OHPC-TEST:ERROR]: $1" 1>&2
	return 1
}

# Verify that an RPM package is present on the local host.
#   $1 - package name handed to "rpm -q"
# Exits through ERROR when the query fails; otherwise returns 0.
check_if_rpm_installed() {
	if ! rpm -q --quiet "$1"; then
		ERROR "RPM $1 is not installed locally"
	fi
}

# Detect the resource manager from its configuration file and export the
# result as RESOURCE_MANAGER.
#   non-empty /etc/pbs.conf          -> openpbs (checked first)
#   non-empty /etc/slurm/slurm.conf  -> slurm
# Exits through ERROR when neither file is present and non-empty.
check_rms() {
	local detected=""

	if [ -s /etc/pbs.conf ]; then
		detected="openpbs"
	elif [ -s /etc/slurm/slurm.conf ]; then
		detected="slurm"
	fi

	if [ -z "${detected}" ]; then
		ERROR "Unsupported or unknown resource manager"
	else
		export RESOURCE_MANAGER="${detected}"
	fi
}

# Check that a command can be resolved by the shell ("type").
#   $1 - command name or path
# Returns 0 when it resolves; otherwise reports through ERROR_RETURN and
# returns its status.
check_exe() {
	type "$1" >/dev/null 2>&1 || ERROR_RETURN "$1 is not available for execution"
}

# Export the variables that describe a python family.
#   $1 - python family name; only "python3" is recognised
# For "python3" this exports _python, python_module_prefix and
# python_package_prefix. Any other value leaves the environment untouched.
get_python_vars() {
	case "$1" in
	python3)
		export _python=python3
		export python_module_prefix=py3
		export python_package_prefix=python3
		;;
	esac
}

# Move the log files of a test directory into a per-compiler subdirectory
# named "family-<compiler>".
#   $1 - test directory holding the *.log and *.log.xml files
#   $2 - compiler family name
# An existing "family-<compiler>" directory is removed first. Exits through
# ERROR on missing arguments or when a directory/file operation fails.
# Side effect: the nullglob shell option is left enabled.
save_logs_compiler_family() {
	if [ $# -lt 2 ]; then
		ERROR "insufficient input provided to save_logs_compiler_family()"
	fi

	# All working variables are local so nothing leaks into (or clobbers
	# variables of) the calling test script.
	local __TEST_DIR="$1"
	local __COMPILER="$2"
	local __SAVE_DIR="family-${__COMPILER}"
	local i

	cd "${__TEST_DIR}" || ERROR "unable to cd to ${__TEST_DIR}"

	if [ -d "${__SAVE_DIR}" ]; then
		rm -rf "${__SAVE_DIR}"
	fi

	mkdir "${__SAVE_DIR}" || ERROR "unable to create ${__SAVE_DIR}"

	# Without nullglob an unmatched pattern would be passed to mv literally.
	shopt -s nullglob

	# Cache .log files, then log.xml files (for JUnit parsing)
	for i in *.log *.log.xml; do
		mv "${i}" "${__SAVE_DIR}" || ERROR "Unable to move file -> ${i}"
	done

	cd - >/dev/null || ERROR "Changing directory failed"

} # end of save_logs_compiler_family()

# Move the log files of a test directory into a per-compiler/MPI
# subdirectory named "family-<compiler>-<mpi>".
#   $1 - test directory holding the *.log and *.log.xml files
#   $2 - compiler family name
#   $3 - MPI family name
# An existing target directory is removed first. Exits through ERROR on
# missing arguments or when a directory/file operation fails.
# Side effect: the nullglob shell option is left enabled.
save_logs_mpi_family() {
	if [ $# -lt 3 ]; then
		ERROR "insufficient input provided to save_logs_mpi_family()"
	fi

	# All working variables (including the loop variable) are local so the
	# caller's variables of the same name are not clobbered.
	local __TEST_DIR="$1"
	local __COMPILER="$2"
	local __MPI="$3"
	local __SAVE_DIR="family-${__COMPILER}-${__MPI}"
	local i

	cd "${__TEST_DIR}" || ERROR "unable to cd to ${__TEST_DIR}"

	if [ -d "${__SAVE_DIR}" ]; then
		rm -rf "${__SAVE_DIR}"
	fi

	mkdir "${__SAVE_DIR}" || ERROR "unable to create ${__SAVE_DIR}"

	# Without nullglob an unmatched pattern would be passed to mv literally.
	shopt -s nullglob

	# Cache .log files, then log.xml files (for JUnit parsing)
	for i in *.log *.log.xml; do
		mv "${i}" "${__SAVE_DIR}" || ERROR "Unable to move file -> ${i}"
	done

	cd - >/dev/null || ERROR "Changing directory failed"

} # end of save_logs_mpi_family()

# Check if we have a known/supported compiler family (taken from
# LMOD_FAMILY_COMPILER) and hand the matching compiler commands back to the
# caller through output variables.
#   $1 - name of the variable that receives the C compiler
#   $2 - name of the variable that receives the C++ compiler
#   $3 - name of the variable that receives the Fortran compiler
# Exits through ERROR when the family is not recognised.
# NOTE: the output variable names must not collide with this function's own
# locals (__CC, __CXX, __FC, myCC, myCXX, myFC); such a name would be
# assigned locally and never reach the caller.
check_compiler_family() {
	local __CC="$1"
	local __CXX="$2"
	local __FC="$3"

	local myCC=""
	local myCXX=""
	local myFC=""

	case "${LMOD_FAMILY_COMPILER}" in
	intel)
		myCC="icx"
		myCXX="icpx"
		myFC="ifx"
		;;
	*arm* | *acfl*)
		# substring match: any family name containing "arm" or "acfl"
		myCC="armclang"
		myCXX="armclang++"
		myFC="armflang"
		;;
	gnu | gnu9 | gnu12 | gnu13 | gnu14 | gnu15)
		myCC="gcc"
		myCXX="g++"
		myFC="gfortran"
		;;
	llvm9)
		myCC="clang"
		myCXX="clang++"
		myFC="gfortran"
		;;
	*)
		ERROR "Unknown compiler family"
		;;
	esac

	# printf -v assigns by name without re-parsing the name as shell code
	# (unlike eval), so a malformed name cannot execute commands.
	printf -v "${__CC}" '%s' "${myCC}"
	printf -v "${__CXX}" '%s' "${myCXX}"
	printf -v "${__FC}" '%s' "${myFC}"
}

# Run serial binary through resource manager
#
# Usage: run_serial_binary [-o output_file] [-s scalasca] [-t timeout] EXE [ARGS...]
#   -o FILE  capture the job output in FILE
#   -s VAL   any non-empty VAL prefixes the command with "scalasca -analyze -t"
#   -t TIME  job time limit, passed to "srun -t" / PBS "walltime=" (default 1)
#   EXE      command to run; word-split on purpose, so it may carry arguments
#   ARGS     further arguments for EXE
# Globals: reads RESOURCE_MANAGER ("slurm" or "openpbs"); reads/writes scalasca.
# Returns: the srun/qsub exit status, or 1 when the executable is missing or
#          the resource manager is unset/unsupported.
# NOTE: the global "scalasca" is not reset here, so a value set by -s (or by
#       the caller) stays in effect for later calls.
# NOTE: on Slurm the command line is re-parsed by eval, so quoting inside
#       EXE/ARGS is interpreted by the shell and arguments are re-split.
run_serial_binary() {
	local output_file=""
	local timeout=1 # default job timeout (in minutes)
	local EXE exec_prefix jobScript QSUB_OUT JOB RC
	local OPTION OPTIND=1

	# Parse optional arguments
	while getopts "o:s:t:" OPTION; do
		case "${OPTION}" in
		o)
			output_file="${OPTARG}"
			;;
		s)
			scalasca="${OPTARG}"
			;;
		t)
			timeout="${OPTARG}"
			;;
		'?')
			echo "Unknown option given to run_serial_binary" >&2
			exit 1
			;;
		esac
	done

	shift "$((OPTIND - 1))" # Shift off the options and optional --.

	EXE="$1"
	shift

	# Stop here when the executable cannot be resolved instead of submitting
	# a job that is known to fail.
	# shellcheck disable=SC2086
	check_exe ${EXE} || return 1

	if [ -z "${scalasca}" ]; then
		exec_prefix=""
	else
		exec_prefix="scalasca -analyze -t"
	fi

	if [ -z "${RESOURCE_MANAGER}" ]; then
		ERROR_RETURN "OHPC resource manager is not defined - please set via RESOURCE_MANAGER variable"
		return 1
	fi

	case "${RESOURCE_MANAGER}" in
	slurm)
		if [ -n "${output_file}" ]; then
			eval "${exec_prefix}" srun -n 1 -N 1 -t "${timeout}" "${EXE}" "$*" >"${output_file}" 2>&1
		else
			eval "${exec_prefix}" srun -n 1 -N 1 -t "${timeout}" "${EXE}" "$*"
		fi
		return "$?"
		;;
	openpbs)
		echo "EXE = ${EXE}"

		# Generate script with commands so we can execute out of submission
		# directory; mktemp gives a collision-free name in the current directory.
		if ! jobScript=$(mktemp .job.XXXXXX); then
			ERROR_RETURN "Unable to create temporary job script"
			return 1
		fi
		{
			echo "#!/bin/bash"
			echo "cd \${PBS_O_WORKDIR}"
			echo "${exec_prefix}" "${EXE}" "$@"
		} >"${jobScript}"
		chmod 700 "${jobScript}"

		if [ -n "${output_file}" ]; then
			QSUB_OUT=$(qsub -k oe -W block=true -l select=1 -l walltime="${timeout}" -- "$(readlink -f "${jobScript}")")
			RC="$?"
			rm -f "${jobScript}"
			# qsub prints "<jobnumber>.<server>"; the kept stdout file is
			# looked up as ${HOME}/STDIN.o<jobnumber>.
			JOB=$(echo "${QSUB_OUT}" | awk -F . '{print $1}')
			[[ -e ${HOME}/STDIN.o"${JOB}" ]] || exit 1
			mv "${HOME}/STDIN.o${JOB}" "${output_file}"
			cat "${output_file}"
			return "${RC}"
		else
			qsub -k n -W block=true -l select=1 -l walltime="${timeout}" -- "$(readlink -f "${jobScript}")"
			RC="$?"
			rm -f "${jobScript}"
			return "${RC}"
		fi
		;;
	*)
		ERROR_RETURN "Unsupported resource manager"
		return 1
		;;
	esac
}

# Run MPI binary through resource manager
run_mpi_binary() {

	# Parse optional arguments
	input_file=""
	output_file=""
	if [ "${RESOURCE_MANAGER}" = "slurm" ]; then
		if [ -n "${SIMPLE_CI}" ]; then
			timeout=5 # default job timeout (in minutes)
			# Do not use pmix in the SIMPLE_CI case (GitHub Actions)
			unset OHPC_MPI_LAUNCHERS
		else
			timeout=2 # default job timeout (in minutes)
		fi
	else
		timeout="00:02:00"
	fi

	local OPTIND
	OPTIND=1
	while getopts "i:o:s:t:" OPTION; do
		case "${OPTION}" in
		i)
			input_file="${OPTARG}"
			;;
		o)
			output_file="${OPTARG}"
			;;
		s)
			scalasca="${OPTARG}"
			;;
		t)
			timeout="${OPTARG}"
			;;
		'?')
			echo "Unknown option given to run_mpi_binary" >&2
			exit 1
			;;
		esac
	done

	if [ -z "${scalasca}" ]; then
		mpi_launcher="prun"
	else
		mpi_launcher="scalasca -analyze -t prun"
	fi

	shift "$((OPTIND - 1))" # Shift off the options and optional --.

	EXE="$1"
	ARGS="$2"
	NNODES="$3"
	NTASKS="$4"

	# shellcheck disable=SC2086
	check_exe ${EXE}

	if [ -z "${RESOURCE_MANAGER}" ]; then
		ERROR_RETURN "Resource manager is not defined - please set via RESOURCE_MANAGER variable"
	fi

	if [ -z "${LMOD_FAMILY_MPI}" ]; then
		ERROR_RETURN "MPI toolchain s not loaded - please load MPI stack first"
	fi

	if [ "${RESOURCE_MANAGER}" = "slurm" ]; then
		if [ "${LMOD_FAMILY_MPI}" != "impi" ] && [ "${LMOD_FAMILY_MPI}" != "mvapich2" ] && [ "${LMOD_FAMILY_MPI}" != "openmpi5" ] && [ "${LMOD_FAMILY_MPI}" != "mpich" ]; then
			ERROR_RETURN "Unsupported MPI family"
		fi

		JOB_SCRIPT="/tmp/job.${USER}.${RANDOM}"

		{
			echo "#!/bin/bash"
			echo "#SBATCH -J OpenHPC-test"
			echo "#SBATCH -N ${NNODES}"
			echo "#SBATCH -n ${NTASKS}"
			echo "#SBATCH -t ${timeout}"
			echo "#SBATCH -o job.%j.out"
		} >"${JOB_SCRIPT}"

		if [ -n "${input_file}" ]; then
			echo "${mpi_launcher} ${EXE} ${ARGS} < ${input_file}" >>"${JOB_SCRIPT}"
		else
			echo "${mpi_launcher} ${EXE} ${ARGS}" >>"${JOB_SCRIPT}"
		fi

		# Submit batch job

		tmpState="/tmp/submitId.${RANDOM}"

		sbatch "${JOB_SCRIPT}" >&"${tmpState}"
		echo "job script = ${JOB_SCRIPT}"

		head -1 "${tmpState}" | grep -q "Submitted batch job" || ERROR "Unable to submit batch job"
		jobId=$(head -1 "${tmpState}" | awk '{print $4}')
		if [ "${jobId}" -le 0 ]; then
			ERROR "Invalid jobID"
		fi
		rm "${tmpState}"

		echo "Batch job ${jobId} submitted"
		RC=1

		for i in $(seq 1 3000); do
			if ! tmpState=$(scontrol show job "${jobId}" | grep JobState); then
				ERROR_RETURN "Error querying job"
			fi

			if echo "${tmpState}" | grep -E -q "JobState=COMPLETED"; then
				echo "Job completed..."
				RC=0
				break

			elif echo "${tmpState}" | grep -E -q "JobState=FAILED"; then
				local TMP_REASON
				TMP_REASON=$(scontrol show job "${jobId}" | grep Reason | awk '{print $2}')
				echo " "
				echo "Job ${jobId} failed..."
				echo "${TMP_REASON}"
				break

			elif echo "${tmpState}" | grep -E -q "JobState=TIMEOUT"; then
				local TMP_REASON
				TMP_REASON=$(scontrol show job "${jobId}" | grep Reason | awk '{print $2}')
				echo " "
				echo "Job ${jobId} encountered timeout..."
				echo "${TMP_REASON}"
				break

			elif echo "${tmpState}" | grep -E -q "JobState=CANCELLED"; then
				echo " "
				echo "Job ${jobId} cancelled..."
				break

			else
				sleep 1
			fi
		done

		# Look for evidence of failure in job output

		echo " "
		cat job."${jobId}".out

		if grep -E -q "${jobId} FAILED|${jobId} CANCELLED|exited on signal|command not found|failed to start|Unable to access executable|Error in init phase" job."${jobId}".out; then
			RC=1
		fi

		if [ -n "${output_file}" ]; then
			mv job."${jobId}".out "${output_file}"
		fi

		rm "${JOB_SCRIPT}"
		return "${RC}"

	elif [ "${RESOURCE_MANAGER}" = "openpbs" ]; then
		JOB_SCRIPT="/tmp/job.${USER}.${RANDOM}"

		# infer tasks/node from total tasks
		((TASKS_PER_NODE = NTASKS / NNODES))

		{
			echo "#!/bin/bash"
			echo "#PBS -N OpenHPC-test"
			echo "#PBS -l select=${NNODES}:mpiprocs=${TASKS_PER_NODE} -l place=scatter"
			echo "#PBS -l walltime=${timeout}"
			echo "#PBS -l place=excl"
			echo "#PBS -o job.out"
			echo "cd \$PBS_O_WORKDIR"
		} >>"${JOB_SCRIPT}"

		if [ -n "${input_file}" ]; then
			echo "${mpi_launcher} ${EXE} ${ARGS} < ${input_file}" >>"${JOB_SCRIPT}"
		else
			echo "${mpi_launcher} ${EXE} ${ARGS}" >>"${JOB_SCRIPT}"
		fi

		# Submit batch job
		qsub -W block=true "${JOB_SCRIPT}"
		RC="$?"

		if [ -n "${output_file}" ]; then
			if [ "${output_file}" != "job.out" ]; then
				mv job.out "${output_file}"
			fi
		fi
		rm "${JOB_SCRIPT}"
		return "${RC}"
	else
		ERROR_RETURN "Unsupported resource manager"
	fi

} # end run_mpi_binary()

# Print the RPM name of a package built for the loaded toolchain:
#   "<name>-<compiler>[-<mpi>]<DELIM>"
#   $1 - base package name
# Globals (read): LMOD_FAMILY_COMPILER, LMOD_FAMILY_MPI, DELIM.
# The "-<mpi>" part is only present when LMOD_FAMILY_MPI is non-empty.
get_rpm_name() {
	local toolchain="${LMOD_FAMILY_COMPILER}"

	# A compiler family containing "acfl" is reported as "arm1", the name
	# OpenHPC expects for the arm compiler package set.
	case "${LMOD_FAMILY_COMPILER}" in
	*acfl*)
		toolchain="arm1"
		;;
	esac

	printf '%s\n' "${1}-${toolchain}${LMOD_FAMILY_MPI:+-${LMOD_FAMILY_MPI}}${DELIM}"
}
