############################################################################
# Hadoop Filesystem Mode Configurations
############################################################################

# Set how the filesystem should be setup
#
# "hdfs" - Normal straight up HDFS if you have local disk in your
#          cluster.  This option is primarily for benchmarking and
#          caching, but probably shouldn't be used in the general case.
#
#          Be careful running this in a cluster environment.  The next
#          time you execute your job, if a different set of nodes are
#          allocated to you, the HDFS data you wrote from a previous
#          job may not be there.  Specifying specific nodes to use in
#          your job submission (e.g. --nodelist in sbatch) may be a
#          way to alleviate this.
#
#          User must set HADOOP_HDFS_PATH below.
#
@MODE_HDFSOVERLUSTRE@
@MODE_HDFSOVERNETWORKFS@

# "rawnetworkfs" - Use Hadoop RawLocalFileSystem (i.e. file: scheme),
#           to use networked file system directly.  It could be a
#           Lustre mount or NFS mount.  Whatever you please.
#
#           User must set HADOOP_RAWNETWORKFS_PATH below.
#
export HADOOP_FILESYSTEM_MODE=HADOOPFILESYSTEMMODE

# Local Filesystem BlockSize
#
# This configuration is the blocksize hadoop will use when doing I/O
# to a local filesystem.  It is used by HDFS when reading from the
# underlying filesystem.  It is also used with
# HADOOP_FILESYSTEM_MODE="rawnetworkfs".
#
# Commonly 33554432, 67108864, 134217728 (i.e. 32m, 64m, 128m)
#
# If not specified, defaults to 33554432
#
# export HADOOP_LOCAL_FILESYSTEM_BLOCKSIZE=33554432

# HDFS Replication
#
# This is used with HADOOP_FILESYSTEM_MODE="hdfs", "hdfsoverlustre",
# and "hdfsovernetworkfs"
#
# HDFS commonly uses 3.  When doing HDFS over Lustre/NetworkFS, higher
# replication can also help with resilience if nodes fail.  You may
# wish to set this to < 3 to save space.
#
# If not specified, defaults to 3
#
# export HADOOP_HDFS_REPLICATION=3

# HDFS Block Size
#
# This is used with HADOOP_FILESYSTEM_MODE="hdfs", "hdfsoverlustre",
# and "hdfsovernetworkfs"
#
# Commonly 134217728, 268435456, 536870912 (i.e. 128m, 256m, 512m)
#
# If not specified, defaults to 134217728
#
# export HADOOP_HDFS_BLOCKSIZE=134217728

# Path for HDFS when using local disk
#
# This is used with HADOOP_FILESYSTEM_MODE="hdfs"
#
# If you want to specify multiple paths (such as multiple drives),
# make them comma separated (e.g. /dir1,/dir2,/dir3).  The multiple
# paths will be used for local intermediate data and HDFS.  The first
# path will also store daemon data, such as namenode or jobtracker
# data.
#
export HADOOP_HDFS_PATH="LOCALDRIVEPREFIX/hdfs"

# HDFS cleanup
#
# This is used with HADOOP_FILESYSTEM_MODE="hdfs"
#
# After your job has completed, if HADOOP_HDFS_PATH_CLEAR is set to
# yes, Magpie will do a rm -rf on HADOOP_HDFS_PATH.
#
# This is particularly useful when doing normal HDFS on local storage.
# On your next job run, you may not be able to get the nodes you want
# on your next run.  So you may want to clean up your work before the
# next user uses the node.
#
# export HADOOP_HDFS_PATH_CLEAR="yes"

@CONFIG_HDFSOVERLUSTRE@

@CONFIG_HDFSOVERNETWORKFS@

# Path for rawnetworkfs
#
# This is used with HADOOP_FILESYSTEM_MODE="rawnetworkfs"
#
export HADOOP_RAWNETWORKFS_PATH="RAWFSDIRPREFIX/rawnetworkfs/"

# If you have a local SSD or NVRAM, performance may be better to store
# intermediate data on it rather than Lustre or some other networked
# filesystem.  If the below environment variable is specified, local
# intermediate data will be stored in the specified directory.
# Otherwise it will go to an appropriate directory in Lustre/networked
# FS.
#
# Be wary, local SSDs/NVRAM stores may have less space than HDDs or
# networked file systems.  It can be easy to run out of space.
#
# If you want to specify multiple paths (such as multiple drives),
# make them comma separated (e.g. /dir1,/dir2,/dir3).  The multiple
# paths will be used for local intermediate data.
#
# export HADOOP_LOCALSTORE="LOCALDRIVEPREFIX/localstore/"

# HADOOP_LOCALSTORE_CLEAR
#
# After your job has completed, if HADOOP_LOCALSTORE_CLEAR is set to
# yes, Magpie will do a rm -rf on all directories in
# HADOOP_LOCALSTORE.  This is particularly useful if the localstore
# directory is on local storage and you want to clean up your work
# before the next user uses the node.
#
# export HADOOP_LOCALSTORE_CLEAR="yes"
