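#!/bin/bash
# ddn-ime-evaluation/run.sh
#
# Runs IOR write and read benchmarks through mpiexec for every combination of
# node count, transfer size, processes per node and I/O API configured below,
# and appends the output of each run to a result file under ./output.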
# SIGINT handler: announce the shutdown and terminate the script.
# Outputs: one line on stdout naming the PID of the script.
# Exits with 130 (128 + SIGINT). A bare `exit` would pass on whatever status
# happened to be current (0 when the function is called directly), so an
# interrupted benchmark could look like a successful one to the caller.
function force_exit {
  echo "Committing suicide (PID $$)"
  exit 130
}
trap force_exit SIGINT
# Provides a comma-separated list of the first N good hosts (hosts that have
# a QDR connection), taken in HOST_LIST order.
# Globals:   HOST_LIST (written)
# Arguments: $1 - number of hosts wanted; a positive integer no larger than
#                 the number of entries in HOST_LIST
# Outputs:   the host list on stdout; diagnostics on stderr
# Returns:   0 on success, 1 when $1 is invalid or exceeds the available hosts
#            (previously this produced empty host names / trailing commas)
function hosts() {
  local num="$1"
  #HOST_LIST=( isc17-c04 isc17-c05 isc17-c06 isc17-c18 )
  #HOST_LIST=( isc17-c04 isc17-c05 isc17-c06 isc17-c18 isc17-c01 isc17-c02 isc17-c03 isc17-c07 isc17-c08 isc17-c09 isc17-c11 isc17-c12 isc17-c13 isc17-c14 isc17-c15 isc17-c22 )
  HOST_LIST=( isc17-c04 isc17-c05 isc17-c06 isc17-c07 isc17-c08 isc17-c09 isc17-c11 isc17-c12 isc17-c13 isc17-c14 isc17-c15 isc17-c18 isc17-c22 isc17-c01 isc17-c02 isc17-c03 )

  # No leading zeros allowed, so the value is never read as octal below.
  if [[ ! "$num" =~ ^[1-9][0-9]*$ ]]; then
    echo "hosts: expected a positive host count, got '$num'" >&2
    return 1
  fi
  if (( num > ${#HOST_LIST[@]} )); then
    echo "hosts: $num hosts requested, only ${#HOST_LIST[@]} available" >&2
    return 1
  fi

  # "${arr[*]}" joins the selected elements with the first character of IFS.
  local IFS=,
  printf '%s\n' "${HOST_LIST[*]:0:num}"
}
# ---------------------------------------------------------------------------
# Toolchain: expose the site-specific module tree and load HDF5 and IOR.
# ---------------------------------------------------------------------------
export MODULEPATH=/esfs/jtacquaviva/software/modules:$MODULEPATH
module purge
module load betke/hdf5/1.8.20-ddn
module load betke/ior/git-ddn
module list

# ---------------------------------------------------------------------------
# Benchmark configuration.
# ---------------------------------------------------------------------------
LUSTRE_TESTFILE_WRITE="/esfs/jtacquaviva/ioperf/file_write"
LUSTRE_TESTFILE_READ=""   # selected per API inside the loop
ITERATIONS=3
IOR="$(command -v ior)"
if [[ -z "$IOR" ]]; then
  # Without the binary every mpiexec call below would be malformed; stop
  # before any result marker is created or the test directory is wiped.
  echo "ior not found in PATH after loading the modules" >&2
  exit 1
fi
MPIEXEC="/opt/ddn/mvapich/bin/mpiexec"
API_ARR=( "POSIX" "MPIIO" )
#NN_ARR=( 4 2 1 8 10 16)
NN_ARR=( 16 )
PPN_ARR=( 8 6 4 2 1 )
T_ARR=( $((10*1024*1024)) $((1*1024*1024)) $((100*1024)) $((16*1024)) )

# Loop-invariant pieces, kept as arrays so every argument stays one word.
ENVVAR=( -genv MV2_NUM_HCAS 1 -genv MV2_CPU_BINDING_LEVEL core -genv MV2_CPU_BINDING_POLICY scatter )
TESTDIR="$(dirname "$LUSTRE_TESTFILE_WRITE")"

# ---------------------------------------------------------------------------
# Sweep: node count x transfer size x processes per node x API.
# A result file that already exists marks the combination as done, so an
# interrupted sweep can be restarted and continues with the missing runs.
# ---------------------------------------------------------------------------
for COUNT in 1; do
  for NN in "${NN_ARR[@]}"; do
    # The host list only depends on the node count.
    if ! HOSTS_CSV="$(hosts "$NN")" || [[ -z "$HOSTS_CSV" ]]; then
      echo "cannot build a host list for $NN nodes" >&2
      exit 1
    fi

    for T in "${T_ARR[@]}"; do
      for PPN in "${PPN_ARR[@]}"; do
        for API in "${API_ARR[@]}"; do
          BENCHFILE="./output/COUNT:$COUNT#NN:$NN#PPN:$PPN#API:$API#T:$T.txt"
          if [[ -e "$BENCHFILE" ]]; then
            echo "skip $(readlink -f "$BENCHFILE"), already exists"
            continue
          fi

          # API-specific IOR options and read target. The read targets are
          # not created by this script; presumably they are prepared
          # beforehand -- verify before running on a fresh file system.
          case "$API" in
            POSIX)
              IOR_API_OPTS=( -F )
              LUSTRE_TESTFILE_READ="/esfs/jtacquaviva/indread$NN/file"
              ;;
            MPIIO)
              IOR_API_OPTS=()
              LUSTRE_TESTFILE_READ="/esfs/jtacquaviva/file_read"
              ;;
            *)
              # Never reuse the read path left over from a previous iteration.
              echo "unknown API '$API', skipping" >&2
              continue
              ;;
          esac

          OUTDIR="$(dirname "$BENCHFILE")"
          mkdir -p "$OUTDIR"
          touch "$BENCHFILE"

          IOR_PARAMS=(
            -i "$ITERATIONS" -s 1 -t "$T"
            -b "$(( 4800 * 1024 * 1024 * 32 / PPN ))"
            -D 120 -a "$API" "${IOR_API_OPTS[@]}" -e -g -z -k
          )
          MPIEXEC_PARAMS=(
            -ppn "$PPN" -np "$(( NN * PPN ))" "${ENVVAR[@]}" --hosts "$HOSTS_CSV"
          )

          # Start every run from an empty test directory striped over 2*NN
          # targets. ${TESTDIR:?} aborts instead of running rm on an empty
          # path.
          if [[ -d "$TESTDIR" ]]; then
            rm -r -- "${TESTDIR:?}"
          fi
          mkdir -p "$TESTDIR"
          lfs setstripe -c "$(( 2 * NN ))" "$TESTDIR"

          # Write phase, cache drop on all ranks, then read phase. The
          # subshell keeps `set -x` local; its stderr (including the trace)
          # is appended to the result file by a separate tee that runs
          # asynchronously in the process substitution.
          (
            set -x
            "$MPIEXEC" "${MPIEXEC_PARAMS[@]}" "$IOR" "${IOR_PARAMS[@]}" -o "$LUSTRE_TESTFILE_WRITE" -w | tee -a "$BENCHFILE"
            "$MPIEXEC" "${MPIEXEC_PARAMS[@]}" /esfs/jtacquaviva/git/ime-evaluation/drop_caches.sh
            "$MPIEXEC" "${MPIEXEC_PARAMS[@]}" "$IOR" "${IOR_PARAMS[@]}" -o "$LUSTRE_TESTFILE_READ" -r | tee -a "$BENCHFILE"
            set +x
          ) 2> >(tee -a "$BENCHFILE")

          # Record the striping that was actually in effect for this run.
          lfs getstripe "$TESTDIR" | tee -a "$BENCHFILE"
        done
      done
    done
  done
done