nai
This commit is contained in:
parent
cdbc08d9de
commit
190fa51c3a
|
@ -10,7 +10,7 @@ if [[ "isc17" == ${hostname:0:5} ]]; then
|
||||||
module load ddn/mvapich/3.1.4
|
module load ddn/mvapich/3.1.4
|
||||||
module load root/hdf5/1.10.4
|
module load root/hdf5/1.10.4
|
||||||
module load root/ior/git-20181107
|
module load root/ior/git-20181107
|
||||||
module list
|
#module list
|
||||||
export TD="/esfs/jtacquaviva/testfiles"
|
export TD="/esfs/jtacquaviva/testfiles"
|
||||||
export WD="/esfs/jtacquaviva/git/ddn-ime-evaluation/benchmark"
|
export WD="/esfs/jtacquaviva/git/ddn-ime-evaluation/benchmark"
|
||||||
export NODES=( isc17-c04 isc17-c05 isc17-c02 isc17-c03 isc17-c05 isc17-c06 isc17-c07 isc17-c08 isc17-c09 isc17-c12 isc17-c13 isc17-c14 isc17-c15 isc17-c18 isc17-c22 isc17-c01 )
|
export NODES=( isc17-c04 isc17-c05 isc17-c02 isc17-c03 isc17-c05 isc17-c06 isc17-c07 isc17-c08 isc17-c09 isc17-c12 isc17-c13 isc17-c14 isc17-c15 isc17-c18 isc17-c22 isc17-c01 )
|
||||||
|
@ -24,23 +24,26 @@ elif [[ "m" == ${hostname:0:1} ]]; then
|
||||||
module load bullxmpi_mlx_mt/bullxmpi_mlx_mt-1.2.9.2
|
module load bullxmpi_mlx_mt/bullxmpi_mlx_mt-1.2.9.2
|
||||||
module load k202107/hdf5/1.10.4
|
module load k202107/hdf5/1.10.4
|
||||||
module load k202107/ior/git-20181108
|
module load k202107/ior/git-20181108
|
||||||
module list
|
#module list
|
||||||
export TD="/mnt/lustre01/work/ku0598/k202107/git/ddn-ime-evaluation/benchmark/wd"
|
export TD="/mnt/lustre01/work/ku0598/k202107/git/ddn-ime-evaluation/benchmark/wd"
|
||||||
export WD="/mnt/lustre01/work/ku0598/k202107/git/ddn-ime-evaluation/benchmark"
|
export WD="/mnt/lustre01/work/ku0598/k202107/git/ddn-ime-evaluation/benchmark"
|
||||||
export NODES=()
|
export NODES=()
|
||||||
else
|
else
|
||||||
echo "Configuration faile: Cluster $hostname is not supported. Quitting."
|
echo "Configuration failed: Cluster $hostname is not supported. Quitting."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Lustre cache on DDN cluster is 32108MB. Test file has to be at least twice as large therefore: DATASIZE = 76800MB
|
# Lustre cache on DDN cluster is 32108MB. Test file has to be at least twice as large therefore: DATASIZE = 76800MB
|
||||||
DATASIZE=$((4800 * 1024 * 1024 * 16))
|
#DATASIZE=$((4800 * 1024 * 1024 * 16))
|
||||||
|
DATASIZE=$((48000 * 1024 * 1024 * 16))
|
||||||
|
|
||||||
COUNT_ARR=( $(seq 3) )
|
COUNT_ARR=( $(seq 3) )
|
||||||
TYPE_ARR=( "read" "write" )
|
TYPE_ARR=( "write" "read" )
|
||||||
API_ARR=( "MPIIO" "POSIX")
|
API_ARR=( "POSIX") # "MPIIO" ) #
|
||||||
NN_ARR=( 1 2 4 8 16)
|
#API_ARR=( "MPIIO" ) #
|
||||||
|
#NN_ARR=( 1 2 4 8 16)
|
||||||
|
NN_ARR=( 2 1)
|
||||||
PPN_ARR=( 8 4 1 )
|
PPN_ARR=( 8 4 1 )
|
||||||
T_ARR=( $((10*1024*1024)) $((1*1024*1024)) $((100*1024)) $((16*1024)) )
|
T_ARR=( $((10*1024*1024)) $((1*1024*1024)) $((100*1024)) $((16*1024)) )
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,16 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
oscs=( $( find /proc/fs/lustre/osc -mindepth 1 -maxdepth 1 -type d ) )
|
oscs=( $( find /proc/fs/lustre/osc -mindepth 1 -maxdepth 1 -type d ) )
|
||||||
echo $oscs
|
#echo $oscs
|
||||||
|
|
||||||
while [ ! 0 -eq ${#oscs[@]} ]; do
|
while [ ! 0 -eq ${#oscs[@]} ]; do
|
||||||
set -x
|
|
||||||
sync
|
sync
|
||||||
echo 3 > /proc/sys/vm/drop_caches
|
echo 3 > /proc/sys/vm/drop_caches
|
||||||
set +x
|
|
||||||
|
|
||||||
for i in ${!oscs[@]}; do
|
for i in ${!oscs[@]}; do
|
||||||
used_mb=$( grep -h used_mb ${oscs[$i]}/osc_cached_mb | cut -d" " -f 2 )
|
used_mb=$( grep -h used_mb ${oscs[$i]}/osc_cached_mb | cut -d" " -f 2 )
|
||||||
if [ 0 == $used_mb ]; then
|
if [ 0 == $used_mb ]; then
|
||||||
echo "remove from list ${oscs[$i]}, because cache is $used_mb MB"
|
#echo "remove from list ${oscs[$i]}, because cache is $used_mb MB"
|
||||||
unset oscs[$i]
|
unset oscs[$i]
|
||||||
else
|
else
|
||||||
echo "skip ${oscs[$i]}, cache is still $used_mb MB"
|
echo "skip ${oscs[$i]}, cache is still $used_mb MB"
|
||||||
|
|
|
@ -7,11 +7,9 @@ NETOUTDIR=$2
|
||||||
HOST="$hostname"
|
HOST="$hostname"
|
||||||
PID=$$
|
PID=$$
|
||||||
|
|
||||||
NETOUTFILE="$NETOUTDIR/HOST:$HOST#PID:$PID.txt"
|
NETOUTFILE="$NETOUTDIR/HOST:$HOST-PID:$PID-LABEL:"
|
||||||
|
|
||||||
function capture_network_state {
|
function capture_network_state {
|
||||||
label=$1
|
|
||||||
echo "LABEL $label"
|
|
||||||
echo "TIMESTAMP $(date +%s)"
|
echo "TIMESTAMP $(date +%s)"
|
||||||
perfquery -x
|
perfquery -x
|
||||||
cat /proc/net/dev
|
cat /proc/net/dev
|
||||||
|
@ -19,7 +17,8 @@ function capture_network_state {
|
||||||
cat /proc/fs/lustre/llite/esfs-*/max_cached_mb
|
cat /proc/fs/lustre/llite/esfs-*/max_cached_mb
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "" > $NETOUTFILE
|
echo "$( capture_network_state )" &> "${NETOUTFILE}start.txt"
|
||||||
echo "$( capture_network_state "START" )" &>> $NETOUTFILE
|
|
||||||
$IOR $IOR_PARAMS
|
$IOR $IOR_PARAMS
|
||||||
echo "$( capture_network_state "STOP" )" &>> $NETOUTFILE
|
echo "$( capture_network_state )" &> "${NETOUTFILE}stop.txt"
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
|
@ -2,30 +2,26 @@
|
||||||
|
|
||||||
. ./config.sh
|
. ./config.sh
|
||||||
|
|
||||||
NODES='isc17-c02,isc17-c03,isc17-c04,isc17-c05,isc17-c06,isc17-c07,isc17-c08,isc17-c09,isc17-c11,isc17-c12,isc17-c13,isc17-c14,isc17-c15,isc17-c18,isc17-c22,isc17-c01'
|
NODES='isc17-c04,isc17-c05,isc17-c02,isc17-c03,isc17-c06,isc17-c07,isc17-c08,isc17-c09,isc17-c11,isc17-c12,isc17-c13,isc17-c14,isc17-c15,isc17-c18,isc17-c22,isc17-c01'
|
||||||
|
|
||||||
|
|
||||||
#for NN in ${NN_ARR[@]}; do
|
for NN in ${NN_ARR[@]}; do
|
||||||
|
|
||||||
for NN in 1; do
|
|
||||||
LUSTRE_TESTFILE="$TD/sharedread${NN}/file"
|
LUSTRE_TESTFILE="$TD/sharedread${NN}/file"
|
||||||
|
if [ ! -f $LUSTRE_TESTFILE ]; then
|
||||||
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
||||||
mkdir $TESTDIR
|
mkdir $TESTDIR
|
||||||
lfs setstripe -c $(($NN * 2)) $TESTDIR
|
lfs setstripe -c $(($NN * 2)) $TESTDIR
|
||||||
|
|
||||||
MPIEXEC_PARAMS="-ppn 8 -np $((8*$NN)) -hosts isc17-c04,isc17-c05 "
|
MPIEXEC_PARAMS="-ppn 8 -np $((8*$NN)) -hosts $NODES "
|
||||||
MPIEXEC_PARAMS+="-genv MV2_NUM_HCAS 1 -genv MV2_CPU_BINDING_LEVEL core -genv MV2_CPU_BINDING_POLICY scatter"
|
MPIEXEC_PARAMS+="-genv MV2_NUM_HCAS 1 -genv MV2_CPU_BINDING_LEVEL core -genv MV2_CPU_BINDING_POLICY scatter"
|
||||||
|
|
||||||
IOR_PARAMS="-i 1 -s 1 -t $((16 * 1024 * 1024)) -b $(($DATASIZE / 8)) -o $LUSTRE_TESTFILE -a MPIIO -e -g -k "
|
IOR_PARAMS="-i 1 -s 1 -t $((16 * 1024 * 1024)) -b $(($DATASIZE / 8)) -o $LUSTRE_TESTFILE -a MPIIO -e -g -k "
|
||||||
IOR_PARAMS+="-D 60 -O stoneWallingWearOut=1 "
|
#IOR_PARAMS+="-D 60 -O stoneWallingWearOut=1 "
|
||||||
|
|
||||||
./drop_caches.sh
|
#./drop_caches.sh
|
||||||
set +x
|
set +x
|
||||||
$MPIEXEC $MPIEXEC_PARAMS $IOR $IOR_PARAMS -w
|
$MPIEXEC $MPIEXEC_PARAMS $IOR $IOR_PARAMS -w
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
# ./drop_caches.sh
|
fi
|
||||||
# set +x
|
|
||||||
# $MPIEXEC $MPIEXEC_PARAMS $IOR $IOR_PARAMS -r
|
|
||||||
# set -x
|
|
||||||
done
|
done
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
|
||||||
function force_exit {
|
function force_exit {
|
||||||
echo "Committing suicide (PID $$)"
|
echo "Committing suicide (PID $$)"
|
||||||
exit
|
exit
|
||||||
|
@ -27,45 +26,47 @@ for NN in ${NN_ARR[@]}; do
|
||||||
for T in ${T_ARR[@]}; do
|
for T in ${T_ARR[@]}; do
|
||||||
for PPN in ${PPN_ARR[@]}; do
|
for PPN in ${PPN_ARR[@]}; do
|
||||||
for API in ${API_ARR[@]}; do
|
for API in ${API_ARR[@]}; do
|
||||||
|
BENCHFILE="./output_v2/COUNT:$COUNT-NN:$NN-PPN:$PPN-API:$API-T:$T-TYPE:$TYPE.txt"
|
||||||
|
|
||||||
BENCHFILE="./output_v2/COUNT:$COUNT#NN:$NN#PPN:$PPN#API:$API#T:$T#TYPE:$TYPE.txt"
|
if [ ! -e "${BENCHFILE}" ]; then
|
||||||
|
|
||||||
#if [ ! -e "${BENCHFILE}" ]; then
|
|
||||||
if true; then
|
|
||||||
OUTDIR="$(dirname $BENCHFILE)"
|
OUTDIR="$(dirname $BENCHFILE)"
|
||||||
[ ! -d $OUTDIR ] && mkdir $OUTDIR
|
[ ! -d $OUTDIR ] && mkdir $OUTDIR
|
||||||
touch $BENCHFILE
|
touch $BENCHFILE
|
||||||
|
|
||||||
IOR_TYPE_OPTS=""
|
|
||||||
if [[ "read" == $TYPE ]]; then
|
if [[ "read" == $TYPE ]]; then
|
||||||
IOR_TYPE_OPTS="-r"
|
IOR_TYPE_OPTS="-r -E -k"
|
||||||
LUSTRE_TESTFILE="$TD/sharedread$NN/file"
|
LUSTRE_TESTFILE="$TD/sharedread$NN/file"
|
||||||
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
||||||
lfs getstripe $TESTDIR | tee -a $BENCHFILE
|
lfs getstripe $TESTDIR | tee -a $BENCHFILE
|
||||||
|
|
||||||
elif [[ "write" == $TYPE ]]; then
|
elif [[ "write" == $TYPE ]]; then
|
||||||
IOR_TYPE_OPTS="-w"
|
IOR_TYPE_OPTS="-w"
|
||||||
LUSTRE_TESTFILE="$TD/sharedwrite/file"
|
LUSTRE_TESTFILE="$TD/sharedwrite$NN/file"
|
||||||
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
TESTDIR="$(dirname $LUSTRE_TESTFILE)"
|
||||||
[ -d $TESTDIR ] && rm -r $TESTDIR || mkdir -p $TESTDIR
|
[ -d $TESTDIR ] && rm -r $TESTDIR
|
||||||
|
mkdir -p $TESTDIR
|
||||||
lfs setstripe -c $((2 * $NN)) $TESTDIR
|
lfs setstripe -c $((2 * $NN)) $TESTDIR
|
||||||
lfs getstripe $TESTDIR | tee -a $BENCHFILE
|
lfs getstripe $TESTDIR | tee -a $BENCHFILE
|
||||||
fi
|
fi
|
||||||
|
|
||||||
IOR_PARAMS="-i 1 "
|
IOR_PARAMS="-i 1 "
|
||||||
IOR_PARAMS+="-s 1 -t $T -b $((4800 * 1024 * 1024 * 32 / $PPN)) "
|
IOR_PARAMS+="-s 1 -t $T -b $(($DATASIZE / $PPN)) "
|
||||||
IOR_PARAMS+="-D 60 -O stoneWallingWearOut=1 "
|
IOR_PARAMS+="-D 60 -O stoneWallingWearOut=1 "
|
||||||
IOR_PARAMS+="-a $API "
|
IOR_PARAMS+="-a $API "
|
||||||
IOR_PARAMS+="-e -g -z -k -o $LUSTRE_TESTFILE $IOR_TYPE_OPTS "
|
#IOR_PARAMS+="-p " # preallocate -- preallocate file size
|
||||||
|
#IOR_PARAMS+="-e " # fsync -- perform sync operation after each block write
|
||||||
|
IOR_PARAMS+="-g " # intraTestBarriers -- use barriers between open, write/read, and close
|
||||||
|
IOR_PARAMS+="-H " # showHints -- show hints
|
||||||
|
IOR_PARAMS+="-z " # randomOffset -- access is to random, not sequential, offsets within a file
|
||||||
|
IOR_PARAMS+="-o $LUSTRE_TESTFILE $IOR_TYPE_OPTS "
|
||||||
|
IOR_PARAMS+="-O summaryFile=${BENCHFILE}.json "
|
||||||
|
IOR_PARAMS+="-O summaryFormat=JSON "
|
||||||
|
|
||||||
if [[ "isc17" == ${hostname:0:5} ]]; then
|
if [[ "isc17" == ${hostname:0:5} ]]; then
|
||||||
$MPIEXEC -np $NN --hosts $(hosts $NN) ./drop_caches.sh
|
$MPIEXEC -np $NN --hosts $(hosts $NN) ./drop_caches.sh
|
||||||
|
|
||||||
MPIEXEC_PARAMS=" -ppn $PPN -np $(($NN * $PPN)) --hosts $(hosts $NN) -wdir $WD "
|
MPIEXEC_PARAMS=" -ppn $PPN -np $(($NN * $PPN)) --hosts $(hosts $NN) -wdir $WD "
|
||||||
MPIEXEC_PARAMS+="-genv MV2_NUM_HCAS 1 "
|
MPIEXEC_PARAMS+="-genv MV2_NUM_HCAS 1 "
|
||||||
MPIEXEC_PARAMS+="-genv MV2_CPU_BINDING_LEVEL core "
|
MPIEXEC_PARAMS+="-genv MV2_CPU_BINDING_LEVEL core "
|
||||||
MPIEXEC_PARAMS+="-genv MV2_CPU_BINDING_POLICY scatter "
|
MPIEXEC_PARAMS+="-genv MV2_CPU_BINDING_POLICY scatter "
|
||||||
|
|
||||||
elif [[ "m" == ${hostname:0:1} ]]; then
|
elif [[ "m" == ${hostname:0:1} ]]; then
|
||||||
#MPIEXEC_PARAMS=" -ppn $PPN -n $(($NN * $PPN)) -wdir $WD --host $(hosts $NN) "
|
#MPIEXEC_PARAMS=" -ppn $PPN -n $(($NN * $PPN)) -wdir $WD --host $(hosts $NN) "
|
||||||
MPIEXEC_PARAMS=" -ppn $PPN -n $(($NN * $PPN)) -wdir $WD "
|
MPIEXEC_PARAMS=" -ppn $PPN -n $(($NN * $PPN)) -wdir $WD "
|
||||||
|
@ -74,12 +75,11 @@ for API in ${API_ARR[@]}; do
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
(
|
|
||||||
set -x
|
|
||||||
NETOUTDIR="${BENCHFILE}_network"
|
NETOUTDIR="${BENCHFILE}_network"
|
||||||
echo $NETjOUTDIR
|
|
||||||
[[ ! -d $NETOUTDIR ]] && mkdir $NETOUTDIR || rm $NETOUTDIR/HOST*PID*.txt
|
[[ ! -d $NETOUTDIR ]] && mkdir $NETOUTDIR || rm $NETOUTDIR/HOST*PID*.txt
|
||||||
|
|
||||||
|
(
|
||||||
|
set -x
|
||||||
$MPIEXEC $MPIEXEC_PARAMS ./ior_wrapper.sh "$IOR_PARAMS" "$NETOUTDIR" | tee -a $BENCHFILE
|
$MPIEXEC $MPIEXEC_PARAMS ./ior_wrapper.sh "$IOR_PARAMS" "$NETOUTDIR" | tee -a $BENCHFILE
|
||||||
#$MPIEXEC $MPIEXEC_PARAMS $IOR $IOR_PARAMS | tee -a $BENCHFILE
|
#$MPIEXEC $MPIEXEC_PARAMS $IOR $IOR_PARAMS | tee -a $BENCHFILE
|
||||||
set +x
|
set +x
|
||||||
|
|
Loading…
Reference in New Issue